Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a2674e83
Commit
a2674e83
authored
Jun 17, 2011
by
Mark Friedrichs
Browse files
Mods for GB/VI switching of Born radii
parent
38839771
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
152 additions
and
6090 deletions
+152
-6090
openmmapi/include/openmm/GBVIForce.h
openmmapi/include/openmm/GBVIForce.h
+19
-237
openmmapi/src/GBVIForce.cpp
openmmapi/src/GBVIForce.cpp
+19
-99
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+19
-1507
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+19
-509
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+19
-2756
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+19
-315
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
+19
-226
platforms/cuda/src/kernels/kForces.cu
platforms/cuda/src/kernels/kForces.cu
+19
-441
No files found.
openmmapi/include/openmm/GBVIForce.h
View file @
a2674e83
#ifndef OPENMM_GBVIFORCEFIELD_H_
Vim:
Warning:
Output
is
not
to
a
terminal
#define OPENMM_GBVIFORCEFIELD_H_
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */
#include "Force.h"
#include <vector>
#include "internal/windowsExport.h"
namespace
OpenMM
{
/**
 * This class implements an implicit solvation force using the GB/VI model.
 * <p>
 * To use this class, create a GBVIForce object, then call addParticle() once for each particle in the
 * System to define its parameters.  The number of particles for which you define GB/VI parameters must
 * be exactly equal to the number of particles in the System, or else an exception will be thrown when you
 * try to create a Context.  After a particle has been added, you can modify its force field parameters
 * by calling setParticleParameters().
 * <p>
 * Bonds are registered with addBond() and may be modified afterwards with setBondParameters().
 */
class OPENMM_EXPORT GBVIForce : public Force {
public:
    /**
     * This is an enumeration of the different methods that may be used for handling long range nonbonded forces.
     */
    enum NonbondedMethod {
        /**
         * No cutoff is applied to nonbonded interactions.  The full set of N^2 interactions is computed exactly.
         * This necessarily means that periodic boundary conditions cannot be used.  This is the default.
         */
        NoCutoff = 0,
        /**
         * Interactions beyond the cutoff distance are ignored.
         */
        CutoffNonPeriodic = 1,
        /**
         * Periodic boundary conditions are used, so that each particle interacts only with the nearest periodic copy of
         * each other particle.  Interactions beyond the cutoff distance are ignored.
         */
        CutoffPeriodic = 2
    };
    /**
     * Create a GBVIForce.
     */
    GBVIForce();
    /**
     * Get the number of particles for which GB/VI parameters have been added.
     */
    int getNumParticles() const {
        // Explicit cast, matching getNumBonds(), instead of a silent size_t -> int narrowing.
        return static_cast<int>(particles.size());
    }
    /**
     * Add the GB/VI parameters for a particle.  This should be called once for each particle
     * in the System.  When it is called for the i'th time, it specifies the parameters for the i'th particle.
     *
     * @param charge   the charge of the particle, measured in units of the proton charge
     * @param radius   the GB/VI radius of the particle, measured in nm
     * @param gamma    the gamma parameter
     * @return the index of the particle that was added
     */
    int addParticle(double charge, double radius, double gamma);
    /**
     * Get the force field parameters for a particle.
     *
     * @param index    the index of the particle for which to get parameters
     * @param charge   the charge of the particle, measured in units of the proton charge
     * @param radius   the GB/VI radius of the particle, measured in nm
     * @param gamma    the gamma parameter
     */
    void getParticleParameters(int index, double& charge, double& radius, double& gamma) const;
    /**
     * Set the force field parameters for a particle.
     *
     * @param index    the index of the particle for which to set parameters
     * @param charge   the charge of the particle, measured in units of the proton charge
     * @param radius   the GB/VI radius of the particle, measured in nm
     * @param gamma    the gamma parameter
     */
    void setParticleParameters(int index, double charge, double radius, double gamma);
    /**
     * Add a bond.
     *
     * @param particle1 the index of the first particle
     * @param particle2 the index of the second particle
     * @param distance  the distance between the two particles, measured in nm
     * @return the index of the bond that was added
     */
    int addBond(int particle1, int particle2, double distance);
    /**
     * Get the parameters defining a bond.
     *
     * @param index     the index of the bond for which to get parameters
     * @param particle1 the index of the first particle involved in the bond
     * @param particle2 the index of the second particle involved in the bond
     * @param distance  the distance between the two particles, measured in nm
     */
    void getBondParameters(int index, int& particle1, int& particle2, double& distance) const;
    /**
     * Set the parameters defining a bond.
     *
     * @param index      the index of the bond for which to set parameters
     * @param particle1  the index of the first particle involved in the bond
     * @param particle2  the index of the second particle involved in the bond
     * @param bondLength the distance between the two particles, measured in nm
     */
    void setBondParameters(int index, int particle1, int particle2, double bondLength);
    /**
     * Get the number of bonds that have been added.
     *
     * @return number of bonds
     */
    int getNumBonds(void) const;
    /**
     * Get the dielectric constant for the solvent.
     */
    double getSolventDielectric() const {
        return solventDielectric;
    }
    /**
     * Set the dielectric constant for the solvent.
     */
    void setSolventDielectric(double dielectric) {
        solventDielectric = dielectric;
    }
    /**
     * Get the dielectric constant for the solute.
     */
    double getSoluteDielectric() const {
        return soluteDielectric;
    }
    /**
     * Set the dielectric constant for the solute.
     */
    void setSoluteDielectric(double dielectric) {
        soluteDielectric = dielectric;
    }
    /**
     * Get the method used for handling long range nonbonded interactions.
     */
    NonbondedMethod getNonbondedMethod() const;
    /**
     * Set the method used for handling long range nonbonded interactions.
     */
    void setNonbondedMethod(NonbondedMethod method);
    /**
     * Get the cutoff distance (in nm) being used for nonbonded interactions.  If the NonbondedMethod in use
     * is NoCutoff, this value will have no effect.
     *
     * @return the cutoff distance, measured in nm
     */
    double getCutoffDistance() const;
    /**
     * Set the cutoff distance (in nm) being used for nonbonded interactions.  If the NonbondedMethod in use
     * is NoCutoff, this value will have no effect.
     *
     * @param distance    the cutoff distance, measured in nm
     */
    void setCutoffDistance(double distance);
protected:
    ForceImpl* createImpl();
private:
    class ParticleInfo;
    NonbondedMethod nonbondedMethod;
    double cutoffDistance, solventDielectric, soluteDielectric;
    class BondInfo;
    std::vector<ParticleInfo> particles;
    std::vector<BondInfo> bonds;
};
/**
 * Internal record of the GB/VI parameters (charge, radius, gamma) stored for one particle.
 * @private
 */
class GBVIForce::ParticleInfo {
public:
    double charge, radius, gamma;
    /** Create a record with every parameter set to zero. */
    ParticleInfo() : charge(0.0), radius(0.0), gamma(0.0) {
    }
    /** Create a record holding the supplied parameter values. */
    ParticleInfo(double charge, double radius, double gamma) :
            charge(charge), radius(radius), gamma(gamma) {
    }
};
/**
 * Internal record of one bond: the two particle indices and the bond length.
 * @private
 */
class GBVIForce::BondInfo {
public:
    int particle1, particle2;
    double bondLength;
    /** Create an unset record: both particle indices are -1 and the length is zero. */
    BondInfo() : particle1(-1), particle2(-1), bondLength(0.0) {
    }
    /** Create a record for a bond of the given length between the two particle indices. */
    BondInfo(int atomIndex1, int atomIndex2, double bondLength) :
            particle1(atomIndex1), particle2(atomIndex2), bondLength(bondLength) {
    }
};
}
// namespace OpenMM
#endif
/*OPENMM_GBVIFORCEFIELD_H_*/
...
...
openmmapi/src/GBVIForce.cpp
View file @
a2674e83
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */
#include "openmm/Force.h"
#include "openmm/OpenMMException.h"
#include "openmm/GBVIForce.h"
#include "openmm/internal/GBVIForceImpl.h"
#include <sstream>
using
namespace
OpenMM
;
/**
 * Construct a GBVIForce with no particles or bonds.  Initial settings: NoCutoff,
 * cutoff distance 1.0, solvent dielectric 78.3, solute dielectric 1.0.
 */
GBVIForce::GBVIForce() :
        nonbondedMethod(NoCutoff),
        cutoffDistance(1.0),
        solventDielectric(78.3),
        soluteDielectric(1.0) {
}
/**
 * Append the parameters for one more particle.
 *
 * @return the index of the newly added particle (its position in the particle list)
 */
int GBVIForce::addParticle(double charge, double radius, double gamma) {
    // The new entry lands at the current end of the list, so its index is the old size.
    const int newIndex = static_cast<int>(particles.size());
    particles.push_back(ParticleInfo(charge, radius, gamma));
    return newIndex;
}
void
GBVIForce
::
getParticleParameters
(
int
index
,
double
&
charge
,
double
&
radius
,
double
&
gamma
)
const
{
charge
=
particles
[
index
].
charge
;
radius
=
particles
[
index
].
radius
;
gamma
=
particles
[
index
].
gamma
;
}
void
GBVIForce
::
setParticleParameters
(
int
index
,
double
charge
,
double
radius
,
double
gamma
)
{
particles
[
index
].
charge
=
charge
;
particles
[
index
].
radius
=
radius
;
particles
[
index
].
gamma
=
gamma
;
}
/** Return the currently selected method for handling long range nonbonded interactions. */
GBVIForce::NonbondedMethod GBVIForce::getNonbondedMethod() const {
    return this->nonbondedMethod;
}
/** Store the method to use for handling long range nonbonded interactions. */
void GBVIForce::setNonbondedMethod(NonbondedMethod method) {
    this->nonbondedMethod = method;
}
double
GBVIForce
::
getCutoffDistance
()
const
{
return
cutoffDistance
;
}
void
GBVIForce
::
setCutoffDistance
(
double
distance
)
{
cutoffDistance
=
distance
;
}
/**
 * Append a bond between two particles.
 *
 * @return the index of the newly added bond (its position in the bond list)
 */
int GBVIForce::addBond(int particle1, int particle2, double bondLength) {
    // The new entry lands at the current end of the list, so its index is the old size.
    const int newIndex = static_cast<int>(bonds.size());
    bonds.push_back(BondInfo(particle1, particle2, bondLength));
    return newIndex;
}
void
GBVIForce
::
setBondParameters
(
int
index
,
int
particle1
,
int
particle2
,
double
bondLength
)
{
bonds
[
index
].
particle1
=
particle1
;
bonds
[
index
].
particle2
=
particle2
;
bonds
[
index
].
bondLength
=
bondLength
;
}
/** Return how many bonds have been added. */
int GBVIForce::getNumBonds(void) const {
    return static_cast<int>(bonds.size());
}
void
GBVIForce
::
getBondParameters
(
int
index
,
int
&
bondIndex1
,
int
&
bondIndex2
,
double
&
bondLength
)
const
{
bondIndex1
=
bonds
[
index
].
particle1
;
bondIndex2
=
bonds
[
index
].
particle2
;
bondLength
=
bonds
[
index
].
bondLength
;
}
/** Allocate a new GBVIForceImpl bound to this force and return it to the caller. */
ForceImpl* GBVIForce::createImpl() {
    ForceImpl* impl = new GBVIForceImpl(*this);
    return impl;
}
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
a2674e83
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program. If not, see <http://www.gnu.org/licenses/>.       *
 * -------------------------------------------------------------------------- */
#include "CudaKernels.h"
#include "CudaForceInfo.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/Context.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "kernels/gputypes.h"
#include "kernels/cudaKernels.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include <cmath>
extern
"C"
int
OPENMMCUDA_EXPORT
gpuSetConstants
(
gpuContext
gpu
);
using
namespace
OpenMM
;
using
namespace
std
;
/** Initialization hook for this kernel; it performs no work. */
void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
    // Intentionally empty.
}
/**
 * Prepare the GPU context for a force/energy evaluation: reorder atoms on every
 * 100th call when a cutoff method is in use, then clear the accumulators that
 * this evaluation is going to fill.
 */
void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
    _gpuContext* gpuCtx = data.gpu;

    // Reordering only happens with a cutoff, and only when the call counter is a multiple of 100.
    const bool usesCutoff = (data.nonbondedMethod != NO_CUTOFF);
    if (usesCutoff && data.computeForceCount % 100 == 0) {
        gpuReorderAtoms(gpuCtx);
    }
    ++data.computeForceCount;

    // With GBSA or GB/VI enabled the Born sums are cleared together with the forces,
    // regardless of includeForces.
    if (gpuCtx->bIncludeGBSA || gpuCtx->bIncludeGBVI) {
        kClearBornSumAndForces(gpuCtx);
    }
    else if (includeForces) {
        kClearForces(gpuCtx);
    }

    if (includeEnergy) {
        kClearEnergy(gpuCtx);
    }
}
/**
 * Launch the nonbonded, custom nonbonded and local force kernels, reduce the
 * forces when requested, and return the energy.
 *
 * @return 0.0 when includeEnergy is false; otherwise the reduced energy plus the
 *         Ewald self energy, plus dispersionCoefficient divided by the periodic
 *         box volume when that coefficient is nonzero
 */
double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
    _gpuContext* gpuCtx = data.gpu;

    const bool hasImplicitSolvent = gpuCtx->bIncludeGBSA || gpuCtx->bIncludeGBVI;
    if (hasImplicitSolvent) {
        gpuCtx->bRecalculateBornRadii = true;
        kCalculateCDLJObcGbsaForces1(gpuCtx);
        kReduceObcGbsaBornForces(gpuCtx);
        // The second pass depends on which implicit solvent model is active.
        if (gpuCtx->bIncludeGBSA) {
            kCalculateObcGbsaForces2(gpuCtx);
        }
        else {
            kCalculateGBVIForces2(gpuCtx);
        }
    }
    else if (data.hasNonbonded) {
        kCalculateCDLJForces(gpuCtx);
    }

    if (data.hasCustomNonbonded) {
        kCalculateCustomNonbondedForces(gpuCtx, data.hasNonbonded);
    }
    kCalculateLocalForces(gpuCtx);
    if (includeForces) {
        kReduceForces(gpuCtx);
    }

    if (!includeEnergy) {
        return 0.0;
    }
    double totalEnergy = kReduceEnergy(gpuCtx)+data.ewaldSelfEnergy;
    if (data.dispersionCoefficient != 0.0) {
        totalEnergy += data.dispersionCoefficient/(gpuCtx->sim.periodicBoxSizeX*gpuCtx->sim.periodicBoxSizeY*gpuCtx->sim.periodicBoxSizeZ);
    }
    return totalEnergy;
}
/** Initialization hook for this kernel; it performs no work. */
void CudaUpdateStateDataKernel::initialize(const System& system) {
    // Intentionally empty.
}
/** Return the time value stored in the platform data; the context argument is not consulted. */
double CudaUpdateStateDataKernel::getTime(const ContextImpl& context) const {
    const double storedTime = data.time;
    return storedTime;
}
/** Store the given time in the platform data; the context argument is not consulted. */
void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
    data.time = time;
}
/**
 * Fill `positions` with one Vec3 per particle of the context's System.
 *
 * psPosq4 is downloaded first (presumably refreshing the host-side copy -- confirm
 * against the stream class).  Entry i of the GPU array is written to slot
 * psAtomIndex[i] of the output, after subtracting the per-atom cell offset times
 * the periodic box size along each axis.
 */
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector<Vec3>& positions) {
    _gpuContext* gpuCtx = data.gpu;
    gpuCtx->psPosq4->Download();
    const int* atomOrder = gpuCtx->psAtomIndex->_pSysData;
    const int particleCount = context.getSystem().getNumParticles();
    positions.resize(particleCount);
    for (int slot = 0; slot < particleCount; ++slot) {
        const float4 posq = (*gpuCtx->psPosq4)[slot];
        const int3 cell = gpuCtx->posCellOffsets[slot];
        positions[atomOrder[slot]] = Vec3(posq.x-cell.x*gpuCtx->sim.periodicBoxSizeX,
                                          posq.y-cell.y*gpuCtx->sim.periodicBoxSizeY,
                                          posq.z-cell.z*gpuCtx->sim.periodicBoxSizeZ);
    }
}
/**
 * Copy `positions` into the GPU position array and upload it.
 *
 * GPU slot i receives positions[psAtomIndex[i]], narrowed to float; only x, y
 * and z of each float4 are written.  Afterwards every cell offset is reset to
 * (0, 0, 0).
 */
void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector<Vec3>& positions) {
    _gpuContext* gpuCtx = data.gpu;
    const int* atomOrder = gpuCtx->psAtomIndex->_pSysData;
    const int particleCount = context.getSystem().getNumParticles();
    for (int slot = 0; slot < particleCount; ++slot) {
        float4& target = (*gpuCtx->psPosq4)[slot];
        const Vec3& source = positions[atomOrder[slot]];
        target.x = static_cast<float>(source[0]);
        target.y = static_cast<float>(source[1]);
        target.z = static_cast<float>(source[2]);
    }
    gpuCtx->psPosq4->Upload();

    // The new positions are taken as-is, so no accumulated cell offset applies any more.
    const int offsetCount = static_cast<int>(gpuCtx->posCellOffsets.size());
    for (int slot = 0; slot < offsetCount; ++slot) {
        gpuCtx->posCellOffsets[slot] = make_int3(0, 0, 0);
    }
}
/**
 * Fill `velocities` with one Vec3 per particle of the context's System.
 *
 * psVelm4 is downloaded first; the x, y, z components of GPU entry i are written
 * to slot psAtomIndex[i] of the output.
 */
void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector<Vec3>& velocities) {
    _gpuContext* gpuCtx = data.gpu;
    gpuCtx->psVelm4->Download();
    const int* atomOrder = gpuCtx->psAtomIndex->_pSysData;
    const int particleCount = context.getSystem().getNumParticles();
    velocities.resize(particleCount);
    for (int slot = 0; slot < particleCount; ++slot) {
        const float4 velm = (*gpuCtx->psVelm4)[slot];
        velocities[atomOrder[slot]] = Vec3(velm.x, velm.y, velm.z);
    }
}
/**
 * Copy `velocities` into the GPU velocity array and upload it.
 *
 * GPU slot i receives velocities[psAtomIndex[i]], narrowed to float; only x, y
 * and z of each float4 are written.
 */
void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities) {
    _gpuContext* gpuCtx = data.gpu;
    const int* atomOrder = gpuCtx->psAtomIndex->_pSysData;
    const int particleCount = context.getSystem().getNumParticles();
    for (int slot = 0; slot < particleCount; ++slot) {
        float4& target = (*gpuCtx->psVelm4)[slot];
        const Vec3& source = velocities[atomOrder[slot]];
        target.x = static_cast<float>(source[0]);
        target.y = static_cast<float>(source[1]);
        target.z = static_cast<float>(source[2]);
    }
    gpuCtx->psVelm4->Upload();
}
/**
 * Fill `forces` with one Vec3 per particle of the context's System.
 *
 * psForce4 is downloaded first; the x, y, z components of GPU entry i are written
 * to slot psAtomIndex[i] of the output.
 */
void CudaUpdateStateDataKernel::getForces(ContextImpl& context, std::vector<Vec3>& forces) {
    _gpuContext* gpuCtx = data.gpu;
    const int* atomOrder = gpuCtx->psAtomIndex->_pSysData;
    gpuCtx->psForce4->Download();
    const int particleCount = context.getSystem().getNumParticles();
    forces.resize(particleCount);
    for (int slot = 0; slot < particleCount; ++slot) {
        const float4 f = (*gpuCtx->psForce4)[slot];
        forces[atomOrder[slot]] = Vec3(f.x, f.y, f.z);
    }
}
/**
 * Report the periodic box as three axis-aligned vectors built from the X, Y and
 * Z box sizes stored in the GPU context; every off-diagonal component is zero.
 */
void CudaUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
    _gpuContext* gpuCtx = data.gpu;
    a = Vec3(gpuCtx->sim.periodicBoxSizeX, 0, 0);
    b = Vec3(0, gpuCtx->sim.periodicBoxSizeY, 0);
    c = Vec3(0, 0, gpuCtx->sim.periodicBoxSizeZ);
}
/**
 * Set the periodic box size from the diagonal of the supplied vectors (a[0],
 * b[1], c[2]) and then call gpuSetConstants().  The remaining vector components
 * are not read.
 */
void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const {
    _gpuContext* gpuCtx = data.gpu;
    gpuSetPeriodicBoxSize(gpuCtx, a[0], b[1], c[2]);
    gpuSetConstants(gpuCtx);
}
/** Initialization hook for this kernel; it performs no work. */
void CudaApplyConstraintsKernel::initialize(const System& system) {
    // Intentionally empty.
}
/**
 * Run kApplyConstraints() on the GPU context.  Neither `context` nor the
 * tolerance `tol` is used by this implementation.
 */
void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
    _gpuContext* gpuCtx = data.gpu;
    kApplyConstraints(gpuCtx);
}
/**
 * CudaForceInfo adapter for a HarmonicBondForce: each bond is one particle group.
 * Holds a reference to the force, so the force must outlive this object.
 */
class CudaCalcHarmonicBondForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const HarmonicBondForce& force) : force(force) {
    }
    /** One group per bond. */
    int getNumParticleGroups() {
        return force.getNumBonds();
    }
    /** Replace the contents of `particles` with the two particle indices of bond `index`. */
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int first, second;
        double unusedLength, unusedK;
        force.getBondParameters(index, first, second, unusedLength, unusedK);
        particles.resize(2);
        particles[0] = first;
        particles[1] = second;
    }
    /** Two bonds are identical when both their length and their force constant compare equal. */
    bool areGroupsIdentical(int group1, int group2) {
        int unusedP1, unusedP2;
        double lengthA, kA;
        double lengthB, kB;
        force.getBondParameters(group1, unusedP1, unusedP2, lengthA, kA);
        force.getBondParameters(group2, unusedP1, unusedP2, lengthB, kB);
        if (lengthA != lengthB) {
            return false;
        }
        return kA == kB;
    }
private:
    const HarmonicBondForce& force;
};
/** Destructor; there is nothing to release here. */
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
    // Intentionally empty.
}
/**
 * Read every bond of `force`, hand the particle indices, lengths and force
 * constants (narrowed to float) to gpuSetBondParameters(), and register a
 * ForceInfo for the force with the GPU context.
 */
void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
    data.hasBonds = true;
    numBonds = force.getNumBonds();

    std::vector<int> firstAtom(numBonds);
    std::vector<int> secondAtom(numBonds);
    std::vector<float> restLength(numBonds);
    std::vector<float> forceConstant(numBonds);
    for (int bond = 0; bond < numBonds; ++bond) {
        double lengthAsDouble, kAsDouble;
        force.getBondParameters(bond, firstAtom[bond], secondAtom[bond], lengthAsDouble, kAsDouble);
        restLength[bond] = static_cast<float>(lengthAsDouble);
        forceConstant[bond] = static_cast<float>(kAsDouble);
    }

    gpuSetBondParameters(data.gpu, firstAtom, secondAtom, restLength, forceConstant);
    // The raw pointer is handed to the GPU context's list; presumably the context owns it -- confirm.
    data.gpu->forces.push_back(new ForceInfo(force));
}
/** Does no work of its own and always returns 0.0. */
double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const double noEnergy = 0.0;
    return noEnergy;
}
/**
 * CudaForceInfo adapter for a CustomBondForce: each bond is one particle group.
 * Holds a reference to the force, so the force must outlive this object.
 */
class CudaCalcCustomBondForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const CustomBondForce& force) : force(force) {
    }
    /** One group per bond. */
    int getNumParticleGroups() {
        return force.getNumBonds();
    }
    /** Replace the contents of `particles` with the two particle indices of bond `index`. */
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int first, second;
        std::vector<double> unusedParams;
        force.getBondParameters(index, first, second, unusedParams);
        particles.resize(2);
        particles[0] = first;
        particles[1] = second;
    }
    /**
     * Two bonds are identical when every per-bond parameter compares equal.  The
     * comparison walks the first bond's parameter list.
     */
    bool areGroupsIdentical(int group1, int group2) {
        int unusedP1, unusedP2;
        std::vector<double> paramsA, paramsB;
        force.getBondParameters(group1, unusedP1, unusedP2, paramsA);
        force.getBondParameters(group2, unusedP1, unusedP2, paramsB);
        const int count = static_cast<int>(paramsA.size());
        for (int p = 0; p < count; ++p) {
            if (paramsA[p] != paramsB[p]) {
                return false;
            }
        }
        return true;
    }
private:
    const CustomBondForce& force;
};
/** Destructor; there is nothing to release here. */
CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
    // Intentionally empty.
}
/**
 * Collect the bonds, per-bond parameter names and global parameters of `force`,
 * pass them with the energy expression to gpuSetCustomBondParameters(), upload
 * the global parameter defaults when there are any, and register a ForceInfo
 * for the force with the GPU context.
 */
void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
    numBonds = force.getNumBonds();

    // Per-bond particle indices and parameter values.
    std::vector<int> firstAtom(numBonds);
    std::vector<int> secondAtom(numBonds);
    std::vector<std::vector<double> > bondParams(numBonds);
    for (int bond = 0; bond < numBonds; ++bond) {
        force.getBondParameters(bond, firstAtom[bond], secondAtom[bond], bondParams[bond]);
    }

    // Names of the per-bond parameters, in declaration order.
    std::vector<std::string> perBondNames;
    const int perBondCount = force.getNumPerBondParameters();
    for (int p = 0; p < perBondCount; ++p) {
        perBondNames.push_back(force.getPerBondParameterName(p));
    }

    // Global parameters start at their defaults, narrowed to float.
    const int globalCount = force.getNumGlobalParameters();
    globalParamNames.resize(globalCount);
    globalParamValues.resize(globalCount);
    for (int g = 0; g < globalCount; ++g) {
        globalParamNames[g] = force.getGlobalParameterName(g);
        globalParamValues[g] = static_cast<float>(force.getGlobalParameterDefaultValue(g));
    }

    gpuSetCustomBondParameters(data.gpu, firstAtom, secondAtom, bondParams, force.getEnergyFunction(), perBondNames, globalParamNames);
    if (globalParamValues.size() > 0) {
        SetCustomBondGlobalParams(globalParamValues);
    }
    // The raw pointer is handed to the GPU context's list; presumably the context owns it -- confirm.
    data.gpu->forces.push_back(new ForceInfo(force));
}
/**
 * Refresh the global parameters from the context, then launch the custom bond
 * force kernel.  Always returns 0.0.
 */
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    updateGlobalParams(context);
    _gpuContext* gpuCtx = data.gpu;
    kCalculateCustomBondForces(gpuCtx);
    return 0.0;
}
/**
 * Re-read every global parameter from the context (narrowed to float), cache the
 * values, and push them with SetCustomBondGlobalParams() only if at least one
 * differs from the previously cached value.
 */
void CudaCalcCustomBondForceKernel::updateGlobalParams(ContextImpl& context) {
    bool anyChanged = false;
    const int globalCount = static_cast<int>(globalParamNames.size());
    for (int g = 0; g < globalCount; ++g) {
        const float latest = static_cast<float>(context.getParameter(globalParamNames[g]));
        if (latest != globalParamValues[g]) {
            anyChanged = true;
        }
        globalParamValues[g] = latest;
    }
    if (anyChanged) {
        SetCustomBondGlobalParams(globalParamValues);
    }
}
/**
 * CudaForceInfo adapter for a HarmonicAngleForce: each angle is one particle group.
 * Holds a reference to the force, so the force must outlive this object.
 */
class CudaCalcHarmonicAngleForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const HarmonicAngleForce& force) : force(force) {
    }
    /** One group per angle. */
    int getNumParticleGroups() {
        return force.getNumAngles();
    }
    /** Replace the contents of `particles` with the three particle indices of angle `index`. */
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int first, second, third;
        double unusedAngle, unusedK;
        force.getAngleParameters(index, first, second, third, unusedAngle, unusedK);
        particles.resize(3);
        particles[0] = first;
        particles[1] = second;
        particles[2] = third;
    }
    /** Two angles are identical when both their angle value and their force constant compare equal. */
    bool areGroupsIdentical(int group1, int group2) {
        int unusedP1, unusedP2, unusedP3;
        double angleA, kA;
        double angleB, kB;
        force.getAngleParameters(group1, unusedP1, unusedP2, unusedP3, angleA, kA);
        force.getAngleParameters(group2, unusedP1, unusedP2, unusedP3, angleB, kB);
        if (angleA != angleB) {
            return false;
        }
        return kA == kB;
    }
private:
    const HarmonicAngleForce& force;
};
// Destructor: the body is empty, so nothing is released explicitly here.
CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
}
void
CudaCalcHarmonicAngleForceKernel
::
initialize
(
const
System
&
system
,
const
HarmonicAngleForce
&
force
)
{
data
.
hasAngles
=
true
;
numAngles
=
force
.
getNumAngles
();
const
float
RadiansToDegrees
=
(
float
)
(
180.0
/
3.14159265
);
vector
<
int
>
particle1
(
numAngles
);
vector
<
int
>
particle2
(
numAngles
);
vector
<
int
>
particle3
(
numAngles
);
vector
<
float
>
angle
(
numAngles
);
vector
<
float
>
k
(
numAngles
);
for
(
int
i
=
0
;
i
<
numAngles
;
i
++
)
{
double
angleValue
,
kValue
;
force
.
getAngleParameters
(
i
,
particle1
[
i
],
particle2
[
i
],
particle3
[
i
],
angleValue
,
kValue
);
angle
[
i
]
=
(
float
)
(
angleValue
*
RadiansToDegrees
);
k
[
i
]
=
(
float
)
kValue
;
}
gpuSetBondAngleParameters
(
data
.
gpu
,
particle1
,
particle2
,
particle3
,
angle
,
k
);
data
.
gpu
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
/**
 * Performs no work in this method and unconditionally returns 0.0; none of the
 * arguments are read.
 * NOTE(review): presumably the harmonic angle term is evaluated elsewhere in
 * the platform -- confirm.
 */
double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    return 0.0;
}
/**
 * CudaForceInfo implementation backed by a CustomAngleForce: each angle is one
 * particle group of three particles.
 */
class CudaCalcCustomAngleForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const CustomAngleForce& force) : force(force) {
    }
    // One group per angle defined in the force.
    int getNumParticleGroups() {
        return force.getNumAngles();
    }
    // Writes the three particle indices of angle `index` into `particles`.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atoms[3];
        vector<double> ignoredParameters;
        force.getAngleParameters(index, atoms[0], atoms[1], atoms[2], ignoredParameters);
        particles.assign(atoms, atoms+3);
    }
    // Groups are identical when every per-angle parameter compares equal.
    // The loop is bounded by the first group's parameter count.
    bool areGroupsIdentical(int group1, int group2) {
        int p1, p2, p3;
        vector<double> first, second;
        force.getAngleParameters(group1, p1, p2, p3, first);
        force.getAngleParameters(group2, p1, p2, p3, second);
        const int count = (int) first.size();
        int position = 0;
        while (position < count && first[position] == second[position])
            ++position;
        return position == count;
    }
private:
    const CustomAngleForce& force;
};
// Destructor: the body is empty, so nothing is released explicitly here.
CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
}
void
CudaCalcCustomAngleForceKernel
::
initialize
(
const
System
&
system
,
const
CustomAngleForce
&
force
)
{
numAngles
=
force
.
getNumAngles
();
vector
<
int
>
particle1
(
numAngles
);
vector
<
int
>
particle2
(
numAngles
);
vector
<
int
>
particle3
(
numAngles
);
vector
<
vector
<
double
>
>
params
(
numAngles
);
for
(
int
i
=
0
;
i
<
numAngles
;
i
++
)
force
.
getAngleParameters
(
i
,
particle1
[
i
],
particle2
[
i
],
particle3
[
i
],
params
[
i
]);
vector
<
string
>
paramNames
;
for
(
int
i
=
0
;
i
<
force
.
getNumPerAngleParameters
();
i
++
)
paramNames
.
push_back
(
force
.
getPerAngleParameterName
(
i
));
globalParamNames
.
resize
(
force
.
getNumGlobalParameters
());
globalParamValues
.
resize
(
force
.
getNumGlobalParameters
());
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
globalParamNames
[
i
]
=
force
.
getGlobalParameterName
(
i
);
globalParamValues
[
i
]
=
(
float
)
force
.
getGlobalParameterDefaultValue
(
i
);
}
gpuSetCustomAngleParameters
(
data
.
gpu
,
particle1
,
particle2
,
particle3
,
params
,
force
.
getEnergyFunction
(),
paramNames
,
globalParamNames
);
if
(
globalParamValues
.
size
()
>
0
)
SetCustomAngleGlobalParams
(
globalParamValues
);
data
.
gpu
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
/**
 * Synchronise the cached global parameters with the context and then call
 * kCalculateCustomAngleForces(). Always returns 0.0; includeForces and
 * includeEnergy are not consulted.
 */
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    // Parameters must be refreshed before the calculation routine is invoked.
    updateGlobalParams(context);
    _gpuContext* gpu = data.gpu;
    kCalculateCustomAngleForces(gpu);
    return 0.0;
}
/**
 * Re-read every global parameter from the context (narrowed to float), store it
 * in globalParamValues, and call SetCustomAngleGlobalParams() only if at least
 * one stored value differed from the freshly read one.
 */
void CudaCalcCustomAngleForceKernel::updateGlobalParams(ContextImpl& context) {
    const int numGlobals = (int) globalParamNames.size();
    bool anyDifferent = false;
    for (int index = 0; index < numGlobals; ++index) {
        const float current = (float) context.getParameter(globalParamNames[index]);
        anyDifferent = anyDifferent || (current != globalParamValues[index]);
        globalParamValues[index] = current;
    }
    if (!anyDifferent)
        return;
    SetCustomAngleGlobalParams(globalParamValues);
}
/**
 * CudaForceInfo implementation backed by a PeriodicTorsionForce: each torsion
 * is one particle group of four particles.
 */
class CudaCalcPeriodicTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const PeriodicTorsionForce& force) : force(force) {
    }
    // One group per torsion defined in the force.
    int getNumParticleGroups() {
        return force.getNumTorsions();
    }
    // Writes the four particle indices of torsion `index` into `particles`.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atoms[4];
        int multiplicity;
        double phaseValue, kValue;
        force.getTorsionParameters(index, atoms[0], atoms[1], atoms[2], atoms[3], multiplicity, phaseValue, kValue);
        particles.assign(atoms, atoms+4);
    }
    // Identical means equal periodicity, phase and k; particle indices are not compared.
    bool areGroupsIdentical(int group1, int group2) {
        int p1, p2, p3, p4;
        int multiplicityA, multiplicityB;
        double phaseA, phaseB, kA, kB;
        force.getTorsionParameters(group1, p1, p2, p3, p4, multiplicityA, phaseA, kA);
        force.getTorsionParameters(group2, p1, p2, p3, p4, multiplicityB, phaseB, kB);
        if (multiplicityA != multiplicityB)
            return false;
        if (phaseA != phaseB)
            return false;
        return kA == kB;
    }
private:
    const PeriodicTorsionForce& force;
};
// Destructor: the body is empty, so nothing is released explicitly here.
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
}
void
CudaCalcPeriodicTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
PeriodicTorsionForce
&
force
)
{
data
.
hasPeriodicTorsions
=
true
;
numTorsions
=
force
.
getNumTorsions
();
const
float
RadiansToDegrees
=
(
float
)(
180.0
/
3.14159265
);
vector
<
int
>
particle1
(
numTorsions
);
vector
<
int
>
particle2
(
numTorsions
);
vector
<
int
>
particle3
(
numTorsions
);
vector
<
int
>
particle4
(
numTorsions
);
vector
<
float
>
k
(
numTorsions
);
vector
<
float
>
phase
(
numTorsions
);
vector
<
int
>
periodicity
(
numTorsions
);
for
(
int
i
=
0
;
i
<
numTorsions
;
i
++
)
{
double
kValue
,
phaseValue
;
force
.
getTorsionParameters
(
i
,
particle1
[
i
],
particle2
[
i
],
particle3
[
i
],
particle4
[
i
],
periodicity
[
i
],
phaseValue
,
kValue
);
k
[
i
]
=
(
float
)
kValue
;
phase
[
i
]
=
(
float
)
(
phaseValue
*
RadiansToDegrees
);
}
gpuSetDihedralParameters
(
data
.
gpu
,
particle1
,
particle2
,
particle3
,
particle4
,
k
,
phase
,
periodicity
);
data
.
gpu
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
/**
 * Performs no work in this method and unconditionally returns 0.0; none of the
 * arguments are read.
 * NOTE(review): presumably the periodic torsion term is evaluated elsewhere in
 * the platform -- confirm.
 */
double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    return 0.0;
}
/**
 * CudaForceInfo implementation backed by an RBTorsionForce: each torsion is one
 * particle group of four particles.
 */
class CudaCalcRBTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const RBTorsionForce& force) : force(force) {
    }
    // One group per torsion defined in the force.
    int getNumParticleGroups() {
        return force.getNumTorsions();
    }
    // Writes the four particle indices of torsion `index` into `particles`.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atoms[4];
        double coeff[6];
        force.getTorsionParameters(index, atoms[0], atoms[1], atoms[2], atoms[3],
                                   coeff[0], coeff[1], coeff[2], coeff[3], coeff[4], coeff[5]);
        particles.assign(atoms, atoms+4);
    }
    // Identical means all six coefficients c0..c5 compare equal; particle indices are not compared.
    bool areGroupsIdentical(int group1, int group2) {
        int p1, p2, p3, p4;
        double first[6], second[6];
        force.getTorsionParameters(group1, p1, p2, p3, p4, first[0], first[1], first[2], first[3], first[4], first[5]);
        force.getTorsionParameters(group2, p1, p2, p3, p4, second[0], second[1], second[2], second[3], second[4], second[5]);
        for (int term = 0; term < 6; ++term)
            if (first[term] != second[term])
                return false;
        return true;
    }
private:
    const RBTorsionForce& force;
};
// Destructor: the body is empty, so nothing is released explicitly here.
CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
}
void
CudaCalcRBTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
RBTorsionForce
&
force
)
{
data
.
hasRB
=
true
;
numTorsions
=
force
.
getNumTorsions
();
vector
<
int
>
particle1
(
numTorsions
);
vector
<
int
>
particle2
(
numTorsions
);
vector
<
int
>
particle3
(
numTorsions
);
vector
<
int
>
particle4
(
numTorsions
);
vector
<
float
>
c0
(
numTorsions
);
vector
<
float
>
c1
(
numTorsions
);
vector
<
float
>
c2
(
numTorsions
);
vector
<
float
>
c3
(
numTorsions
);
vector
<
float
>
c4
(
numTorsions
);
vector
<
float
>
c5
(
numTorsions
);
for
(
int
i
=
0
;
i
<
numTorsions
;
i
++
)
{
double
c
[
6
];
force
.
getTorsionParameters
(
i
,
particle1
[
i
],
particle2
[
i
],
particle3
[
i
],
particle4
[
i
],
c
[
0
],
c
[
1
],
c
[
2
],
c
[
3
],
c
[
4
],
c
[
5
]);
c0
[
i
]
=
(
float
)
c
[
0
];
c1
[
i
]
=
(
float
)
c
[
1
];
c2
[
i
]
=
(
float
)
c
[
2
];
c3
[
i
]
=
(
float
)
c
[
3
];
c4
[
i
]
=
(
float
)
c
[
4
];
c5
[
i
]
=
(
float
)
c
[
5
];
}
gpuSetRbDihedralParameters
(
data
.
gpu
,
particle1
,
particle2
,
particle3
,
particle4
,
c0
,
c1
,
c2
,
c3
,
c4
,
c5
);
data
.
gpu
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
/**
 * Performs no work in this method and unconditionally returns 0.0; none of the
 * arguments are read.
 * NOTE(review): presumably the RB torsion term is evaluated elsewhere in the
 * platform -- confirm.
 */
double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    return 0.0;
}
/**
 * CudaForceInfo implementation backed by a CMAPTorsionForce: each CMAP torsion
 * is one particle group of eight particles (a1..a4 followed by b1..b4).
 */
class CudaCalcCMAPTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const CMAPTorsionForce& force) : force(force) {
    }
    // One group per torsion defined in the force.
    int getNumParticleGroups() {
        return force.getNumTorsions();
    }
    // Writes the eight particle indices of torsion `index` into `particles`,
    // in the order a1, a2, a3, a4, b1, b2, b3, b4.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int mapIndex;
        int atoms[8];
        force.getTorsionParameters(index, mapIndex, atoms[0], atoms[1], atoms[2], atoms[3],
                                   atoms[4], atoms[5], atoms[6], atoms[7]);
        particles.assign(atoms, atoms+8);
    }
    // Two torsions are identical when they refer to the same map index.
    bool areGroupsIdentical(int group1, int group2) {
        int firstMap, secondMap;
        int a1, a2, a3, a4, b1, b2, b3, b4;
        force.getTorsionParameters(group1, firstMap, a1, a2, a3, a4, b1, b2, b3, b4);
        force.getTorsionParameters(group2, secondMap, a1, a2, a3, a4, b1, b2, b3, b4);
        return firstMap == secondMap;
    }
private:
    const CMAPTorsionForce& force;
};
/**
 * Free the four CUDAStream objects owned by this kernel. Each pointer is
 * checked against NULL first because initialize() returns before allocating
 * anything when the force has no torsions.
 * NOTE(review): this relies on the pointers being NULL-initialised by the
 * constructor, which is not visible here -- confirm.
 */
CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
    if (coefficients != NULL) {
        delete coefficients;
    }
    if (mapPositions != NULL) {
        delete mapPositions;
    }
    if (torsionMaps != NULL) {
        delete torsionMaps;
    }
    if (torsionIndices != NULL) {
        delete torsionIndices;
    }
}
/**
 * Build and upload the device-side tables for a CMAPTorsionForce.
 *
 * For every map, calcMapDerivatives() yields 16 coefficients per grid point,
 * which are packed as four float4 entries; mapPositions records the starting
 * offset and size of each map. For every torsion, four int4 entries are
 * stored: the "a" particles, the "b" particles, and one output-buffer slot per
 * particle for each of the two sets. sim.outputBuffers is raised if any
 * particle needs more slots than are currently configured.
 *
 * When the force has no torsions the method returns immediately: nothing is
 * allocated and no ForceInfo is registered.
 *
 * @param system  used only for the particle count
 * @param force   the CMAPTorsionForce to copy from
 */
void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
    numTorsions = force.getNumTorsions();
    if (numTorsions == 0)
        return;

    // Flatten the coefficients of every map into one list of float4 values.
    int numMaps = force.getNumMaps();
    vector<float4> coeffVec;
    vector<double> energy;
    vector<vector<double> > c;
    int currentPosition = 0;
    mapPositions = new CUDAStream<int2>(numMaps, 1, "cmapTorsionMapPositions");
    for (int i = 0; i < numMaps; i++) {
        int size;
        force.getMapParameters(i, size, energy);
        CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
        (*mapPositions)[i] = make_int2(currentPosition, size);
        currentPosition += 4*size*size; // four float4 entries per grid point
        for (int j = 0; j < size*size; j++)
            for (int k = 0; k < 16; k += 4)
                coeffVec.push_back(make_float4((float) c[j][k], (float) c[j][k+1], (float) c[j][k+2], (float) c[j][k+3]));
    }
    coefficients = new CUDAStream<float4>((int) coeffVec.size(), 1, "cmapTorsionCoefficients");
    for (int i = 0; i < (int) coeffVec.size(); i++)
        (*coefficients)[i] = coeffVec[i];

    // Record the particles, map index and output-buffer slots of every torsion.
    torsionMaps = new CUDAStream<int>(numTorsions, 1, "cmapTorsionMaps");
    torsionIndices = new CUDAStream<int4>(4*numTorsions, 1, "cmapTorsionIndices");
    vector<int> forceBufferCounter(system.getNumParticles(), 0);
    for (int i = 0; i < numTorsions; i++) {
        int map;
        int a[4], b[4];
        force.getTorsionParameters(i, map, a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
        (*torsionMaps)[i] = map;
        (*torsionIndices)[i*4] = make_int4(a[0], a[1], a[2], a[3]);
        (*torsionIndices)[i*4+1] = make_int4(b[0], b[1], b[2], b[3]);

        // Claim the buffer slots in separate statements: post-increments placed
        // directly in a function's argument list are evaluated in an
        // unspecified order, which matters if a particle index repeats.
        int slotA[4], slotB[4];
        for (int j = 0; j < 4; j++)
            slotA[j] = forceBufferCounter[a[j]]++;
        for (int j = 0; j < 4; j++)
            slotB[j] = forceBufferCounter[b[j]]++;
        (*torsionIndices)[i*4+2] = make_int4(slotA[0], slotA[1], slotA[2], slotA[3]);
        (*torsionIndices)[i*4+3] = make_int4(slotB[0], slotB[1], slotB[2], slotB[3]);
    }
    coefficients->Upload();
    mapPositions->Upload();
    torsionMaps->Upload();
    torsionIndices->Upload();

    // Make sure enough output buffers exist for the busiest particle.
    int maxBuffers = 1;
    for (int i = 0; i < (int) forceBufferCounter.size(); i++)
        maxBuffers = max(maxBuffers, forceBufferCounter[i]);
    if (maxBuffers > data.gpu->sim.outputBuffers)
        data.gpu->sim.outputBuffers = maxBuffers;
    data.gpu->forces.push_back(new ForceInfo(force));
}
/**
 * Call kCalculateCMAPTorsionForces() with the four uploaded tables, unless the
 * force has no torsions, in which case nothing is called. Always returns 0.0;
 * context, includeForces and includeEnergy are not consulted.
 */
double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (!numTorsions)
        return 0.0;
    kCalculateCMAPTorsionForces(data.gpu, *coefficients, *mapPositions, *torsionIndices, *torsionMaps);
    return 0.0;
}
/**
 * CudaForceInfo implementation backed by a CustomTorsionForce: each torsion is
 * one particle group of four particles.
 */
class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const CustomTorsionForce& force) : force(force) {
    }
    // One group per torsion defined in the force.
    int getNumParticleGroups() {
        return force.getNumTorsions();
    }
    // Writes the four particle indices of torsion `index` into `particles`.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atoms[4];
        vector<double> ignoredParameters;
        force.getTorsionParameters(index, atoms[0], atoms[1], atoms[2], atoms[3], ignoredParameters);
        particles.assign(atoms, atoms+4);
    }
    // Groups are identical when every per-torsion parameter compares equal.
    // The loop is bounded by the first group's parameter count.
    bool areGroupsIdentical(int group1, int group2) {
        int p1, p2, p3, p4;
        vector<double> first, second;
        force.getTorsionParameters(group1, p1, p2, p3, p4, first);
        force.getTorsionParameters(group2, p1, p2, p3, p4, second);
        const int count = (int) first.size();
        int position = 0;
        while (position < count && first[position] == second[position])
            ++position;
        return position == count;
    }
private:
    const CustomTorsionForce& force;
};
// Destructor: the body is empty, so nothing is released explicitly here.
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
}
void
CudaCalcCustomTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
CustomTorsionForce
&
force
)
{
numTorsions
=
force
.
getNumTorsions
();
vector
<
int
>
particle1
(
numTorsions
);
vector
<
int
>
particle2
(
numTorsions
);
vector
<
int
>
particle3
(
numTorsions
);
vector
<
int
>
particle4
(
numTorsions
);
vector
<
vector
<
double
>
>
params
(
numTorsions
);
for
(
int
i
=
0
;
i
<
numTorsions
;
i
++
)
force
.
getTorsionParameters
(
i
,
particle1
[
i
],
particle2
[
i
],
particle3
[
i
],
particle4
[
i
],
params
[
i
]);
vector
<
string
>
paramNames
;
for
(
int
i
=
0
;
i
<
force
.
getNumPerTorsionParameters
();
i
++
)
paramNames
.
push_back
(
force
.
getPerTorsionParameterName
(
i
));
globalParamNames
.
resize
(
force
.
getNumGlobalParameters
());
globalParamValues
.
resize
(
force
.
getNumGlobalParameters
());
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
globalParamNames
[
i
]
=
force
.
getGlobalParameterName
(
i
);
globalParamValues
[
i
]
=
(
float
)
force
.
getGlobalParameterDefaultValue
(
i
);
}
gpuSetCustomTorsionParameters
(
data
.
gpu
,
particle1
,
particle2
,
particle3
,
particle4
,
params
,
force
.
getEnergyFunction
(),
paramNames
,
globalParamNames
);
if
(
globalParamValues
.
size
()
>
0
)
SetCustomTorsionGlobalParams
(
globalParamValues
);
data
.
gpu
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
/**
 * Synchronise the cached global parameters with the context and then call
 * kCalculateCustomTorsionForces(). Always returns 0.0; includeForces and
 * includeEnergy are not consulted.
 */
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    // Parameters must be refreshed before the calculation routine is invoked.
    updateGlobalParams(context);
    _gpuContext* gpu = data.gpu;
    kCalculateCustomTorsionForces(gpu);
    return 0.0;
}
/**
 * Re-read every global parameter from the context (narrowed to float), store it
 * in globalParamValues, and call SetCustomTorsionGlobalParams() only if at
 * least one stored value differed from the freshly read one.
 */
void CudaCalcCustomTorsionForceKernel::updateGlobalParams(ContextImpl& context) {
    const int numGlobals = (int) globalParamNames.size();
    bool anyDifferent = false;
    for (int index = 0; index < numGlobals; ++index) {
        const float current = (float) context.getParameter(globalParamNames[index]);
        anyDifferent = anyDifferent || (current != globalParamValues[index]);
        globalParamValues[index] = current;
    }
    if (!anyDifferent)
        return;
    SetCustomTorsionGlobalParams(globalParamValues);
}
/**
 * CudaForceInfo implementation backed by a NonbondedForce. Particles are
 * compared by their (charge, sigma, epsilon) triple, and each exception of the
 * force is exposed as a two-particle group.
 */
class CudaCalcNonbondedForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const NonbondedForce& force) : force(force) {
    }
    // Particles are identical when charge, sigma and epsilon all compare equal.
    bool areParticlesIdentical(int particle1, int particle2) {
        double qA, sigmaA, epsilonA;
        double qB, sigmaB, epsilonB;
        force.getParticleParameters(particle1, qA, sigmaA, epsilonA);
        force.getParticleParameters(particle2, qB, sigmaB, epsilonB);
        if (qA != qB)
            return false;
        if (sigmaA != sigmaB)
            return false;
        return epsilonA == epsilonB;
    }
    // One group per exception defined in the force.
    int getNumParticleGroups() {
        return force.getNumExceptions();
    }
    // Writes the two particle indices of exception `index` into `particles`.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atoms[2];
        double chargeProd, sigma, epsilon;
        force.getExceptionParameters(index, atoms[0], atoms[1], chargeProd, sigma, epsilon);
        particles.assign(atoms, atoms+2);
    }
    // Exceptions are identical when chargeProd, sigma and epsilon all compare
    // equal; the particle indices are not compared.
    bool areGroupsIdentical(int group1, int group2) {
        int p1, p2;
        double chargeProdA, sigmaA, epsilonA;
        double chargeProdB, sigmaB, epsilonB;
        force.getExceptionParameters(group1, p1, p2, chargeProdA, sigmaA, epsilonA);
        force.getExceptionParameters(group2, p1, p2, chargeProdB, sigmaB, epsilonB);
        if (chargeProdA != chargeProdB)
            return false;
        if (sigmaA != sigmaB)
            return false;
        return epsilonA == epsilonB;
    }
private:
    const NonbondedForce& force;
};
// Destructor: the body is empty, so nothing is released explicitly here.
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
}
void
CudaCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
data
.
hasNonbonded
=
true
;
numParticles
=
force
.
getNumParticles
();
_gpuContext
*
gpu
=
data
.
gpu
;
// Identify which exceptions are 1-4 interactions.
vector
<
pair
<
int
,
int
>
>
exclusions
;
vector
<
int
>
exceptions
;
for
(
int
i
=
0
;
i
<
force
.
getNumExceptions
();
i
++
)
{
int
particle1
,
particle2
;
double
chargeProd
,
sigma
,
epsilon
;
force
.
getExceptionParameters
(
i
,
particle1
,
particle2
,
chargeProd
,
sigma
,
epsilon
);
exclusions
.
push_back
(
pair
<
int
,
int
>
(
particle1
,
particle2
));
if
(
chargeProd
!=
0.0
||
epsilon
!=
0.0
)
exceptions
.
push_back
(
i
);
}
// Initialize nonbonded interactions.
{
vector
<
int
>
particle
(
numParticles
);
vector
<
float
>
c6
(
numParticles
);
vector
<
float
>
c12
(
numParticles
);
vector
<
float
>
q
(
numParticles
);
vector
<
char
>
symbol
;
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double
charge
,
radius
,
depth
;
force
.
getParticleParameters
(
i
,
charge
,
radius
,
depth
);
particle
[
i
]
=
i
;
q
[
i
]
=
(
float
)
charge
;
c6
[
i
]
=
(
float
)
(
4
*
depth
*
pow
(
radius
,
6.0
));
c12
[
i
]
=
(
float
)
(
4
*
depth
*
pow
(
radius
,
12.0
));
exclusionList
[
i
].
push_back
(
i
);
}
for
(
int
i
=
0
;
i
<
(
int
)
exclusions
.
size
();
i
++
)
{
exclusionList
[
exclusions
[
i
].
first
].
push_back
(
exclusions
[
i
].
second
);
exclusionList
[
exclusions
[
i
].
second
].
push_back
(
exclusions
[
i
].
first
);
}
CudaNonbondedMethod
method
=
NO_CUTOFF
;
if
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
)
{
gpuSetNonbondedCutoff
(
gpu
,
(
float
)
force
.
getCutoffDistance
(),
(
float
)
force
.
getReactionFieldDielectric
());
method
=
CUTOFF
;
}
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
CutoffPeriodic
)
{
method
=
PERIODIC
;
}
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
||
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
double
alpha
;
int
kmaxx
,
kmaxy
,
kmaxz
;
NonbondedForceImpl
::
calcEwaldParameters
(
system
,
force
,
alpha
,
kmaxx
,
kmaxy
,
kmaxz
);
gpuSetEwaldParameters
(
gpu
,
(
float
)
alpha
,
kmaxx
,
kmaxy
,
kmaxz
);
method
=
EWALD
;
}
else
{
double
alpha
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
gpuSetPMEParameters
(
gpu
,
(
float
)
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
method
=
PARTICLE_MESH_EWALD
;
}
}
data
.
nonbondedMethod
=
method
;
gpuSetCoulombParameters
(
gpu
,
(
float
)
ONE_4PI_EPS0
,
particle
,
c6
,
c12
,
q
,
symbol
,
exclusionList
,
method
);
// Compute the Ewald self energy.
data
.
ewaldSelfEnergy
=
0.0
;
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
||
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
double
selfEnergyScale
=
gpu
->
sim
.
epsfac
*
gpu
->
sim
.
alphaEwald
/
std
::
sqrt
(
PI
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
data
.
ewaldSelfEnergy
-=
selfEnergyScale
*
q
[
i
]
*
q
[
i
];
}
// Compute the long range dispersion correction.
if
(
force
.
getUseDispersionCorrection
())
data
.
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
system
,
force
);
else
data
.
dispersionCoefficient
=
0.0
;
}
// Initialize 1-4 nonbonded interactions.
{
int
numExceptions
=
exceptions
.
size
();
vector
<
int
>
particle1
(
numExceptions
);
vector
<
int
>
particle2
(
numExceptions
);
vector
<
float
>
c6
(
numExceptions
);
vector
<
float
>
c12
(
numExceptions
);
vector
<
float
>
q1
(
numExceptions
);
vector
<
float
>
q2
(
numExceptions
);
for
(
int
i
=
0
;
i
<
numExceptions
;
i
++
)
{
double
charge
,
sig
,
eps
;
force
.
getExceptionParameters
(
exceptions
[
i
],
particle1
[
i
],
particle2
[
i
],
charge
,
sig
,
eps
);
c6
[
i
]
=
(
float
)
(
4
*
eps
*
pow
(
sig
,
6.0
));
c12
[
i
]
=
(
float
)
(
4
*
eps
*
pow
(
sig
,
12.0
));
q1
[
i
]
=
(
float
)
charge
;
q2
[
i
]
=
1.0
f
;
}
gpuSetLJ14Parameters
(
gpu
,
(
float
)
ONE_4PI_EPS0
,
1.0
f
,
particle1
,
particle2
,
c6
,
c12
,
q1
,
q2
);
}
data
.
gpu
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
/**
 * Performs no work in this method and unconditionally returns 0.0; none of the
 * arguments are read.
 * NOTE(review): presumably the nonbonded term is evaluated elsewhere in the
 * platform -- confirm.
 */
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    return 0.0;
}
/**
 * CudaForceInfo implementation backed by a CustomNonbondedForce. Particles are
 * compared by their per-particle parameters, and each exclusion of the force is
 * exposed as a two-particle group.
 */
class CudaCalcCustomNonbondedForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const CustomNonbondedForce& force) : force(force) {
    }
    // Particles are identical when every per-particle parameter compares equal.
    // The loop is bounded by the first particle's parameter count.
    bool areParticlesIdentical(int particle1, int particle2) {
        vector<double> first;
        vector<double> second;
        force.getParticleParameters(particle1, first);
        force.getParticleParameters(particle2, second);
        const int count = (int) first.size();
        int position = 0;
        while (position < count && first[position] == second[position])
            ++position;
        return position == count;
    }
    // One group per exclusion defined in the force.
    int getNumParticleGroups() {
        return force.getNumExclusions();
    }
    // Writes the two particle indices of exclusion `index` into `particles`.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atoms[2];
        force.getExclusionParticles(index, atoms[0], atoms[1]);
        particles.assign(atoms, atoms+2);
    }
    // Always reports true: no per-exclusion data is read or compared here.
    bool areGroupsIdentical(int group1, int group2) {
        return true;
    }
private:
    const CustomNonbondedForce& force;
};
// Destructor: the body is empty, so nothing is released explicitly here.
CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
}
/**
 * Configure the GPU context from a CustomNonbondedForce.
 *
 * Collects the per-particle parameters and the exclusion list (each particle
 * also excludes itself), selects the CUDA nonbonded method, registers every
 * tabulated function through gpuSetTabulatedFunction(), gathers parameter
 * names and global defaults, and finally calls
 * gpuSetCustomNonbondedParameters() and registers a ForceInfo.
 * Also sets data.hasCustomNonbonded and data.customNonbondedMethod.
 *
 * @param system  not used here
 * @param force   the CustomNonbondedForce to copy from
 */
void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
    data.hasCustomNonbonded = true;
    numParticles = force.getNumParticles();
    _gpuContext* gpu = data.gpu;

    // Initialize nonbonded interactions.

    vector<vector<double> > parameters(numParticles);
    vector<vector<int> > exclusionList(numParticles);
    for (int i = 0; i < numParticles; i++) {
        force.getParticleParameters(i, parameters[i]);
        exclusionList[i].push_back(i); // a particle always excludes itself
    }
    for (int i = 0; i < force.getNumExclusions(); i++) {
        int particle1, particle2;
        force.getExclusionParticles(i, particle1, particle2);
        exclusionList[particle1].push_back(particle2);
        exclusionList[particle2].push_back(particle1);
    }

    // Any cutoff method maps to CUTOFF, except CutoffPeriodic which maps to PERIODIC.
    CudaNonbondedMethod method = NO_CUTOFF;
    if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
        method = CUTOFF;
    if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
        method = PERIODIC;
    data.customNonbondedMethod = method;

    // Record the tabulated functions.

    for (int i = 0; i < force.getNumFunctions(); i++) {
        string name;
        vector<double> values;
        double lowerBound, upperBound;
        force.getFunctionParameters(i, name, values, lowerBound, upperBound);
        gpuSetTabulatedFunction(gpu, i, name, values, lowerBound, upperBound);
    }

    // Record information for the expressions.

    vector<string> paramNames;
    for (int i = 0; i < force.getNumPerParticleParameters(); i++)
        paramNames.push_back(force.getPerParticleParameterName(i));
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
    gpuSetCustomNonbondedParameters(gpu, parameters, exclusionList, method, (float) force.getCutoffDistance(), force.getEnergyFunction(), paramNames, globalParamNames);
    if (!globalParamValues.empty())
        SetCustomNonbondedGlobalParams(globalParamValues);
    data.gpu->forces.push_back(new ForceInfo(force));
}
/**
 * Synchronise the cached global parameters with the context. No calculation
 * routine is called from this method, and it always returns 0.0;
 * includeForces and includeEnergy are not consulted.
 */
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const double reportedEnergy = 0.0;
    updateGlobalParams(context);
    return reportedEnergy;
}
/**
 * Re-read every global parameter from the context (narrowed to float), store it
 * in globalParamValues, and call SetCustomNonbondedGlobalParams() only if at
 * least one stored value differed from the freshly read one.
 */
void CudaCalcCustomNonbondedForceKernel::updateGlobalParams(ContextImpl& context) {
    const int numGlobals = (int) globalParamNames.size();
    bool anyDifferent = false;
    for (int index = 0; index < numGlobals; ++index) {
        const float current = (float) context.getParameter(globalParamNames[index]);
        anyDifferent = anyDifferent || (current != globalParamValues[index]);
        globalParamValues[index] = current;
    }
    if (!anyDifferent)
        return;
    SetCustomNonbondedGlobalParams(globalParamValues);
}
/**
 * CudaForceInfo implementation backed by a GBSAOBCForce; only the particle
 * comparison is overridden here.
 */
class CudaCalcGBSAOBCForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const GBSAOBCForce& force) : force(force) {
    }
    // Particles are identical when charge, radius and scale all compare equal.
    bool areParticlesIdentical(int particle1, int particle2) {
        double qA, radiusA, scaleA;
        double qB, radiusB, scaleB;
        force.getParticleParameters(particle1, qA, radiusA, scaleA);
        force.getParticleParameters(particle2, qB, radiusB, scaleB);
        if (qA != qB)
            return false;
        if (radiusA != radiusB)
            return false;
        return scaleA == scaleB;
    }
private:
    const GBSAOBCForce& force;
};
// Destructor: the body is empty, so nothing is released explicitly here.
CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
}
void
CudaCalcGBSAOBCForceKernel
::
initialize
(
const
System
&
system
,
const
GBSAOBCForce
&
force
)
{
int
numParticles
=
system
.
getNumParticles
();
_gpuContext
*
gpu
=
data
.
gpu
;
vector
<
float
>
radius
(
numParticles
);
vector
<
float
>
scale
(
numParticles
);
vector
<
float
>
charge
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double
particleCharge
,
particleRadius
,
scalingFactor
;
force
.
getParticleParameters
(
i
,
particleCharge
,
particleRadius
,
scalingFactor
);
radius
[
i
]
=
(
float
)
particleRadius
;
scale
[
i
]
=
(
float
)
scalingFactor
;
charge
[
i
]
=
(
float
)
particleCharge
;
}
gpuSetObcParameters
(
gpu
,
(
float
)
force
.
getSoluteDielectric
(),
(
float
)
force
.
getSolventDielectric
(),
radius
,
scale
,
charge
);
data
.
gpu
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
/**
 * Performs no work in this method and unconditionally returns 0.0; none of the
 * arguments are read.
 * NOTE(review): presumably the GBSA/OBC term is evaluated elsewhere in the
 * platform -- confirm.
 */
double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    return 0.0;
}
/**
 * CudaForceInfo implementation backed by a GBVIForce; only the particle
 * comparison is overridden here.
 */
class CudaCalcGBVIForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const GBVIForce& force) : force(force) {
    }
    // Particles are identical when charge, radius and gamma all compare equal.
    bool areParticlesIdentical(int particle1, int particle2) {
        double qA, radiusA, gammaA;
        double qB, radiusB, gammaB;
        force.getParticleParameters(particle1, qA, radiusA, gammaA);
        force.getParticleParameters(particle2, qB, radiusB, gammaB);
        if (qA != qB)
            return false;
        if (radiusA != radiusB)
            return false;
        return gammaA == gammaB;
    }
private:
    const GBVIForce& force;
};
// Destructor: the body is empty, so nothing is released explicitly here.
CudaCalcGBVIForceKernel::~CudaCalcGBVIForceKernel() {
}
void
CudaCalcGBVIForceKernel
::
initialize
(
const
System
&
system
,
const
GBVIForce
&
force
,
const
std
::
vector
<
double
>
&
inputScaledRadii
)
{
int
numParticles
=
system
.
getNumParticles
();
_gpuContext
*
gpu
=
data
.
gpu
;
vector
<
int
>
particle
(
numParticles
);
vector
<
float
>
radius
(
numParticles
);
vector
<
float
>
scaledRadii
(
numParticles
);
vector
<
float
>
gammas
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double
charge
,
particleRadius
,
gamma
;
force
.
getParticleParameters
(
i
,
charge
,
particleRadius
,
gamma
);
particle
[
i
]
=
i
;
radius
[
i
]
=
(
float
)
particleRadius
;
gammas
[
i
]
=
(
float
)
gamma
;
scaledRadii
[
i
]
=
(
float
)
inputScaledRadii
[
i
];
}
gpuSetGBVIParameters
(
gpu
,
(
float
)
force
.
getSoluteDielectric
(),
(
float
)
force
.
getSolventDielectric
(),
particle
,
radius
,
gammas
,
scaledRadii
);
data
.
gpu
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
/**
 * Performs no work in this method and unconditionally returns 0.0; none of the
 * arguments are read.
 * NOTE(review): presumably the GB/VI term is evaluated elsewhere in the
 * platform -- confirm.
 */
double CudaCalcGBVIForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    return 0.0;
}
/**
 * CudaForceInfo implementation backed by a CustomExternalForce. At
 * construction it builds a lookup from particle index to the force's entry
 * index (-1 where the force has no entry for a particle), which
 * areParticlesIdentical() then uses.
 */
class CudaCalcCustomExternalForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const CustomExternalForce& force, int numParticles) : force(force), indices(numParticles, -1) {
        vector<double> ignoredParameters;
        const int numEntries = force.getNumParticles();
        for (int entry = 0; entry < numEntries; ++entry) {
            int targetParticle;
            force.getParticleParameters(entry, targetParticle, ignoredParameters);
            indices[targetParticle] = entry;
        }
    }
    // Particles with no entry in the force are identical to each other and
    // different from any particle that has one; otherwise the per-particle
    // parameters of the two entries are compared element by element.
    bool areParticlesIdentical(int particle1, int particle2) {
        const int entry1 = indices[particle1];
        const int entry2 = indices[particle2];
        if (entry1 == -1 || entry2 == -1)
            return entry1 == entry2;
        int ignoredParticle;
        vector<double> first;
        vector<double> second;
        force.getParticleParameters(entry1, ignoredParticle, first);
        force.getParticleParameters(entry2, ignoredParticle, second);
        const int count = (int) first.size();
        int position = 0;
        while (position < count && first[position] == second[position])
            ++position;
        return position == count;
    }
private:
    const CustomExternalForce& force;
    vector<int> indices;
};
// Destructor: the body is empty, so nothing is released explicitly here.
CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
}
void
CudaCalcCustomExternalForceKernel
::
initialize
(
const
System
&
system
,
const
CustomExternalForce
&
force
)
{
numParticles
=
force
.
getNumParticles
();
vector
<
int
>
particle
(
numParticles
);
vector
<
vector
<
double
>
>
params
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
.
getParticleParameters
(
i
,
particle
[
i
],
params
[
i
]);
vector
<
string
>
paramNames
;
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
paramNames
.
push_back
(
force
.
getPerParticleParameterName
(
i
));
globalParamNames
.
resize
(
force
.
getNumGlobalParameters
());
globalParamValues
.
resize
(
force
.
getNumGlobalParameters
());
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
globalParamNames
[
i
]
=
force
.
getGlobalParameterName
(
i
);
globalParamValues
[
i
]
=
(
float
)
force
.
getGlobalParameterDefaultValue
(
i
);
}
gpuSetCustomExternalParameters
(
data
.
gpu
,
particle
,
params
,
force
.
getEnergyFunction
(),
paramNames
,
globalParamNames
);
if
(
globalParamValues
.
size
()
>
0
)
SetCustomExternalGlobalParams
(
globalParamValues
);
data
.
gpu
->
forces
.
push_back
(
new
ForceInfo
(
force
,
system
.
getNumParticles
()));
}
/**
 * Synchronise the cached global parameters with the context and then call
 * kCalculateCustomExternalForces(). Always returns 0.0; includeForces and
 * includeEnergy are not consulted.
 */
double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    // Parameters must be refreshed before the calculation routine is invoked.
    updateGlobalParams(context);
    _gpuContext* gpu = data.gpu;
    kCalculateCustomExternalForces(gpu);
    return 0.0;
}
/**
 * Re-read every global parameter from the context (narrowed to float), store it
 * in globalParamValues, and call SetCustomExternalGlobalParams() only if at
 * least one stored value differed from the freshly read one.
 */
void CudaCalcCustomExternalForceKernel::updateGlobalParams(ContextImpl& context) {
    const int numGlobals = (int) globalParamNames.size();
    bool anyDifferent = false;
    for (int index = 0; index < numGlobals; ++index) {
        const float current = (float) context.getParameter(globalParamNames[index]);
        anyDifferent = anyDifferent || (current != globalParamValues[index]);
        globalParamValues[index] = current;
    }
    if (!anyDifferent)
        return;
    SetCustomExternalGlobalParams(globalParamValues);
}
/**
 * Complete the GPU context setup that every integrator kernel needs.
 *
 * Steps, in order: give empty parameter lists to every bonded/nonbonded term
 * that no Force has initialized, upload particle masses, upload the distance
 * constraints, build the work lists / output buffers / constants, and clear
 * the force accumulators.
 *
 * @throws OpenMMException if GBSA or GB/VI is enabled without a NonbondedForce
 */
void OPENMMCUDA_EXPORT OpenMM::cudaOpenMMInitializeIntegration(const System& system, CudaPlatform::PlatformData& data, const Integrator& integrator) {
    typedef vector<int> IntList;
    typedef vector<float> FloatList;

    _gpuContext* gpu = data.gpu;

    // Initialize any terms that haven't already been handled by a Force.

    if (!data.hasBonds)
        gpuSetBondParameters(gpu, IntList(), IntList(), FloatList(), FloatList());
    if (!data.hasAngles)
        gpuSetBondAngleParameters(gpu, IntList(), IntList(), IntList(), FloatList(), FloatList());
    if (!data.hasPeriodicTorsions)
        gpuSetDihedralParameters(gpu, IntList(), IntList(), IntList(), IntList(),
                                 FloatList(), FloatList(), IntList());
    if (!data.hasRB)
        gpuSetRbDihedralParameters(gpu, IntList(), IntList(), IntList(), IntList(),
                                   FloatList(), FloatList(), FloatList(),
                                   FloatList(), FloatList(), FloatList());
    if (!data.hasNonbonded) {
        gpuSetCoulombParameters(gpu, (float) ONE_4PI_EPS0, IntList(), FloatList(), FloatList(), FloatList(),
                                vector<char>(), vector<vector<int> >(), NO_CUTOFF);
        gpuSetLJ14Parameters(gpu, (float) ONE_4PI_EPS0, 1.0f, IntList(), IntList(),
                             FloatList(), FloatList(), FloatList(), FloatList());
        // The implicit solvent kernels cannot run without the nonbonded data.
        if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
            throw OpenMMException("CudaPlatform requires GBSAOBCForce and GBVIForce to be used with a NonbondedForce");
    }

    // Set masses.

    const int numParticles = system.getNumParticles();
    vector<float> mass(numParticles);
    for (int p = 0; p < numParticles; ++p)
        mass[p] = (float) system.getParticleMass(p);
    gpuSetMass(gpu, mass);

    // Set constraints.

    const int numConstraints = system.getNumConstraints();
    vector<int> particle1(numConstraints);
    vector<int> particle2(numConstraints);
    vector<float> lengths(numConstraints);
    vector<float> invMass1(numConstraints);
    vector<float> invMass2(numConstraints);
    for (int c = 0; c < numConstraints; ++c) {
        int firstAtom, secondAtom;
        double constraintLength;
        system.getConstraintParameters(c, firstAtom, secondAtom, constraintLength);
        particle1[c] = firstAtom;
        particle2[c] = secondAtom;
        lengths[c] = (float) constraintLength;
        invMass1[c] = 1.0f/mass[firstAtom];
        invMass2[c] = 1.0f/mass[secondAtom];
    }
    gpuSetConstraintParameters(gpu, particle1, particle2, lengths, invMass1, invMass2,
                               (float) integrator.getConstraintTolerance());

    // Finish initialization.

    gpuBuildThreadBlockWorkList(gpu);
    gpuBuildExclusionList(gpu);
    gpuBuildOutputBuffers(gpu);
    gpuSetConstants(gpu);
    const bool usesBornSum = gpu->bIncludeGBSA || gpu->bIncludeGBVI;
    if (usesBornSum)
        kClearBornSumAndForces(gpu);
    else
        kClearForces(gpu);
    cudaThreadSynchronize();
}
// Nothing to release: this destructor body is intentionally empty.
CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {}
/**
 * Run the shared integration setup and invalidate the cached step size so the
 * first execute() uploads the integration parameters.
 */
void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
    cudaOpenMMInitializeIntegration(system, data, integrator);

    // A negative value never equals a step size compared against it in execute().
    prevStepSize = -1.0;
}
/**
 * Advance the simulation by one fixed-size Verlet step on the GPU.
 *
 * Integration parameters are re-uploaded only when the integrator's step size
 * differs from the one used on the previous call.
 */
void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
    _gpuContext* gpu = data.gpu;
    const double dt = integrator.getStepSize();

    if (dt != prevStepSize) {
        // Initialize the GPU parameters.
        gpuSetVerletIntegrationParameters(gpu, (float) dt, 0.0f);
        gpuSetConstants(gpu);
        prevStepSize = dt;
    }

    kVerletUpdatePart1(gpu);
    kApplyShake(gpu);
    kApplySettle(gpu);
    kApplyCCMA(gpu);

    // Request a center-of-mass calculation every cmMotionFrequency steps.
    if (data.removeCM && data.stepCount%data.cmMotionFrequency == 0)
        gpu->bCalculateCM = true;

    kVerletUpdatePart2(gpu);

    data.time += dt;
    data.stepCount++;
}
// Nothing to release: this destructor body is intentionally empty.
CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {}
void
CudaIntegrateLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
LangevinIntegrator
&
integrator
)
{
cudaOpenMMInitializeIntegration
(
system
,
data
,
integrator
);
_gpuContext
*
gpu
=
data
.
gpu
;
gpu
->
seed
=
(
unsigned
long
)
integrator
.
getRandomNumberSeed
();
gpuInitializeRandoms
(
gpu
);
prevTemp
=
-
1.0
;
prevFriction
=
-
1.0
;
prevStepSize
=
-
1.0
;
}
/**
 * Advance the simulation by one fixed-size Langevin step on the GPU.
 *
 * Parameters are re-uploaded (and the random numbers regenerated) whenever
 * temperature, friction or step size differ from the previous call.
 */
void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
    _gpuContext* gpu = data.gpu;
    const double temperature = integrator.getTemperature();
    const double friction = integrator.getFriction();
    const double stepSize = integrator.getStepSize();

    const bool parametersChanged =
        temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize;
    if (parametersChanged) {
        // Initialize the GPU parameters.
        double tau = 0.0;
        if (friction != 0.0)
            tau = 1.0/friction; // zero friction is passed through as tau == 0
        gpuSetLangevinIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature, 0.0f);
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevStepSize = stepSize;
    }

    kLangevinUpdatePart1(gpu);

    // Request a center-of-mass calculation every cmMotionFrequency steps.
    if (data.removeCM && data.stepCount%data.cmMotionFrequency == 0)
        gpu->bCalculateCM = true;

    kLangevinUpdatePart2(gpu);
    kApplyShake(gpu);
    kApplySettle(gpu);
    kApplyCCMA(gpu);
    kSetVelocitiesFromPositions(gpu);

    data.time += stepSize;
    data.stepCount++;
}
// Nothing to release: this destructor body is intentionally empty.
CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {}
void
CudaIntegrateBrownianStepKernel
::
initialize
(
const
System
&
system
,
const
BrownianIntegrator
&
integrator
)
{
cudaOpenMMInitializeIntegration
(
system
,
data
,
integrator
);
_gpuContext
*
gpu
=
data
.
gpu
;
gpu
->
seed
=
(
unsigned
long
)
integrator
.
getRandomNumberSeed
();
gpuInitializeRandoms
(
gpu
);
prevTemp
=
-
1.0
;
prevFriction
=
-
1.0
;
prevStepSize
=
-
1.0
;
}
/**
 * Advance the simulation by one Brownian dynamics step on the GPU.
 *
 * Parameters are re-uploaded (and the random numbers regenerated) whenever
 * temperature, friction or step size differ from the previous call.
 */
void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
    _gpuContext* gpu = data.gpu;
    const double temperature = integrator.getTemperature();
    const double friction = integrator.getFriction();
    const double stepSize = integrator.getStepSize();

    const bool parametersChanged =
        temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize;
    if (parametersChanged) {
        // Initialize the GPU parameters.
        double tau = 0.0;
        if (friction != 0.0)
            tau = 1.0/friction; // zero friction is passed through as tau == 0
        gpuSetBrownianIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature);
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevStepSize = stepSize;
    }

    kBrownianUpdatePart1(gpu);
    kApplyShake(gpu);
    kApplySettle(gpu);
    kApplyCCMA(gpu);

    // Request a center-of-mass calculation every cmMotionFrequency steps.
    if (data.removeCM && data.stepCount%data.cmMotionFrequency == 0)
        gpu->bCalculateCM = true;

    kBrownianUpdatePart2(gpu);

    data.time += stepSize;
    data.stepCount++;
}
// Nothing to release: this destructor body is intentionally empty.
CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() {}
/**
 * Run the shared integration setup and invalidate the cached error tolerance
 * so the first execute() uploads the integration parameters.
 */
void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
    cudaOpenMMInitializeIntegration(system, data, integrator);

    // A negative value never equals a tolerance compared against it in execute().
    prevErrorTol = -1.0;
}
/**
 * Take one variable-size Verlet step on the GPU, never stepping past maxTime.
 *
 * The step size is chosen on the device by kSelectVerletStepSize and read back
 * from psStepSize after the update to advance the simulation clock.
 *
 * @param maxTime  upper bound for the simulation time reached by this step
 */
void CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
    _gpuContext* gpu = data.gpu;
    const double tolerance = integrator.getErrorTolerance();

    if (tolerance != prevErrorTol) {
        // Initialize the GPU parameters.
        gpuSetVerletIntegrationParameters(gpu, 0.0f, (float) tolerance);
        gpuSetConstants(gpu);
        prevErrorTol = tolerance;
    }

    const float maxStepSize = (float)(maxTime-data.time);
    kSelectVerletStepSize(gpu, maxStepSize);
    kVerletUpdatePart1(gpu);
    kApplyShake(gpu);
    kApplySettle(gpu);
    kApplyCCMA(gpu);

    // Request a center-of-mass calculation every cmMotionFrequency steps.
    if (data.removeCM && data.stepCount%data.cmMotionFrequency == 0)
        gpu->bCalculateCM = true;

    kVerletUpdatePart2(gpu);

    // Read back the step that was actually taken.
    gpu->psStepSize->Download();
    if ((*gpu->psStepSize)[0].y == maxStepSize)
        data.time = maxTime; // Avoid round-off error
    else
        data.time += (*gpu->psStepSize)[0].y;
    data.stepCount++;
}
// Nothing to release: this destructor body is intentionally empty.
CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKernel() {}
void
CudaIntegrateVariableLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
VariableLangevinIntegrator
&
integrator
)
{
cudaOpenMMInitializeIntegration
(
system
,
data
,
integrator
);
_gpuContext
*
gpu
=
data
.
gpu
;
gpu
->
seed
=
(
unsigned
long
)
integrator
.
getRandomNumberSeed
();
gpuInitializeRandoms
(
gpu
);
prevTemp
=
-
1.0
;
prevFriction
=
-
1.0
;
prevErrorTol
=
-
1.0
;
}
/**
 * Take one variable-size Langevin step on the GPU, never stepping past maxTime.
 *
 * Parameters are re-uploaded (and the random numbers regenerated) whenever
 * temperature, friction or error tolerance differ from the previous call.
 * The step size is chosen on the device and read back from psStepSize.
 *
 * @param maxTime  upper bound for the simulation time reached by this step
 */
void CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
    _gpuContext* gpu = data.gpu;
    const double temperature = integrator.getTemperature();
    const double friction = integrator.getFriction();
    const double tolerance = integrator.getErrorTolerance();

    const bool parametersChanged =
        temperature != prevTemp || friction != prevFriction || tolerance != prevErrorTol;
    if (parametersChanged) {
        // Initialize the GPU parameters.
        double tau = 0.0;
        if (friction != 0.0)
            tau = 1.0/friction; // zero friction is passed through as tau == 0
        gpuSetLangevinIntegrationParameters(gpu, (float) tau, 0.0f, (float) temperature, (float) tolerance);
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevErrorTol = tolerance;
    }

    const float maxStepSize = (float)(maxTime-data.time);
    kSelectLangevinStepSize(gpu, maxStepSize);
    kLangevinUpdatePart1(gpu);

    // Request a center-of-mass calculation every cmMotionFrequency steps.
    if (data.removeCM && data.stepCount%data.cmMotionFrequency == 0)
        gpu->bCalculateCM = true;

    kLangevinUpdatePart2(gpu);
    kApplyShake(gpu);
    kApplySettle(gpu);
    kApplyCCMA(gpu);
    kSetVelocitiesFromPositions(gpu);

    // Read back the step that was actually taken.
    gpu->psStepSize->Download();
    if ((*gpu->psStepSize)[0].y == maxStepSize)
        data.time = maxTime; // Avoid round-off error
    else
        data.time += (*gpu->psStepSize)[0].y;
    data.stepCount++;
}
// Release the atom-group stream created in initialize().
CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
    // Deleting a null pointer is a no-op, so no explicit NULL test is needed.
    delete atomGroups;
}
void
CudaApplyAndersenThermostatKernel
::
initialize
(
const
System
&
system
,
const
AndersenThermostat
&
thermostat
)
{
_gpuContext
*
gpu
=
data
.
gpu
;
gpu
->
seed
=
(
unsigned
long
)
thermostat
.
getRandomNumberSeed
();
gpuInitializeRandoms
(
gpu
);
prevTemp
=
-
1.0
;
prevFrequency
=
-
1.0
;
prevStepSize
=
-
1.0
;
// Create the arrays with the group definitions.
vector
<
vector
<
int
>
>
groups
=
AndersenThermostatImpl
::
calcParticleGroups
(
system
);
atomGroups
=
new
CUDAStream
<
int
>
(
system
.
getNumParticles
(),
1
,
"atomGroups"
);
for
(
int
i
=
0
;
i
<
(
int
)
groups
.
size
();
i
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
groups
[
i
].
size
();
j
++
)
(
*
atomGroups
)[
groups
[
i
][
j
]]
=
i
;
}
atomGroups
->
Upload
();
}
/**
 * Apply the Andersen thermostat on the GPU.
 *
 * Temperature and collision frequency are read from the context parameters;
 * they are re-uploaded (and the random numbers regenerated) whenever either of
 * them, or the integrator step size, differs from the previous call.
 */
void CudaApplyAndersenThermostatKernel::execute(ContextImpl& context) {
    _gpuContext* gpu = data.gpu;
    const double temperature = context.getParameter(AndersenThermostat::Temperature());
    const double frequency = context.getParameter(AndersenThermostat::CollisionFrequency());
    const double stepSize = context.getIntegrator().getStepSize();

    const bool parametersChanged =
        temperature != prevTemp || frequency != prevFrequency || stepSize != prevStepSize;
    if (parametersChanged) {
        // Initialize the GPU parameters.
        gpuSetAndersenThermostatParameters(gpu, (float) temperature, (float) frequency);
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFrequency = frequency;
        prevStepSize = stepSize;
    }

    kCalculateAndersenThermostat(gpu, *atomGroups);
}
// Release the molecule streams created lazily in scaleCoordinates().
CudaApplyMonteCarloBarostatKernel::~CudaApplyMonteCarloBarostatKernel() {
    // Deleting a null pointer is a no-op, so no explicit NULL tests are needed.
    delete moleculeAtoms;
    delete moleculeStartIndex;
}
// Intentionally empty: the molecule data is built on the first scaleCoordinates() call.
void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const MonteCarloBarostat& thermostat) {}
/**
 * Save the current positions and scale the coordinates on the GPU.
 *
 * On the first call the molecule definitions are flattened into two device
 * arrays: moleculeAtoms (atom indices, molecule after molecule) and
 * moleculeStartIndex (offset of each molecule in moleculeAtoms, plus a final
 * entry holding the total count).
 *
 * @param scale  scale factor forwarded to kScaleAtomCoordinates
 */
void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, double scale) {
    if (!hasInitializedMolecules) {
        hasInitializedMolecules = true;

        // Create the arrays with the molecule definitions.

        vector<vector<int> > molecules = context.getMolecules();
        numMolecules = molecules.size();
        moleculeAtoms = new CUDAStream<int>(context.getSystem().getNumParticles(), 1, "moleculeAtoms");
        moleculeStartIndex = new CUDAStream<int>(numMolecules+1, 1, "moleculeStartIndex");

        int nextSlot = 0;
        for (int m = 0; m < numMolecules; ++m) {
            const vector<int>& atoms = molecules[m];
            const int atomCount = (int) atoms.size();
            (*moleculeStartIndex)[m] = nextSlot;
            for (int a = 0; a < atomCount; ++a) {
                (*moleculeAtoms)[nextSlot] = atoms[a];
                ++nextSlot;
            }
        }
        (*moleculeStartIndex)[numMolecules] = nextSlot; // end marker for the last molecule
        moleculeAtoms->Upload();
        moleculeStartIndex->Upload();
    }

    _gpuContext* gpu = data.gpu;

    // Keep a copy of the positions so restoreCoordinates() can undo the scaling.
    gpu->psPosqP4->CopyFrom(*gpu->psPosq4);
    kScaleAtomCoordinates(gpu, scale, *moleculeAtoms, *moleculeStartIndex);

    // Reset every per-atom cell offset to zero.
    const int numOffsets = (int) gpu->posCellOffsets.size();
    for (int k = 0; k < numOffsets; ++k)
        gpu->posCellOffsets[k] = make_int3(0, 0, 0);
}
/**
 * Undo the last scaleCoordinates() by copying the saved positions (psPosqP4)
 * back over the current ones (psPosq4).
 */
void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
    _gpuContext* context_gpu = data.gpu;
    context_gpu->psPosq4->CopyFrom(*context_gpu->psPosqP4);
}
/**
 * Cache the mass of every particle so execute() can compute the kinetic
 * energy on the host.
 */
void CudaCalcKineticEnergyKernel::initialize(const System& system) {
    const int particleCount = system.getNumParticles();
    masses.resize(particleCount);
    for (int p = 0; p < particleCount; p++)
        masses[p] = system.getParticleMass(p);
}
/**
 * Compute the kinetic energy, 0.5 * sum(m * |v|^2), on the CPU.
 *
 * @return the kinetic energy accumulated over all cached particle masses
 */
double CudaCalcKineticEnergyKernel::execute(ContextImpl& context) {
    // We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy
    // on the CPU.

    _gpuContext* gpu = data.gpu;
    gpu->psVelm4->Download();

    const int particleCount = (int) masses.size();
    double twiceKinetic = 0.0;
    for (int p = 0; p < particleCount; p++) {
        float4 vel = (*gpu->psVelm4)[p];
        twiceKinetic += masses[p]*(vel.x*vel.x+vel.y*vel.y+vel.z*vel.z);
    }
    return 0.5*twiceKinetic;
}
/**
 * Record in the platform data that center-of-mass motion removal is enabled
 * and how often (in steps) it should run. The integrator kernels read these
 * two fields.
 */
void CudaRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
    data.cmMotionFrequency = force.getFrequency();
    data.removeCM = true;
}
// Intentionally empty: initialize() only sets flags in the platform data.
void CudaRemoveCMMotionKernel::execute(ContextImpl& context) {}
...
...
platforms/cuda/src/kernels/cudatypes.h
View file @
a2674e83
#ifndef CUDATYPES_H
#define CUDATYPES_H

/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program. If not, see <http://www.gnu.org/licenses/>.       *
 * -------------------------------------------------------------------------- */
#include <stdarg.h>
#include <limits>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <builtin_types.h>
#include <vector_functions.h>
// Check the result of a CUDA runtime call: if `status` is not cudaSuccess,
// print the caller-supplied message `s` followed by the CUDA error string and
// terminate the process with exit(-1).
//
// The body is wrapped in do { ... } while (0) so the macro behaves like a
// single statement; a bare `if { }` would break `if (c) RTERROR(...); else ...`.
// Invoke it with a trailing semicolon.
#define RTERROR(status, s) \
    do { \
        if ((status) != cudaSuccess) { \
            printf("%s %s\n", (s), cudaGetErrorString(status)); \
            exit(-1); \
        } \
    } while (0)
// Check for a pending CUDA error (cudaGetLastError) after launching the kernel
// named `s`: on failure print the CUDA error string together with the kernel
// name and terminate the process with exit(-1).
//
// The body is wrapped in do { ... } while (0) so the macro behaves like a
// single statement; a bare `{ }` block followed by `;` would break
// `if (c) LAUNCHERROR(...); else ...`. Invoke it with a trailing semicolon.
#define LAUNCHERROR(s) \
    do { \
        cudaError_t status = cudaGetLastError(); \
        if (status != cudaSuccess) { \
            printf("Error: %s launching kernel %s\n", cudaGetErrorString(status), (s)); \
            exit(-1); \
        } \
    } while (0)
// Pure virtual class to define an interface for objects resident both on GPU and CPU
struct SoADeviceObject {
    // Virtual destructor so that deleting an implementation through a
    // SoADeviceObject pointer runs the derived destructor.
    virtual ~SoADeviceObject() {}

    virtual void Allocate() = 0;    // acquire the object's storage
    virtual void Deallocate() = 0;  // release the object's storage
    virtual void Upload() = 0;      // copy host data to the device
    virtual void Download() = 0;    // copy device data to the host
};
// A typed buffer that exists twice: once in host memory and once in device
// memory. The buffer is organised as `_subStreams` consecutive sub-streams of
// `_stride` elements each, where `_stride` is `_length` rounded up to a
// multiple of 16 by the constructors.
//
// NOTE(review): the struct owns raw host and device allocations that its
// destructor releases via Deallocate(), but it declares no copy constructor or
// assignment operator, so copying an instance would release the same buffers
// twice. Pass instances by reference or pointer.
template <typename T>
struct CUDAStream : public SoADeviceObject
{
    unsigned int    _length;        // number of meaningful elements per sub-stream
    unsigned int    _subStreams;    // number of sub-streams
    unsigned int    _stride;        // allocated elements per sub-stream (>= _length)
    T**             _pSysStream;    // per-sub-stream pointers into _pSysData
    T**             _pDevStream;    // per-sub-stream pointers into _pDevData
    T*              _pSysData;      // host buffer (new[])
    T*              _pDevData;      // device buffer (cudaMalloc)
    std::string     _name;          // label used in error messages

    // One overload per int / unsigned int combination so callers need no casts.
    // Only the two same-signedness overloads default `subStreams`: if all four
    // did, a one-argument construction such as CUDAStream<T>(n) would be
    // ambiguous between the overloads sharing the first parameter type.
    CUDAStream(int length, int subStreams = 1, std::string name = "");
    CUDAStream(unsigned int length, unsigned int subStreams = 1, std::string name = "");
    CUDAStream(unsigned int length, int subStreams, std::string name = "");
    CUDAStream(int length, unsigned int subStreams, std::string name = "");
    virtual ~CUDAStream();

    void Allocate();
    void Deallocate();
    void Upload();      // host -> device, whole buffer
    void Download();    // device -> host, whole buffer
    void CopyFrom(const CUDAStream<T>& src);    // device -> device
    void Collapse(unsigned int newstreams = 1, unsigned int interleave = 1);
    T& operator[](int index);   // indexes the host buffer
};
// Declaration only; the definition lives outside this header.
// NOTE(review): presumably compares the contents of s1 and s2 within
// `tolerance` over up to `maxindex` elements - confirm against the definition.
float CompareStreams(CUDAStream<float>& s1,
                     CUDAStream<float>& s2,
                     float tolerance,
                     unsigned int maxindex = 0);
// Construct a stream of `length` elements in each of `subStreams` sub-streams
// and allocate its host and device buffers immediately.
template <typename T>
CUDAStream<T>::CUDAStream(int length, unsigned int subStreams, std::string name)
    : _length(length),
      _subStreams(subStreams),
      _stride((length + 0xf) & 0xfffffff0),     // round length up to a multiple of 16
      _name(name)
{
    Allocate();
}
// Construct a stream of `length` elements in each of `subStreams` sub-streams
// and allocate its host and device buffers immediately.
template <typename T>
CUDAStream<T>::CUDAStream(unsigned int length, int subStreams, std::string name)
    : _length(length),
      _subStreams(subStreams),
      _stride((length + 0xf) & 0xfffffff0),     // round length up to a multiple of 16
      _name(name)
{
    Allocate();
}
// Construct a stream of `length` elements in each of `subStreams` sub-streams
// and allocate its host and device buffers immediately.
template <typename T>
CUDAStream<T>::CUDAStream(unsigned int length, unsigned int subStreams, std::string name)
    : _length(length),
      _subStreams(subStreams),
      _stride((length + 0xf) & 0xfffffff0),     // round length up to a multiple of 16
      _name(name)
{
    Allocate();
}
// Construct a stream of `length` elements in each of `subStreams` sub-streams
// and allocate its host and device buffers immediately.
template <typename T>
CUDAStream<T>::CUDAStream(int length, int subStreams, std::string name)
    : _length(length),
      _subStreams(subStreams),
      _stride((length + 0xf) & 0xfffffff0),     // round length up to a multiple of 16
      _name(name)
{
    Allocate();
}
// Release the host and device buffers owned by this stream.
template <typename T>
CUDAStream<T>::~CUDAStream()
{
    Deallocate();
}
// Allocate the host buffer, the device buffer and the two per-sub-stream
// pointer tables, then point entry i of each table at offset i * _stride of
// the corresponding buffer. Exits the process (via RTERROR) if cudaMalloc fails.
template <typename T>
void CUDAStream<T>::Allocate()
{
    _pSysStream = new T*[_subStreams];
    _pDevStream = new T*[_subStreams];
    _pSysData = new T[_subStreams * _stride];

    const cudaError_t status = cudaMalloc((void**) &_pDevData, _stride * _subStreams * sizeof(T));
    RTERROR(status, (_name + ": cudaMalloc in CUDAStream::Allocate failed").c_str());

    for (unsigned int s = 0; s < _subStreams; ++s)
    {
        const unsigned int offset = s * _stride;
        _pSysStream[s] = _pSysData + offset;
        _pDevStream[s] = _pDevData + offset;
    }
}
// Release the pointer tables, the host buffer and the device buffer, and reset
// every owning pointer to NULL. Exits the process (via RTERROR) if cudaFree
// reports an error.
template <typename T>
void CUDAStream<T>::Deallocate()
{
    cudaError_t status;
    delete[] _pSysStream;
    _pSysStream = NULL;
    delete[] _pDevStream;
    _pDevStream = NULL;
    delete[] _pSysData;
    _pSysData = NULL;
    status = cudaFree(_pDevData);
    // Clear the device pointer like the three host-side pointers above, so a
    // stale address is never handed to cudaFree again (cudaFree(NULL) is a no-op).
    _pDevData = NULL;
    RTERROR(status, (_name + ": cudaFree in CUDAStream::Deallocate failed").c_str());
}
// Copy the whole host buffer (all sub-streams, including stride padding) to
// the device. Exits the process (via RTERROR) if the copy fails.
template <typename T>
void CUDAStream<T>::Upload()
{
    const cudaError_t status = cudaMemcpy(_pDevData, _pSysData,
                                          _stride * _subStreams * sizeof(T),
                                          cudaMemcpyHostToDevice);
    RTERROR(status, (_name + ": cudaMemcpy in CUDAStream::Upload failed").c_str());
}
// Copy the whole device buffer (all sub-streams, including stride padding)
// back to the host. Exits the process (via RTERROR) if the copy fails.
template <typename T>
void CUDAStream<T>::Download()
{
    const cudaError_t status = cudaMemcpy(_pSysData, _pDevData,
                                          _stride * _subStreams * sizeof(T),
                                          cudaMemcpyDeviceToHost);
    RTERROR(status, (_name + ": cudaMemcpy in CUDAStream::Download failed").c_str());
}
// Device-to-device copy of `src` into this stream. The host buffers are not
// touched. Exits the process (via RTERROR) if the copy fails.
//
// The number of bytes copied is computed from THIS stream's stride and
// sub-stream count; `src` is not checked, so it must be at least as large.
template <typename T>
void CUDAStream<T>::CopyFrom(const CUDAStream<T>& src)
{
    const cudaError_t status = cudaMemcpy(_pDevData, src._pDevData,
                                          _stride * _subStreams * sizeof(T),
                                          cudaMemcpyDeviceToDevice);
    RTERROR(status, (_name + ": cudaMemcpy in CUDAStream::Copy failed").c_str());
}
// Re-partition the host data into `newstreams` sub-streams.
//
// Elements are read position by position across the existing sub-streams
// (element i of sub-stream 0, 1, ..., then element i + 1, ...) and dealt out
// round-robin over the new sub-streams. The total buffer size is unchanged;
// stride and length become the old totals divided by `newstreams`. Only the
// host copy is rearranged; no upload is performed here.
//
// Preconditions (not checked): `newstreams` must be non-zero.
// NOTE(review): the arithmetic uses integer division, so it presumably expects
// `newstreams` to divide _stride * _subStreams and _length * _subStreams evenly.
// `interleave` is currently unused.
template <typename T>
void CUDAStream<T>::Collapse(unsigned int newstreams, unsigned int interleave)
{
    T* pTemp = new T[_subStreams * _stride];
    unsigned int stream = 0;
    unsigned int pos = 0;
    unsigned int newstride = _stride * _subStreams / newstreams;
    unsigned int newlength = _length * _subStreams / newstreams;

    // Copy data into new format
    for (unsigned int i = 0; i < _length; i++)
    {
        for (unsigned int j = 0; j < _subStreams; j++)
        {
            pTemp[stream * newstride + pos] = _pSysStream[j][i];
            stream++;
            if (stream == newstreams)
            {
                stream = 0;
                pos++;
            }
        }
    }

    // The pointer tables were sized for the old sub-stream count. Grow them
    // before remapping when more sub-streams are requested; otherwise the
    // remap loop below would write past the end of both tables.
    if (newstreams > _subStreams)
    {
        delete[] _pSysStream;
        delete[] _pDevStream;
        _pSysStream = new T*[newstreams];
        _pDevStream = new T*[newstreams];
    }

    // Remap stream pointers
    for (unsigned int i = 0; i < newstreams; i++)
    {
        _pSysStream[i] = _pSysData + i * newstride;
        _pDevStream[i] = _pDevData + i * newstride;
    }

    // Copy data back into original stream
    for (unsigned int i = 0; i < newlength; i++)
        for (unsigned int j = 0; j < newstreams; j++)
            _pSysStream[j][i] = pTemp[j * newstride + i];

    _stride = newstride;
    _length = newlength;
    _subStreams = newstreams;
    delete[] pTemp;
}
// Access element `index` of the host buffer (no bounds checking). The index
// is relative to the start of the whole buffer, not to a sub-stream.
template <typename T>
T& CUDAStream<T>::operator[](int index)
{
    T* const element = _pSysData + index;
    return *element;
}
// ---- Tile geometry ---------------------------------------------------------
static const unsigned int GRID                      = 32;
static const unsigned int GRIDBITS                  = 5;    // 1 << GRIDBITS == GRID

// ---- Launch configuration, one value per GPU family (G8X / GT2XX / GF1XX) --
static const int G8X_BLOCKS_PER_SM                  = 1;
static const int GT2XX_BLOCKS_PER_SM                = 1;
static const int GF1XX_BLOCKS_PER_SM                = 1;

static const int G8X_NONBOND_THREADS_PER_BLOCK      = 256;
static const int GT2XX_NONBOND_THREADS_PER_BLOCK    = 320;
static const int GF1XX_NONBOND_THREADS_PER_BLOCK    = 768;

static const int G8X_BORNFORCE2_THREADS_PER_BLOCK   = 256;
static const int GT2XX_BORNFORCE2_THREADS_PER_BLOCK = 320;
static const int GF1XX_BORNFORCE2_THREADS_PER_BLOCK = 768;

static const int G8X_SHAKE_THREADS_PER_BLOCK        = 128;
static const int GT2XX_SHAKE_THREADS_PER_BLOCK      = 256;
static const int GF1XX_SHAKE_THREADS_PER_BLOCK      = 512;

static const int G8X_UPDATE_THREADS_PER_BLOCK       = 192;
static const int GT2XX_UPDATE_THREADS_PER_BLOCK     = 384;
static const int GF1XX_UPDATE_THREADS_PER_BLOCK     = 768;

static const int G8X_LOCALFORCES_THREADS_PER_BLOCK  = 192;
static const int GT2XX_LOCALFORCES_THREADS_PER_BLOCK = 384;
static const int GF1XX_LOCALFORCES_THREADS_PER_BLOCK = 768;

static const int G8X_THREADS_PER_BLOCK              = 256;
static const int GT2XX_THREADS_PER_BLOCK            = 256;
static const int GF1XX_THREADS_PER_BLOCK            = 512;

static const int G8X_RANDOM_THREADS_PER_BLOCK       = 256;
static const int GT2XX_RANDOM_THREADS_PER_BLOCK     = 384;
static const int GF1XX_RANDOM_THREADS_PER_BLOCK     = 768;

static const int G8X_NONBOND_WORKUNITS_PER_SM       = 220;
static const int GT2XX_NONBOND_WORKUNITS_PER_SM     = 256;
static const int GF1XX_NONBOND_WORKUNITS_PER_SM     = 768;

// ---- Custom expression evaluation limits -----------------------------------
static const unsigned int MAX_STACK_SIZE            = 8;
static const unsigned int MAX_TABULATED_FUNCTIONS   = 4;

// ---- Miscellaneous ---------------------------------------------------------
static const float PI                               = 3.14159265358979323846f;
static const int PME_ORDER                          = 5;
// Selects how nonbonded interactions are handled. Values are consecutive,
// starting at 0 for NO_CUTOFF.
enum CudaNonbondedMethod
{
    NO_CUTOFF,
    CUTOFF,
    PERIODIC,
    EWALD,
    PARTICLE_MESH_EWALD
};
// Operation codes stored in Expression::op. Values are consecutive, starting
// at 0 for VARIABLE0; the order must not change.
enum ExpressionOp
{
    // Variable references
    VARIABLE0 = 0, VARIABLE1, VARIABLE2, VARIABLE3, VARIABLE4,
    VARIABLE5, VARIABLE6, VARIABLE7, VARIABLE8,

    // Binary arithmetic
    MULTIPLY, DIVIDE, ADD, SUBTRACT, POWER,

    // Arithmetic with a constant operand
    MULTIPLY_CONSTANT, POWER_CONSTANT, ADD_CONSTANT,

    // Other operand sources
    GLOBAL, CONSTANT, CUSTOM, CUSTOM_DERIV,

    // Unary functions
    NEGATE, RECIPROCAL, SQRT, EXP, LOG, SQUARE, CUBE, STEP,
    SIN, COS, SEC, CSC, TAN, COT,
    ASIN, ACOS, ATAN,
    SINH, COSH, TANH,
    ERF, ERFC,

    MIN, MAX, ABS
};
// A fixed-capacity expression program: parallel arrays holding up to SIZE
// operation codes and their float arguments, plus the number of entries in
// use and the evaluation stack size.
template <int SIZE>
struct Expression
{
    int   op[SIZE];     // operation code per entry
    float arg[SIZE];    // argument per entry
    int   length;       // number of entries in use
    int   stackSize;    // stack size for evaluating the expression
};
struct
cudaGmxSimulation
{
// Constants
unsigned
int
atoms
;
// Number of atoms
unsigned
int
paddedNumberOfAtoms
;
// Padded number of atoms
unsigned
int
blocks
;
// Number of blocks to launch across linear kernels
unsigned
int
blocksPerSM
;
// Number of blocks per share memory
unsigned
int
nonbond_blocks
;
// Number of blocks to launch across CDLJ and Born Force Part1
unsigned
int
bornForce2_blocks
;
// Number of blocks to launch across Born Force 2
unsigned
int
interaction_blocks
;
// Number of blocks to launch when identifying interacting tiles
unsigned
int
threads_per_block
;
// Threads per block to launch
unsigned
int
nonbond_threads_per_block
;
// Threads per block in nonbond kernel calls
unsigned
int
bornForce2_threads_per_block
;
// Threads per block in nonbond kernel calls
unsigned
int
max_update_threads_per_block
;
// Maximum threads per block in update kernel calls
unsigned
int
update_threads_per_block
;
// Threads per block in update kernel calls
unsigned
int
bf_reduce_threads_per_block
;
// Threads per block in Born Force reduction calls
unsigned
int
bsf_reduce_threads_per_block
;
// Threads per block in Born Sum And Forces reduction calls
unsigned
int
max_shake_threads_per_block
;
// Maximum threads per block in shake kernel calls
unsigned
int
shake_threads_per_block
;
// Threads per block in shake kernel calls
unsigned
int
settle_threads_per_block
;
// Threads per block in SETTLE kernel calls
unsigned
int
ccma_threads_per_block
;
// Threads per block in CCMA kernel calls
unsigned
int
max_localForces_threads_per_block
;
// Threads per block in local forces kernel calls
unsigned
int
localForces_threads_per_block
;
// Threads per block in local forces kernel calls
unsigned
int
random_threads_per_block
;
// Threads per block in RNG kernel calls
unsigned
int
interaction_threads_per_block
;
// Threads per block when identifying interacting tiles
unsigned
int
custom_exception_threads_per_block
;
// Threads per block in custom nonbonded exception kernel calls
unsigned
int
customExpressionStackSize
;
// Stack size for evaluating custom nonbonded forces
unsigned
int
workUnits
;
// Number of work units
unsigned
int
*
pWorkUnit
;
// Pointer to work units
unsigned
int
*
pInteractingWorkUnit
;
// Pointer to work units that have interactions
unsigned
int
*
pInteractionFlag
;
// Flags for which work units have interactions
float2
*
pStepSize
;
// The size of the previous and current time steps
float
*
pLangevinParameters
;
// Parameters used for Langevin integration
float
errorTol
;
// Error tolerance for selecting the step size
size_t
*
pInteractionCount
;
// A count of the number of work units which have interactions
unsigned
int
nonbond_workBlock
;
// Number of work units running simultaneously per block in CDLJ and Born Force Part 1
unsigned
int
bornForce2_workBlock
;
// Number of work units running second half of Born Forces calculation
unsigned
int
workUnitsPerSM
;
// Number of workblocks per SM
unsigned
int
nbWorkUnitsPerBlock
;
// Number of work units assigned to each nonbond block
unsigned
int
nbWorkUnitsPerBlockRemainder
;
// Remainder of work units to assign across lower numbered nonbond blocks
unsigned
int
bf2WorkUnitsPerBlock
;
// Number of work units assigned to each bornForce2 block
unsigned
int
bf2WorkUnitsPerBlockRemainder
;
// Remainder of work units to assign across lower numbered bornForce2 blocks
unsigned
int
stride
;
// Atomic attributes stride
unsigned
int
stride2
;
// Atomic attributes stride x 2
unsigned
int
stride3
;
// Atomic attributes stride x 3
unsigned
int
stride4
;
// Atomic attributes stride x 4
unsigned
int
nonbondOutputBuffers
;
// Nonbond output buffers per nonbond call
unsigned
int
outputBuffers
;
// Number of output buffers
unsigned
int
energyOutputBuffers
;
// Number of energy output buffers
float
bigFloat
;
// Floating point value used as a flag for Shaken atoms
float
epsfac
;
// Epsilon factor for CDLJ calculations
CudaNonbondedMethod
nonbondedMethod
;
// How to handle nonbonded interactions
CudaNonbondedMethod
customNonbondedMethod
;
// How to handle custom nonbonded interactions
float
nonbondedCutoff
;
// Cutoff distance for nonbonded interactions
float
nonbondedCutoffSqr
;
// Square of the cutoff distance for nonbonded interactions
float
periodicBoxSizeX
;
// The X dimension of the periodic box
float
periodicBoxSizeY
;
// The Y dimension of the periodic box
float
periodicBoxSizeZ
;
// The Z dimension of the periodic box
float
invPeriodicBoxSizeX
;
// The 1 over the X dimension of the periodic box
float
invPeriodicBoxSizeY
;
// The 1 over the Y dimension of the periodic box
float
invPeriodicBoxSizeZ
;
// The 1 over the Z dimension of the periodic box
float
recipBoxSizeX
;
// The X dimension of the reciprocal box for Ewald summation
float
recipBoxSizeY
;
// The Y dimension of the reciprocal box for Ewald summation
float
recipBoxSizeZ
;
// The Z dimension of the reciprocal box for Ewald summation
float
cellVolume
;
// Ewald parameter alpha (a.k.a. kappa)
float
alphaEwald
;
// Ewald parameter alpha (a.k.a. kappa)
float
factorEwald
;
// - 1 ( 4 * alphaEwald * alphaEwald)
int
kmaxX
;
// Maximum number of reciprocal vectors in the X direction
int
kmaxY
;
// Maximum number of reciprocal vectors in the Y direction
int
kmaxZ
;
// Maximum number of reciprocal vectors in the Z direction
float
reactionFieldK
;
// Constant for reaction field correction
float
reactionFieldC
;
// Constant for reaction field correction
float
probeRadius
;
// SASA probe radius
float
surfaceAreaFactor
;
// ACE approximation surface area factor
float
electricConstant
;
// ACE approximation electric constant
float
forceConversionFactor
;
// kJ to kcal force conversion factor
float
preFactor
;
// Born electrostatic pre-factor
float
dielectricOffset
;
// Born dielectric offset
float
alphaOBC
;
// OBC alpha factor
float
betaOBC
;
// OBC beta factor
float
gammaOBC
;
// OBC gamma factor
float
deltaT
;
// Molecular dynamics deltaT constant
float
oneOverDeltaT
;
// 1/deltaT
float
T
;
// Temperature
float
kT
;
// Boltzmann's constant times T
float
noiseAmplitude
;
// The magnitude of the noise for Brownian dynamics
float
tau
;
// Inverse friction for Langevin or Brownian dynamics
float
tauDeltaT
;
// tau*deltaT
float
collisionFrequency
;
// Collision frequency for Andersen thermostat
float2
*
pObcData
;
// Pointer to fixed Born data
float4
*
pGBVIData
;
// Pointer to fixed Born data for GB/VI algorithm
float2
*
pAttr
;
// Pointer to additional atom attributes (sig, eps)
float4
*
pCustomParams
;
// Pointer to atom parameters for custom nonbonded force
unsigned
int
customExceptions
;
// Number of custom nonbonded exceptions
unsigned
int
customParameters
;
// Number of parameters for custom nonbonded interactions
int4
*
pCustomBondID
;
// Atom indices for custom bonds
float4
*
pCustomBondParams
;
// Parameters for custom bonds
unsigned
int
customBonds
;
// Number of custom bonds
unsigned
int
customBondParameters
;
// Number of parameters for custom bonds
int4
*
pCustomAngleID1
;
// Atom indices for custom angles
int2
*
pCustomAngleID2
;
// Atom indices for custom angles
float4
*
pCustomAngleParams
;
// Parameters for custom angles
unsigned
int
customAngles
;
// Number of custom angles
unsigned
int
customAngleParameters
;
// Number of parameters for custom angles
int4
*
pCustomTorsionID1
;
// Atom indices for custom torsions
int4
*
pCustomTorsionID2
;
// Atom indices for custom torsions
float4
*
pCustomTorsionParams
;
// Parameters for custom torsions
unsigned
int
customTorsions
;
// Number of custom torsions
unsigned
int
customTorsionParameters
;
// Number of parameters for custom torsions
int
*
pCustomExternalID
;
// Atom indices for custom external force
float4
*
pCustomExternalParams
;
// Parameters for custom external force
unsigned
int
customExternals
;
// Number of particles for custom external force
unsigned
int
customExternalParameters
;
// Number of parameters for custom external force
float4
*
pTabulatedFunctionCoefficients
[
MAX_TABULATED_FUNCTIONS
];
// The spline coefficients for each tabulated function
float4
*
pTabulatedFunctionParams
;
// The min, max, and spacing for each tabulated function
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
float
*
pTabulatedErfc
;
// Tabulated values for erfc()
int
tabulatedErfcSize
;
// The number of tabulated values for erfc()
float
tabulatedErfcScale
;
// Scale factor for the argument to erfc()
int3
pmeGridSize
;
// The dimensions of the grid for particle mesh Ewald
int3
pmeGroupSize
;
// The dimensions of the groups used in charge spreading for PME
cufftComplex
*
pPmeGrid
;
// Grid points for particle mesh Ewald
float
*
pPmeBsplineModuli
[
3
];
float4
*
pPmeBsplineTheta
;
float4
*
pPmeBsplineDtheta
;
int
*
pPmeAtomRange
;
// The range of sorted atoms at each grid point
int2
*
pPmeAtomGridIndex
;
// The grid point each atom is at
unsigned
int
bonds
;
// Number of bonds
int4
*
pBondID
;
// Bond atom and output buffer IDs
float2
*
pBondParameter
;
// Bond parameters
unsigned
int
bond_angles
;
// Number of bond angles
int4
*
pBondAngleID1
;
// Bond angle atom and first output buffer IDs
int2
*
pBondAngleID2
;
// Bond angle output buffer IDs
float2
*
pBondAngleParameter
;
// Bond angle parameters
unsigned
int
dihedrals
;
// Number of dihedrals
int4
*
pDihedralID1
;
// Dihedral IDs
int4
*
pDihedralID2
;
// Dihedral output buffer IDs
float4
*
pDihedralParameter
;
// Dihedral parameters
unsigned
int
rb_dihedrals
;
// Number of Ryckaert Bellemans dihedrals
int4
*
pRbDihedralID1
;
// Ryckaert Bellemans Dihedral IDs
int4
*
pRbDihedralID2
;
// Ryckaert Bellemans Dihedral output buffer IDs
float4
*
pRbDihedralParameter1
;
// Ryckaert Bellemans Dihedral parameters
float2
*
pRbDihedralParameter2
;
// Ryckaert Bellemans Dihedral parameters
unsigned
int
LJ14s
;
// Number of Lennard Jones 1-4 interactions
int4
*
pLJ14ID
;
// Lennard Jones 1-4 atom and output buffer IDs
float4
*
pLJ14Parameter
;
// Lennard Jones 1-4 parameters
float
inverseTotalMass
;
// Used in linear momentum removal
unsigned
int
ShakeConstraints
;
// Total number of Shake constraints
unsigned
int
settleConstraints
;
// Total number of Settle constraints
unsigned
int
ccmaConstraints
;
// Total number of CCMA constraints.
unsigned
int
rigidClusters
;
// Total number of rigid clusters
unsigned
int
maxRigidClusterSize
;
// The size of the largest rigid cluster
unsigned
int
clusterShakeBlockSize
;
// The number of threads to process each rigid cluster
unsigned
int
maxShakeIterations
;
// Maximum shake iterations
unsigned
int
degreesOfFreedom
;
// Number of degrees of freedom in system
float
shakeTolerance
;
// Shake tolerance
float
InvMassJ
;
// Shake inverse mass for hydrogens
int
*
pNonShakeID
;
// Not Shaking atoms
int4
*
pShakeID
;
// Shake atoms and phase
float4
*
pShakeParameter
;
// Shake parameters
int4
*
pSettleID
;
// Settle atoms
float2
*
pSettleParameter
;
// Settle parameters
unsigned
int
*
pExclusion
;
// Nonbond exclusion data
unsigned
int
*
pExclusionIndex
;
// Index of exclusion data for each work unit
unsigned
int
bond_offset
;
// Offset to end of bonds
unsigned
int
bond_angle_offset
;
// Offset to end of bond angles
unsigned
int
dihedral_offset
;
// Offset to end of dihedrals
unsigned
int
rb_dihedral_offset
;
// Offset to end of Ryckaert Bellemans dihedrals
unsigned
int
LJ14_offset
;
// Offset to end of Lennard Jones 1-4 parameters
int
*
pAtomIndex
;
// The original index of each atom
float4
*
pGridBoundingBox
;
// The size of each grid cell
float4
*
pGridCenter
;
// The center of each grid cell
int2
*
pCcmaAtoms
;
// The atoms connected by each CCMA constraint
float4
*
pCcmaDistance
;
// The displacement vector (x, y, z) and constraint distance (w) for each CCMA constraint
float
*
pCcmaDelta1
;
// Workspace for CCMA
float
*
pCcmaDelta2
;
// Workspace for CCMA
int
*
pCcmaAtomConstraints
;
// The indices of constraints involving each atom
int
*
pCcmaNumAtomConstraints
;
// The number of constraints involving each atom
int
*
ccmaConvergedDeviceMarker
;
// Device memory used to communicate that CCMA has converged
float
*
pCcmaReducedMass
;
// The reduced mass for each CCMA constraint
unsigned
int
*
pConstraintMatrixColumn
;
// The column of each element in the constraint matrix.
float
*
pConstraintMatrixValue
;
// The value of each element in the constraint matrix.
// Mutable stuff
float4
*
pPosq
;
// Pointer to atom positions and charges
float4
*
pPosqP
;
// Pointer to mid-integration atom positions
float4
*
pOldPosq
;
// Pointer to old atom positions
float4
*
pVelm4
;
// Pointer to atom velocity and inverse mass
float4
*
pForce4
;
// Pointer to force data
float
*
pEnergy
;
// Pointer to energy output buffer
float
*
pBornForce
;
// Pointer to Born force data
float
*
pBornSum
;
// Pointer to Born Radii calculation output buffers
float
*
pBornRadii
;
// Pointer to Born Radii
float
*
pObcChain
;
// Pointer to OBC chain data
float4
*
pLinearMomentum
;
// Pointer to linear momentum
// Random numbers
float4
*
pRandom4
;
// Pointer to 4 random numbers
float2
*
pRandom2
;
// Pointer to 2 random numbers
uint4
*
pRandomSeed
;
// Pointer to random seeds
int
*
pRandomPosition
;
// Pointer to random number positions
unsigned
int
randoms
;
// Number of randoms
unsigned
int
totalRandoms
;
// Number of randoms plus overflow.
unsigned
int
randomIterations
;
// Number of iterations before regenerating randoms
unsigned
int
randomFrames
;
// Number of frames of random numbers
};
// Plain bundle of three float3 values, laid out as v0, v1, v2 in that order.
struct Vectors {
    float3 v0, v1, v2;
};
#endif
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
a2674e83
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */
#include <stdio.h>
#include <string.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cmath>
#include <map>
#include <set>
#include <algorithm>
#ifdef WIN32
#define _USE_MATH_DEFINES
/* M_PI */
#include <math.h>
#include <windows.h>
#else
#include <stdint.h>
#endif
using
namespace
std
;
#include "gputypes.h"
#include "cudaKernels.h"
#include "hilbert.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/SplineFitter.h"
#include "quern.h"
#include "Lepton.h"
#include "rng.h"
#include "../CudaForceInfo.h"
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
#include "openmm/internal/MSVC_erfc.h"
#include "openmm/internal/windowsExport.h"
using
OpenMM
::
OpenMMException
;
using
Lepton
::
Operation
;
// Bookkeeping for one candidate SHAKE cluster: a central atom plus at most three
// peripheral atoms, all of which must share the same distance and inverse mass.
// A cluster that cannot satisfy those rules is flagged by clearing `valid`.
struct ShakeCluster {
    int centralID;              // index of the central atom (-1 until one is assigned)
    int peripheralID[3];        // peripheral atom indices; only the first `size` entries are meaningful
    int size;                   // number of peripheral atoms recorded so far (0..3)
    bool valid;                 // false once an atom was rejected by addAtom()
    float distance;             // distance shared by all recorded peripheral atoms
    float centralInvMass, peripheralInvMass;

    // Creates an empty, valid cluster. Every field is given a defined value so that
    // addAtom() never reads an indeterminate `size`, `distance` or `peripheralInvMass`.
    ShakeCluster()
        : centralID(-1), size(0), valid(true), distance(0.0f), centralInvMass(0.0f), peripheralInvMass(0.0f) {
        peripheralID[0] = peripheralID[1] = peripheralID[2] = -1;
    }

    // Creates an empty, valid cluster around the given central atom.
    // Initializers are listed in member declaration order.
    ShakeCluster(int centralID, float invMass)
        : centralID(centralID), size(0), valid(true), distance(0.0f), centralInvMass(invMass), peripheralInvMass(0.0f) {
        peripheralID[0] = peripheralID[1] = peripheralID[2] = -1;
    }

    // Records one more peripheral atom. The cluster is marked invalid (and the atom is
    // not stored) when it already holds three atoms, or when `dist` / `invMass` differ
    // from the values of the atoms already stored. The comparison is intentionally exact.
    void addAtom(int id, float dist, float invMass) {
        const bool full = (size == 3);
        const bool mismatch = (size > 0 && (dist != distance || invMass != peripheralInvMass));
        if (full || mismatch) {
            valid = false;
            return;
        }
        peripheralID[size++] = id;
        distance = dist;
        peripheralInvMass = invMass;
    }
};
// A single constraint between two atoms.
// NOTE(review): `distance2` is presumably the squared constraint distance - confirm against callers.
struct Constraint {
    int atom1, atom2;   // the two atom indices
    float distance2;    // value supplied as the third constructor argument

    Constraint(int first, int second, float dist2)
        : atom1(first), atom2(second), distance2(dist2) {
    }
};
// Strict-weak-ordering comparator over constraint indices: index x sorts before
// index y when atom1[x] < atom1[y], with ties broken by atom2.
//
// The comparator stores references to the two vectors, so both must outlive it.
//
// std::binary_function was deprecated in C++11 and removed in C++17, so the struct no
// longer derives from it; the typedefs it used to supply are declared directly.
// operator() is const so the comparator can be invoked through a const object.
struct ConstraintOrderer {
    typedef int first_argument_type;
    typedef int second_argument_type;
    typedef bool result_type;

    const std::vector<int>& atom1;
    const std::vector<int>& atom2;

    ConstraintOrderer(const std::vector<int>& atom1, const std::vector<int>& atom2)
        : atom1(atom1), atom2(atom2) {
    }

    // Returns true when constraint x orders strictly before constraint y.
    bool operator()(int x, int y) const {
        if (atom1[x] != atom1[y])
            return atom1[x] < atom1[y];
        return atom2[x] < atom2[y];
    }
};
// Per-molecule index lists.
// NOTE(review): presumably atom indices, constraint indices and per-force groups of
// indices - confirm against the code that fills these in.
struct Molecule {
    std::vector<int> atoms;
    std::vector<int> constraints;
    std::vector<std::vector<int> > groups;
};
static
const
float
dielectricOffset
=
0.009
f
;
static
const
float
probeRadius
=
0.14
f
;
static
const
float
forceConversionFactor
=
0.4184
f
;
//static const float surfaceAreaFactor = -6.0f * 0.06786f * forceConversionFactor * 1000.0f; // PI * 4.0f * 0.0049f * 1000.0f;
//static const float surfaceAreaFactor = -6.0f * PI * 4.0f * 0.0049f * 1000.0f;
static
const
float
surfaceAreaFactor
=
-
6.0
f
*
PI
*
0.0216
f
*
1000.0
f
*
0.4184
f
;
//static const float surfaceAreaFactor = -1.7035573959e+001;
//static const float surfaceAreaFactor = -166.03185f;
//static const float surfaceAreaFactor = 1.0f;
static
const
float
alphaOBC
=
1.0
f
;
static
const
float
betaOBC
=
0.8
f
;
static
const
float
gammaOBC
=
4.85
f
;
static
const
float
kcalMolTokJNM
=
-
0.4184
f
;
static
const
float
electricConstant
=
-
166.03185
f
;
static
const
float
defaultInnerDielectric
=
1.0
f
;
static
const
float
defaultSolventDielectric
=
78.3
f
;
static
const
float
KILO
=
1e3
;
// Thousand
static
const
float
BOLTZMANN
=
1.380658e-23
f
;
// (J/K)
static
const
float
AVOGADRO
=
6.0221367e23
f
;
// ()
static
const
float
RGAS
=
BOLTZMANN
*
AVOGADRO
;
// (J/(mol K))
static
const
float
BOLTZ
=
(
RGAS
/
KILO
);
// (kJ/(mol K))
#define DUMP_PARAMETERS 0
// Translates a parsed Lepton expression program into a fixed-size Expression<SIZE>
// record holding one opcode (and, where relevant, one float argument) per operation.
//
//   gpu              - context whose tabulatedFunctions table is searched for CUSTOM operations
//   expression       - source text of the expression; used only in error messages
//   program          - the program to translate
//   variables        - names mapped to VARIABLE0..VARIABLE8 (entries past the ninth are never consulted)
//   globalParamNames - names mapped to GLOBAL, with the matching index stored as the argument
//   maxStackSize     - raised to this program's stack size when that is larger
//
// Throws OpenMMException when the program holds more than SIZE operations, or when a
// variable name is found in neither `variables` nor `globalParamNames`.
template <int SIZE>
static Expression<SIZE> createExpression(gpuContext gpu, const string& expression, const Lepton::ExpressionProgram& program,
                                         const vector<string>& variables, const vector<string>& globalParamNames,
                                         unsigned int& maxStackSize)
{
    Expression<SIZE> exp;
    const int numOperations = program.getNumOperations();
    if (numOperations > SIZE)
        throw OpenMMException("Expression contains too many operations: "+expression);
    exp.length = numOperations;
    exp.stackSize = program.getStackSize();
    if (exp.stackSize > (int) maxStackSize)
        maxStackSize = exp.stackSize;

    for (int i = 0; i < numOperations; i++)
    {
        const Operation& op = program.getOperation(i);
        switch (op.getId())
        {
            case Operation::CONSTANT:
                exp.op[i] = CONSTANT;
                exp.arg[i] = (float) dynamic_cast<const Operation::Constant*>(&op)->getValue();
                break;

            case Operation::VARIABLE:
            {
                const string name = op.getName();

                // Find the first of the (at most nine) per-interaction variables with this name.
                int slot = -1;
                for (int v = 0; v < 9 && v < (int) variables.size(); v++)
                {
                    if (name == variables[v])
                    {
                        slot = v;
                        break;
                    }
                }

                if (slot == 0)
                    exp.op[i] = VARIABLE0;
                else if (slot == 1)
                    exp.op[i] = VARIABLE1;
                else if (slot == 2)
                    exp.op[i] = VARIABLE2;
                else if (slot == 3)
                    exp.op[i] = VARIABLE3;
                else if (slot == 4)
                    exp.op[i] = VARIABLE4;
                else if (slot == 5)
                    exp.op[i] = VARIABLE5;
                else if (slot == 6)
                    exp.op[i] = VARIABLE6;
                else if (slot == 7)
                    exp.op[i] = VARIABLE7;
                else if (slot == 8)
                    exp.op[i] = VARIABLE8;
                else
                {
                    // Not a per-interaction variable: it has to be a global parameter.
                    const vector<string>::const_iterator match = std::find(globalParamNames.begin(), globalParamNames.end(), name);
                    if (match == globalParamNames.end())
                        throw OpenMMException("Unknown variable '"+name+"' in expression: "+expression);
                    exp.op[i] = GLOBAL;
                    exp.arg[i] = (float) (int) (match-globalParamNames.begin());
                }
                break;
            }

            case Operation::CUSTOM:
            {
                // A zero first derivative order selects the function itself, anything else its derivative.
                const Operation::Custom* custom = dynamic_cast<const Operation::Custom*>(&op);
                if (custom->getDerivOrder()[0] == 0)
                    exp.op[i] = CUSTOM;
                else
                    exp.op[i] = CUSTOM_DERIV;

                // The argument is the index of the first tabulated function with a matching name;
                // it is left untouched when no table entry matches.
                const string name = op.getName();
                int table = 0;
                while (table < MAX_TABULATED_FUNCTIONS && !(name == gpu->tabulatedFunctions[table].name))
                    table++;
                if (table < MAX_TABULATED_FUNCTIONS)
                    exp.arg[i] = (float) table;
                break;
            }

            // Operations that map one-to-one onto an opcode and carry no argument.
            case Operation::ADD:        exp.op[i] = ADD;        break;
            case Operation::SUBTRACT:   exp.op[i] = SUBTRACT;   break;
            case Operation::MULTIPLY:   exp.op[i] = MULTIPLY;   break;
            case Operation::DIVIDE:     exp.op[i] = DIVIDE;     break;
            case Operation::POWER:      exp.op[i] = POWER;      break;
            case Operation::NEGATE:     exp.op[i] = NEGATE;     break;
            case Operation::SQRT:       exp.op[i] = SQRT;       break;
            case Operation::EXP:        exp.op[i] = EXP;        break;
            case Operation::LOG:        exp.op[i] = LOG;        break;
            case Operation::SIN:        exp.op[i] = SIN;        break;
            case Operation::COS:        exp.op[i] = COS;        break;
            case Operation::SEC:        exp.op[i] = SEC;        break;
            case Operation::CSC:        exp.op[i] = CSC;        break;
            case Operation::TAN:        exp.op[i] = TAN;        break;
            case Operation::COT:        exp.op[i] = COT;        break;
            case Operation::ASIN:       exp.op[i] = ASIN;       break;
            case Operation::ACOS:       exp.op[i] = ACOS;       break;
            case Operation::ATAN:       exp.op[i] = ATAN;       break;
            case Operation::SINH:       exp.op[i] = SINH;       break;
            case Operation::COSH:       exp.op[i] = COSH;       break;
            case Operation::TANH:       exp.op[i] = TANH;       break;
            case Operation::ERF:        exp.op[i] = ERF;        break;
            case Operation::ERFC:       exp.op[i] = ERFC;       break;
            case Operation::STEP:       exp.op[i] = STEP;       break;
            case Operation::SQUARE:     exp.op[i] = SQUARE;     break;
            case Operation::CUBE:       exp.op[i] = CUBE;       break;
            case Operation::RECIPROCAL: exp.op[i] = RECIPROCAL; break;

            // Operations that carry a constant operand as their argument.
            case Operation::ADD_CONSTANT:
                exp.op[i] = ADD_CONSTANT;
                exp.arg[i] = (float) dynamic_cast<const Operation::AddConstant*>(&op)->getValue();
                break;
            case Operation::MULTIPLY_CONSTANT:
                exp.op[i] = MULTIPLY_CONSTANT;
                exp.arg[i] = (float) dynamic_cast<const Operation::MultiplyConstant*>(&op)->getValue();
                break;
            case Operation::POWER_CONSTANT:
                exp.op[i] = POWER_CONSTANT;
                exp.arg[i] = (float) dynamic_cast<const Operation::PowerConstant*>(&op)->getValue();
                break;

            case Operation::MIN:        exp.op[i] = MIN;        break;
            case Operation::MAX:        exp.op[i] = MAX;        break;
            case Operation::ABS:        exp.op[i] = ABS;        break;
        }
    }
    return exp;
}
// Allocates the bond ID and bond parameter streams, fills them from the supplied
// per-bond vectors, registers them on the context and uploads them.
//
//   atom1, atom2 - the two atom indices of each bond (atom1.size() gives the bond count)
//   length, k    - stored as the x and y components of each bond's parameter entry
//
// The z/w components of each ID entry receive the next value of the per-atom
// pOutputBufferCounter for atom1 and atom2 respectively.
extern "C"
void gpuSetBondParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<float>& length, const vector<float>& k)
{
    const int numBonds = atom1.size();
    gpu->sim.bonds = numBonds;

    CUDAStream<int4>* idStream = new CUDAStream<int4>(numBonds, 1, "BondID");
    gpu->psBondID = idStream;
    gpu->sim.pBondID = idStream->_pDevStream[0];

    CUDAStream<float2>* paramStream = new CUDAStream<float2>(numBonds, 1, "BondParameter");
    gpu->psBondParameter = paramStream;
    gpu->sim.pBondParameter = paramStream->_pDevStream[0];

    for (int n = 0; n < numBonds; n++)
    {
        int4& id = (*idStream)[n];
        float2& param = (*paramStream)[n];
        id.x = atom1[n];
        id.y = atom2[n];
        param.x = length[n];
        param.y = k[n];

        // Claim one output buffer slot per participating atom.
        int4& hostId = idStream->_pSysData[n];
        hostId.z = gpu->pOutputBufferCounter[hostId.x]++;
        hostId.w = gpu->pOutputBufferCounter[hostId.y]++;
#if (DUMP_PARAMETERS == 1)
        cout << n << " " << id.x << " " << id.y << " " << id.z << " " << id.w << " "
             << param.x << " " << param.y << endl;
#endif
    }

    idStream->Upload();
    paramStream->Upload();
}
// Allocates the two bond-angle ID streams and the bond-angle parameter stream, fills
// them from the supplied per-angle vectors, registers them on the context and uploads them.
//
//   atom1..atom3 - the three atom indices of each angle (atom1.size() gives the angle count)
//   angle, k     - stored as the x and y components of each angle's parameter entry
//
// ID1.w and ID2.x/.y receive the next value of the per-atom pOutputBufferCounter for
// atom1, atom2 and atom3 respectively.
extern "C"
void gpuSetBondAngleParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<int>& atom3,
        const vector<float>& angle, const vector<float>& k)
{
    const int numAngles = atom1.size();
    gpu->sim.bond_angles = numAngles;

    CUDAStream<int4>* id1Stream = new CUDAStream<int4>(numAngles, 1, "BondAngleID1");
    gpu->psBondAngleID1 = id1Stream;
    gpu->sim.pBondAngleID1 = id1Stream->_pDevStream[0];

    CUDAStream<int2>* id2Stream = new CUDAStream<int2>(numAngles, 1, "BondAngleID2");
    gpu->psBondAngleID2 = id2Stream;
    gpu->sim.pBondAngleID2 = id2Stream->_pDevStream[0];

    CUDAStream<float2>* paramStream = new CUDAStream<float2>(numAngles, 1, "BondAngleParameter");
    gpu->psBondAngleParameter = paramStream;
    gpu->sim.pBondAngleParameter = paramStream->_pDevStream[0];

    for (int n = 0; n < numAngles; n++)
    {
        int4& id1 = (*id1Stream)[n];
        float2& param = (*paramStream)[n];
        id1.x = atom1[n];
        id1.y = atom2[n];
        id1.z = atom3[n];
        param.x = angle[n];
        param.y = k[n];

        // Claim one output buffer slot per participating atom.
        int4& hostId1 = id1Stream->_pSysData[n];
        int2& hostId2 = id2Stream->_pSysData[n];
        hostId1.w = gpu->pOutputBufferCounter[hostId1.x]++;
        hostId2.x = gpu->pOutputBufferCounter[hostId1.y]++;
        hostId2.y = gpu->pOutputBufferCounter[hostId1.z]++;
#if (DUMP_PARAMETERS == 1)
        cout << n << " " << id1.x << " " << id1.y << " " << id1.z << " " << id1.w << " "
             << (*id2Stream)[n].x << " " << (*id2Stream)[n].y << " "
             << param.x << " " << param.y << endl;
#endif
    }

    id1Stream->Upload();
    id2Stream->Upload();
    paramStream->Upload();
}
// Allocates the two dihedral ID streams and the dihedral parameter stream, fills them
// from the supplied per-dihedral vectors, registers them on the context and uploads them.
//
//   atom1..atom4          - the four atom indices of each dihedral (atom1.size() gives the count)
//   k, phase, periodicity - stored as the x, y and z components of the parameter entry
//                           (periodicity is converted to float)
//
// ID2.x..w receive the next value of the per-atom pOutputBufferCounter for atom1..atom4.
extern "C"
void gpuSetDihedralParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<int>& atom3, const vector<int>& atom4,
        const vector<float>& k, const vector<float>& phase, const vector<int>& periodicity)
{
    const int numDihedrals = atom1.size();
    gpu->sim.dihedrals = numDihedrals;

    CUDAStream<int4>* id1Stream = new CUDAStream<int4>(numDihedrals, 1, "DihedralID1");
    gpu->psDihedralID1 = id1Stream;
    gpu->sim.pDihedralID1 = id1Stream->_pDevStream[0];

    CUDAStream<int4>* id2Stream = new CUDAStream<int4>(numDihedrals, 1, "DihedralID2");
    gpu->psDihedralID2 = id2Stream;
    gpu->sim.pDihedralID2 = id2Stream->_pDevStream[0];

    CUDAStream<float4>* paramStream = new CUDAStream<float4>(numDihedrals, 1, "DihedralParameter");
    gpu->psDihedralParameter = paramStream;
    gpu->sim.pDihedralParameter = paramStream->_pDevStream[0];

    for (int n = 0; n < numDihedrals; n++)
    {
        int4& id1 = (*id1Stream)[n];
        float4& param = (*paramStream)[n];
        id1.x = atom1[n];
        id1.y = atom2[n];
        id1.z = atom3[n];
        id1.w = atom4[n];
        param.x = k[n];
        param.y = phase[n];
        param.z = (float) periodicity[n];

        // Claim one output buffer slot per participating atom.
        int4& hostId1 = id1Stream->_pSysData[n];
        int4& hostId2 = id2Stream->_pSysData[n];
        hostId2.x = gpu->pOutputBufferCounter[hostId1.x]++;
        hostId2.y = gpu->pOutputBufferCounter[hostId1.y]++;
        hostId2.z = gpu->pOutputBufferCounter[hostId1.z]++;
        hostId2.w = gpu->pOutputBufferCounter[hostId1.w]++;
#if (DUMP_PARAMETERS == 1)
        cout << n << " " << id1.x << " " << id1.y << " " << id1.z << " " << id1.w << " "
             << (*id2Stream)[n].x << " " << (*id2Stream)[n].y << " " << (*id2Stream)[n].z << " " << (*id2Stream)[n].w << " "
             << param.x << " " << param.y << " " << param.z << endl;
#endif
    }

    id1Stream->Upload();
    id2Stream->Upload();
    paramStream->Upload();
}
// Allocates the two Ryckaert-Bellemans dihedral ID streams and the two parameter
// streams, fills them from the supplied per-dihedral vectors, registers them on the
// context and uploads them.
//
//   atom1..atom4 - the four atom indices of each dihedral (atom1.size() gives the count)
//   c0..c3       - stored as x, y, z, w of the first parameter entry
//   c4, c5       - stored as x, y of the second parameter entry
//
// ID2.x..w receive the next value of the per-atom pOutputBufferCounter for atom1..atom4.
extern "C"
void gpuSetRbDihedralParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<int>& atom3, const vector<int>& atom4,
        const vector<float>& c0, const vector<float>& c1, const vector<float>& c2, const vector<float>& c3, const vector<float>& c4, const vector<float>& c5)
{
    const int numDihedrals = atom1.size();
    gpu->sim.rb_dihedrals = numDihedrals;

    CUDAStream<int4>* id1Stream = new CUDAStream<int4>(numDihedrals, 1, "RbDihedralID1");
    gpu->psRbDihedralID1 = id1Stream;
    gpu->sim.pRbDihedralID1 = id1Stream->_pDevStream[0];

    CUDAStream<int4>* id2Stream = new CUDAStream<int4>(numDihedrals, 1, "RbDihedralID2");
    gpu->psRbDihedralID2 = id2Stream;
    gpu->sim.pRbDihedralID2 = id2Stream->_pDevStream[0];

    CUDAStream<float4>* param1Stream = new CUDAStream<float4>(numDihedrals, 1, "RbDihedralParameter1");
    gpu->psRbDihedralParameter1 = param1Stream;
    gpu->sim.pRbDihedralParameter1 = param1Stream->_pDevStream[0];

    CUDAStream<float2>* param2Stream = new CUDAStream<float2>(numDihedrals, 1, "RbDihedralParameter2");
    gpu->psRbDihedralParameter2 = param2Stream;
    gpu->sim.pRbDihedralParameter2 = param2Stream->_pDevStream[0];

    for (int n = 0; n < numDihedrals; n++)
    {
        int4& id1 = (*id1Stream)[n];
        float4& param1 = (*param1Stream)[n];
        float2& param2 = (*param2Stream)[n];
        id1.x = atom1[n];
        id1.y = atom2[n];
        id1.z = atom3[n];
        id1.w = atom4[n];
        param1.x = c0[n];
        param1.y = c1[n];
        param1.z = c2[n];
        param1.w = c3[n];
        param2.x = c4[n];
        param2.y = c5[n];

        // Claim one output buffer slot per participating atom.
        int4& hostId1 = id1Stream->_pSysData[n];
        int4& hostId2 = id2Stream->_pSysData[n];
        hostId2.x = gpu->pOutputBufferCounter[hostId1.x]++;
        hostId2.y = gpu->pOutputBufferCounter[hostId1.y]++;
        hostId2.z = gpu->pOutputBufferCounter[hostId1.z]++;
        hostId2.w = gpu->pOutputBufferCounter[hostId1.w]++;
#if (DUMP_PARAMETERS == 1)
        cout << n << " " << id1.x << " " << id1.y << " " << id1.z << " " << id1.w << " "
             << (*id2Stream)[n].x << " " << (*id2Stream)[n].y << " " << (*id2Stream)[n].z << " " << (*id2Stream)[n].w << " "
             << param1.x << " " << param1.y << " " << param1.z << " " << param1.w << " "
             << param2.x << " " << param2.y << endl;
#endif
    }

    id1Stream->Upload();
    id2Stream->Upload();
    param1Stream->Upload();
    param2Stream->Upload();
}
// Allocates the Lennard-Jones 1-4 ID and parameter streams, fills them from the
// supplied per-pair vectors, registers them on the context and uploads them.
//
//   epsfac, fudge - multiplied together to scale the charge product q1*q2
//   atom1, atom2  - the two atom indices of each pair (atom1.size() gives the pair count)
//   c6, c12       - combined into the x and y parameter components (see below)
//   q1, q2        - charges whose scaled product becomes the z parameter component
//
// Per pair the parameter entry is
//   x = c6*c6/c12             (0 when c12 is exactly 0)
//   y = (c12/c6)^(1/6)        (1 when c12 is exactly 0)
//   z = epsfac*fudge*q1*q2
// The w component is not written by this function.
//
// The z/w components of each ID entry receive the next value of the per-atom
// pOutputBufferCounter for atom1 and atom2 respectively.
extern "C"
void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const vector<int>& atom1, const vector<int>& atom2,
        const vector<float>& c6, const vector<float>& c12, const vector<float>& q1, const vector<float>& q2)
{
    int LJ14s = atom1.size();
    float scale = epsfac * fudge;

    gpu->sim.LJ14s = LJ14s;
    CUDAStream<int4>* psLJ14ID = new CUDAStream<int4>(LJ14s, 1, "LJ14ID");
    gpu->psLJ14ID = psLJ14ID;
    gpu->sim.pLJ14ID = psLJ14ID->_pDevStream[0];
    CUDAStream<float4>* psLJ14Parameter = new CUDAStream<float4>(LJ14s, 1, "LJ14Parameter");
    gpu->psLJ14Parameter = psLJ14Parameter;
    gpu->sim.pLJ14Parameter = psLJ14Parameter->_pDevStream[0];

    for (int i = 0; i < LJ14s; i++)
    {
        (*psLJ14ID)[i].x = atom1[i];
        (*psLJ14ID)[i].y = atom2[i];
        // Claim one output buffer slot per participating atom.
        psLJ14ID->_pSysData[i].z = gpu->pOutputBufferCounter[psLJ14ID->_pSysData[i].x]++;
        psLJ14ID->_pSysData[i].w = gpu->pOutputBufferCounter[psLJ14ID->_pSysData[i].y]++;

        float p0, p1, p2;
        if (c12[i] == 0.0f)
        {
            // No repulsive term: avoid dividing by zero and use neutral values.
            p0 = 0.0f;
            p1 = 1.0f;
        }
        else
        {
            p0 = c6[i] * c6[i] / c12[i];
            p1 = pow(c12[i] / c6[i], 1.0f / 6.0f);
        }
        p2 = scale * q1[i] * q2[i];
        (*psLJ14Parameter)[i].x = p0;
        (*psLJ14Parameter)[i].y = p1;
        (*psLJ14Parameter)[i].z = p2;

        // The dump has to live inside the loop: i, p0, p1 and p2 are only in scope here.
#if (DUMP_PARAMETERS == 1)
        cout <<
            i << " " <<
            (*psLJ14ID)[i].x << " " <<
            (*psLJ14ID)[i].y << " " <<
            (*psLJ14ID)[i].z << " " <<
            (*psLJ14ID)[i].w << " " <<
            (*psLJ14Parameter)[i].x << " " <<
            (*psLJ14Parameter)[i].y << " " <<
            (*psLJ14Parameter)[i].z << " " <<
            p0 << " " <<
            p1 << " " <<
            p2 << " " <<
            endl;
#endif
    }

    psLJ14ID->Upload();
    psLJ14Parameter->Upload();
}
// Stores the exclusion lists on the context.
//
// When the context already holds a non-empty set of exclusions, the new set must have
// the same number of per-atom lists, each list must have the same length as the stored
// one, and every new entry must be present in the stored list; otherwise an
// OpenMMException is thrown and the stored set is left unchanged.
extern "C"
void setExclusions(gpuContext gpu, const vector<vector<int> >& exclusions)
{
    if (gpu->exclusions.size() > 0)
    {
        bool matches = (exclusions.size() == gpu->exclusions.size());
        for (int atom = 0; matches && atom < (int) exclusions.size(); atom++)
        {
            if (exclusions[atom].size() != gpu->exclusions[atom].size())
            {
                matches = false;
                continue;
            }
            // Every requested partner must already be listed for this atom.
            for (int j = 0; matches && j < (int) exclusions[atom].size(); j++)
            {
                const bool missing = find(gpu->exclusions[atom].begin(), gpu->exclusions[atom].end(), exclusions[atom][j])
                                     == gpu->exclusions[atom].end();
                if (missing)
                    matches = false;
            }
        }
        if (!matches)
            throw OpenMMException("All nonbonded forces must have identical sets of exceptions");
    }
    gpu->exclusions = exclusions;
}
// Records the nonbonded method and epsfac on the context and fills the per-atom charge
// (psPosq4 .w) and the two psSigEps2 components from c6/c12, then uploads both streams.
//
//   c6, c12    - per-atom coefficients; c6.size() gives the number of atoms processed.
//                When both are > 0:  x = 0.5*(c12/c6)^(1/6),  y = c6*sqrt(1/c12).
//                Otherwise:          x = 0.5,                 y = 0.
//   q          - per-atom charge
//   symbol     - optional per-atom symbols; copied to pAtomSymbol when non-empty
//   exclusions - forwarded to setExclusions() when at least one atom is processed
//
// Entries from natoms up to paddedNumberOfAtoms are given far-away positions and zero
// charge and parameters.
// NOTE(review): `atom` is not read by this function.
extern "C"
void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const vector<int>& atom, const vector<float>& c6, const vector<float>& c12, const vector<float>& q,
        const vector<char>& symbol, const vector<vector<int> >& exclusions, CudaNonbondedMethod method)
{
    const unsigned int numParticles = c6.size();
    gpu->sim.epsfac = epsfac;
    gpu->sim.nonbondedMethod = method;
    if (numParticles > 0)
        setExclusions(gpu, exclusions);

    const bool haveSymbols = (symbol.size() > 0);
    for (unsigned int n = 0; n < numParticles; n++)
    {
        const float charge = q[n];
        float sigTerm = 0.5f;
        float epsTerm = 0.0f;
        if ((c6[n] > 0.0f) && (c12[n] > 0.0f))
        {
            sigTerm = 0.5f * pow(c12[n] / c6[n], 1.0f / 6.0f);
            epsTerm = c6[n] * sqrt(1.0f / c12[n]);
        }
        if (haveSymbols)
            gpu->pAtomSymbol[n] = symbol[n];
        (*gpu->psPosq4)[n].w = charge;
        (*gpu->psSigEps2)[n].x = sigTerm;
        (*gpu->psSigEps2)[n].y = epsTerm;
    }

    // Dummy out extra atom data
    for (unsigned int n = gpu->natoms; n < gpu->sim.paddedNumberOfAtoms; n++)
    {
        const float parked = 100000.0f + n * 10.0f;
        (*gpu->psPosq4)[n].x = parked;
        (*gpu->psPosq4)[n].y = parked;
        (*gpu->psPosq4)[n].z = parked;
        (*gpu->psPosq4)[n].w = 0.0f;
        (*gpu->psSigEps2)[n].x = 0.0f;
        (*gpu->psSigEps2)[n].y = 0.0f;
    }

    gpu->psPosq4->Upload();
    gpu->psSigEps2->Upload();
}
/**
 * Set the nonbonded cutoff distance and the derived reaction field constants.
 *
 * Throws OpenMMException if a different, non-zero cutoff has already been stored
 * on the context.
 *
 * @param gpu                the context to update
 * @param cutoffDistance     the cutoff distance
 * @param solventDielectric  the solvent dielectric used for the reaction field terms
 */
extern "C"
void gpuSetNonbondedCutoff(gpuContext gpu, float cutoffDistance, float solventDielectric)
{
    const bool cutoffAlreadySet = (gpu->sim.nonbondedCutoff != 0.0f);
    if (cutoffAlreadySet && gpu->sim.nonbondedCutoff != cutoffDistance)
        throw OpenMMException("All nonbonded forces must use the same cutoff");

    gpu->sim.nonbondedCutoff    = cutoffDistance;
    gpu->sim.nonbondedCutoffSqr = cutoffDistance*cutoffDistance;

    // K = rc^-3 * (eps-1)/(2*eps+1),  C = (1/rc) * 3*eps/(2*eps+1)
    gpu->sim.reactionFieldK = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0f)/(2.0f*solventDielectric+1.0f);
    gpu->sim.reactionFieldC = (1.0f / cutoffDistance)*(3.0f*solventDielectric)/(2.0f*solventDielectric+1.0f);
}
/**
 * Define (or redefine) one tabulated function as a natural cubic spline and upload
 * its coefficients to the device.
 *
 * The values are taken to be sampled at numValues evenly spaced points from min to max.
 * One float4 is stored per interval i: (values[i], values[i+1], derivs[i]/6, derivs[i+1]/6),
 * where derivs is the output of SplineFitter::createNaturalSpline().
 *
 * The spline is fitted before the context is modified, and the previous coefficient
 * stream is cleared from the context before the replacement is allocated, so a failure
 * part way through does not leave a pointer to freed memory behind.
 *
 * @param gpu     the context to update
 * @param index   slot of the function; must be in [0, MAX_TABULATED_FUNCTIONS)
 * @param name    name recorded for the function
 * @param values  tabulated values; at least two are required
 * @param min     value of the independent variable for values[0]
 * @param max     value of the independent variable for the last entry of values
 * @throws OpenMMException if index is out of range or fewer than two values are given
 */
extern "C"
void gpuSetTabulatedFunction(gpuContext gpu, int index, const string& name, const vector<double>& values, double min, double max)
{
    if (index < 0 || index >= MAX_TABULATED_FUNCTIONS) {
        stringstream str;
        str << "Only " << MAX_TABULATED_FUNCTIONS << " tabulated functions are supported";
        throw OpenMMException(str.str());
    }

    // With fewer than two values there is no interval to fit: the stream length would be
    // zero or negative and the knot spacing below would divide by zero.
    if (values.size() < 2)
        throw OpenMMException("A tabulated function must contain at least two values");

    // Compute the spline coefficients.

    const int numValues = (int) values.size();
    vector<double> x(numValues), derivs;
    for (int i = 0; i < numValues; i++)
        x[i] = min+i*(max-min)/(numValues-1);
    OpenMM::SplineFitter::createNaturalSpline(x, values, derivs);

    // Release any previous definition, clearing the pointers before allocating the new stream.

    if (gpu->tabulatedFunctions[index].coefficients != NULL) {
        delete gpu->tabulatedFunctions[index].coefficients;
        gpu->tabulatedFunctions[index].coefficients = NULL;
        gpu->sim.pTabulatedFunctionCoefficients[index] = NULL;
    }
    CUDAStream<float4>* coeff = new CUDAStream<float4>(numValues-1, 1, "TabulatedFunction");
    gpu->tabulatedFunctions[index].coefficients = coeff;
    gpu->sim.pTabulatedFunctionCoefficients[index] = coeff->_pDevData;
    gpu->tabulatedFunctions[index].name = name;
    gpu->tabulatedFunctions[index].min = min;
    gpu->tabulatedFunctions[index].max = max;
    gpu->tabulatedFunctionsChanged = true;

    for (int i = 0; i < numValues-1; i++)
        (*coeff)[i] = make_float4((float) values[i], (float) values[i+1], (float) (derivs[i]/6.0), (float) (derivs[i+1]/6.0));
    coeff->Upload();
}
/**
 * Upload the bonds of a CustomBondForce and build its energy and force expressions.
 *
 * For each bond, psCustomBondID holds (atom1, atom2, output buffer index for atom1,
 * output buffer index for atom2) and psCustomBondParams holds up to four per-bond
 * parameter values.  pOutputBufferCounter is raised, per atom, to the number of
 * buffers used here when that exceeds its current value.
 *
 * @param gpu               the context to update
 * @param bondAtom1         first atom of each bond
 * @param bondAtom2         second atom of each bond
 * @param bondParams        per-bond parameter values (at most the first four are used)
 * @param energyExp         the energy expression, as a function of "r" and the parameters
 * @param paramNames        names of the per-bond parameters (at most four)
 * @param globalParamNames  names of the global parameters (at most eight)
 * @throws OpenMMException if a limit is exceeded or custom bonds were already set
 */
extern "C"
void gpuSetCustomBondParameters(gpuContext gpu, const vector<int>& bondAtom1, const vector<int>& bondAtom2, const vector<vector<double> >& bondParams,
            const string& energyExp, const vector<string>& paramNames, const vector<string>& globalParamNames)
{
    if (paramNames.size() > 4)
        throw OpenMMException("CudaPlatform only supports four per-bond parameters for custom bond forces");
    if (globalParamNames.size() > 8)
        throw OpenMMException("CudaPlatform only supports eight global parameters for custom bond forces");
    if (gpu->psCustomBondID != NULL)
        throw OpenMMException("CudaPlatform only supports a single CustomBondForce per System");

    gpu->sim.customBonds = bondAtom1.size();
    gpu->sim.customBondParameters = paramNames.size();

    CUDAStream<int4>* bondIds = new CUDAStream<int4>(gpu->sim.customBonds, 1, "CustomBondId");
    gpu->psCustomBondID = bondIds;
    gpu->sim.pCustomBondID = bondIds->_pDevData;
    CUDAStream<float4>* bondValues = new CUDAStream<float4>(gpu->sim.customBonds, 1, "CustomBondParams");
    gpu->psCustomBondParams = bondValues;
    gpu->sim.pCustomBondParams = bondValues->_pDevData;

    // Hand out one output buffer slot per (bond, atom) pair, in bond order.
    vector<int> buffersUsed(gpu->natoms, 0);
    const int numBonds = (int) bondAtom1.size();
    for (int bond = 0; bond < numBonds; ++bond) {
        int4& id = (*bondIds)[bond];
        id.x = bondAtom1[bond];
        id.y = bondAtom2[bond];
        id.z = buffersUsed[bondAtom1[bond]]++;
        id.w = buffersUsed[bondAtom2[bond]]++;

        const vector<double>& given = bondParams[bond];
        float4& packed = (*bondValues)[bond];
        if (given.size() > 0)
            packed.x = (float) given[0];
        if (given.size() > 1)
            packed.y = (float) given[1];
        if (given.size() > 2)
            packed.z = (float) given[2];
        if (given.size() > 3)
            packed.w = (float) given[3];
    }
    bondIds->Upload();
    bondValues->Upload();

    for (int atom = 0; atom < (int) buffersUsed.size(); ++atom) {
        if (buffersUsed[atom] > (int) gpu->pOutputBufferCounter[atom])
            gpu->pOutputBufferCounter[atom] = buffersUsed[atom];
    }

    // Create the Expressions: "r" first, then the per-bond parameters in order.

    vector<string> variables(1, "r");
    variables.insert(variables.end(), paramNames.begin(), paramNames.end());
    SetCustomBondEnergyExpression(createExpression<256>(
            gpu, energyExp,
            Lepton::Parser::parse(energyExp).optimize().createProgram(),
            variables, globalParamNames, gpu->sim.customExpressionStackSize));
    SetCustomBondForceExpression(createExpression<256>(
            gpu, energyExp,
            Lepton::Parser::parse(energyExp).differentiate("r").optimize().createProgram(),
            variables, globalParamNames, gpu->sim.customExpressionStackSize));
}
/**
 * Upload the angles of a CustomAngleForce and build its energy and force expressions.
 *
 * For each angle, psCustomAngleID1 holds (atom1, atom2, atom3, output buffer index for
 * atom1), psCustomAngleID2 holds the output buffer indices for atom2 and atom3, and
 * psCustomAngleParams holds up to four per-angle parameter values.  pOutputBufferCounter
 * is raised, per atom, to the number of buffers used here when that exceeds its current value.
 *
 * @param gpu               the context to update
 * @param angleAtom1        first atom of each angle
 * @param angleAtom2        second atom of each angle
 * @param angleAtom3        third atom of each angle
 * @param angleParams       per-angle parameter values (at most the first four are used)
 * @param energyExp         the energy expression, as a function of "theta" and the parameters
 * @param paramNames        names of the per-angle parameters (at most four)
 * @param globalParamNames  names of the global parameters (at most eight)
 * @throws OpenMMException if a limit is exceeded or custom angles were already set
 */
extern "C"
void gpuSetCustomAngleParameters(gpuContext gpu, const vector<int>& angleAtom1, const vector<int>& angleAtom2, const vector<int>& angleAtom3, const vector<vector<double> >& angleParams,
            const string& energyExp, const vector<string>& paramNames, const vector<string>& globalParamNames)
{
    if (paramNames.size() > 4)
        throw OpenMMException("CudaPlatform only supports four per-angle parameters for custom angle forces");
    if (globalParamNames.size() > 8)
        throw OpenMMException("CudaPlatform only supports eight global parameters for custom angle forces");
    if (gpu->psCustomAngleID1 != NULL)
        throw OpenMMException("CudaPlatform only supports a single CustomAngleForce per System");

    gpu->sim.customAngles = angleAtom1.size();
    gpu->sim.customAngleParameters = paramNames.size();

    CUDAStream<int4>* atomIds = new CUDAStream<int4>(gpu->sim.customAngles, 1, "CustomAngleId1");
    gpu->psCustomAngleID1 = atomIds;
    gpu->sim.pCustomAngleID1 = atomIds->_pDevData;
    CUDAStream<int2>* bufferIds = new CUDAStream<int2>(gpu->sim.customAngles, 1, "CustomAngleId2");
    gpu->psCustomAngleID2 = bufferIds;
    gpu->sim.pCustomAngleID2 = bufferIds->_pDevData;
    CUDAStream<float4>* angleValues = new CUDAStream<float4>(gpu->sim.customAngles, 1, "CustomAngleParams");
    gpu->psCustomAngleParams = angleValues;
    gpu->sim.pCustomAngleParams = angleValues->_pDevData;

    // Hand out one output buffer slot per (angle, atom) pair, in angle order.
    vector<int> buffersUsed(gpu->natoms, 0);
    const int numAngles = (int) angleAtom1.size();
    for (int angle = 0; angle < numAngles; ++angle) {
        int4& atoms = (*atomIds)[angle];
        atoms.x = angleAtom1[angle];
        atoms.y = angleAtom2[angle];
        atoms.z = angleAtom3[angle];
        atoms.w = buffersUsed[angleAtom1[angle]]++;

        int2& buffers = (*bufferIds)[angle];
        buffers.x = buffersUsed[angleAtom2[angle]]++;
        buffers.y = buffersUsed[angleAtom3[angle]]++;

        const vector<double>& given = angleParams[angle];
        float4& packed = (*angleValues)[angle];
        if (given.size() > 0)
            packed.x = (float) given[0];
        if (given.size() > 1)
            packed.y = (float) given[1];
        if (given.size() > 2)
            packed.z = (float) given[2];
        if (given.size() > 3)
            packed.w = (float) given[3];
    }
    atomIds->Upload();
    bufferIds->Upload();
    angleValues->Upload();

    for (int atom = 0; atom < (int) buffersUsed.size(); ++atom) {
        if (buffersUsed[atom] > (int) gpu->pOutputBufferCounter[atom])
            gpu->pOutputBufferCounter[atom] = buffersUsed[atom];
    }

    // Create the Expressions: "theta" first, then the per-angle parameters in order.

    vector<string> variables(1, "theta");
    variables.insert(variables.end(), paramNames.begin(), paramNames.end());
    SetCustomAngleEnergyExpression(createExpression<256>(
            gpu, energyExp,
            Lepton::Parser::parse(energyExp).optimize().createProgram(),
            variables, globalParamNames, gpu->sim.customExpressionStackSize));
    SetCustomAngleForceExpression(createExpression<256>(
            gpu, energyExp,
            Lepton::Parser::parse(energyExp).differentiate("theta").optimize().createProgram(),
            variables, globalParamNames, gpu->sim.customExpressionStackSize));
}
/**
 * Upload the torsions of a CustomTorsionForce and build its energy and force expressions.
 *
 * For each torsion, psCustomTorsionID1 holds the four atom indices, psCustomTorsionID2
 * holds the output buffer index assigned to each of those atoms, and psCustomTorsionParams
 * holds up to four per-torsion parameter values.  pOutputBufferCounter is raised, per atom,
 * to the number of buffers used here when that exceeds its current value.
 *
 * @param gpu               the context to update
 * @param torsionAtom1      first atom of each torsion
 * @param torsionAtom2      second atom of each torsion
 * @param torsionAtom3      third atom of each torsion
 * @param torsionAtom4      fourth atom of each torsion
 * @param torsionParams     per-torsion parameter values (at most the first four are used)
 * @param energyExp         the energy expression, as a function of "theta" and the parameters
 * @param paramNames        names of the per-torsion parameters (at most four)
 * @param globalParamNames  names of the global parameters (at most eight)
 * @throws OpenMMException if a limit is exceeded or custom torsions were already set
 */
extern "C"
void gpuSetCustomTorsionParameters(gpuContext gpu, const vector<int>& torsionAtom1, const vector<int>& torsionAtom2, const vector<int>& torsionAtom3, const vector<int>& torsionAtom4, const vector<vector<double> >& torsionParams,
            const string& energyExp, const vector<string>& paramNames, const vector<string>& globalParamNames)
{
    if (paramNames.size() > 4)
        throw OpenMMException("CudaPlatform only supports four per-torsion parameters for custom torsion forces");
    if (globalParamNames.size() > 8)
        throw OpenMMException("CudaPlatform only supports eight global parameters for custom torsion forces");
    if (gpu->psCustomTorsionID1 != NULL)
        throw OpenMMException("CudaPlatform only supports a single CustomTorsionForce per System");

    gpu->sim.customTorsions = torsionAtom1.size();
    gpu->sim.customTorsionParameters = paramNames.size();

    CUDAStream<int4>* atomIds = new CUDAStream<int4>(gpu->sim.customTorsions, 1, "CustomTorsionId1");
    gpu->psCustomTorsionID1 = atomIds;
    gpu->sim.pCustomTorsionID1 = atomIds->_pDevData;
    CUDAStream<int4>* bufferIds = new CUDAStream<int4>(gpu->sim.customTorsions, 1, "CustomTorsionId2");
    gpu->psCustomTorsionID2 = bufferIds;
    gpu->sim.pCustomTorsionID2 = bufferIds->_pDevData;
    CUDAStream<float4>* torsionValues = new CUDAStream<float4>(gpu->sim.customTorsions, 1, "CustomTorsionParams");
    gpu->psCustomTorsionParams = torsionValues;
    gpu->sim.pCustomTorsionParams = torsionValues->_pDevData;

    // Hand out one output buffer slot per (torsion, atom) pair, in torsion order.
    vector<int> buffersUsed(gpu->natoms, 0);
    const int numTorsions = (int) torsionAtom1.size();
    for (int torsion = 0; torsion < numTorsions; ++torsion) {
        int4& atoms = (*atomIds)[torsion];
        atoms.x = torsionAtom1[torsion];
        atoms.y = torsionAtom2[torsion];
        atoms.z = torsionAtom3[torsion];
        atoms.w = torsionAtom4[torsion];

        int4& buffers = (*bufferIds)[torsion];
        buffers.x = buffersUsed[torsionAtom1[torsion]]++;
        buffers.y = buffersUsed[torsionAtom2[torsion]]++;
        buffers.z = buffersUsed[torsionAtom3[torsion]]++;
        buffers.w = buffersUsed[torsionAtom4[torsion]]++;

        const vector<double>& given = torsionParams[torsion];
        float4& packed = (*torsionValues)[torsion];
        if (given.size() > 0)
            packed.x = (float) given[0];
        if (given.size() > 1)
            packed.y = (float) given[1];
        if (given.size() > 2)
            packed.z = (float) given[2];
        if (given.size() > 3)
            packed.w = (float) given[3];
    }
    atomIds->Upload();
    bufferIds->Upload();
    torsionValues->Upload();

    for (int atom = 0; atom < (int) buffersUsed.size(); ++atom) {
        if (buffersUsed[atom] > (int) gpu->pOutputBufferCounter[atom])
            gpu->pOutputBufferCounter[atom] = buffersUsed[atom];
    }

    // Create the Expressions: "theta" first, then the per-torsion parameters in order.

    vector<string> variables(1, "theta");
    variables.insert(variables.end(), paramNames.begin(), paramNames.end());
    SetCustomTorsionEnergyExpression(createExpression<256>(
            gpu, energyExp,
            Lepton::Parser::parse(energyExp).optimize().createProgram(),
            variables, globalParamNames, gpu->sim.customExpressionStackSize));
    SetCustomTorsionForceExpression(createExpression<256>(
            gpu, energyExp,
            Lepton::Parser::parse(energyExp).differentiate("theta").optimize().createProgram(),
            variables, globalParamNames, gpu->sim.customExpressionStackSize));
}
/**
 * Upload the particles of a CustomExternalForce and build its energy expression and
 * the three force expressions (derivatives with respect to "x", "y" and "z").
 *
 * psCustomExternalID holds the atom index of each entry and psCustomExternalParams
 * holds up to four per-particle parameter values.
 *
 * @param gpu               the context to update
 * @param atomIndex         the atom each entry applies to
 * @param atomParams        per-entry parameter values (at most the first four are used)
 * @param energyExp         the energy expression, as a function of x, y, z and the parameters
 * @param paramNames        names of the per-particle parameters (at most four)
 * @param globalParamNames  names of the global parameters (at most eight)
 * @throws OpenMMException if a limit is exceeded or custom external forces were already set
 */
extern "C"
void gpuSetCustomExternalParameters(gpuContext gpu, const vector<int>& atomIndex, const vector<vector<double> >& atomParams,
            const string& energyExp, const vector<string>& paramNames, const vector<string>& globalParamNames)
{
    if (paramNames.size() > 4)
        throw OpenMMException("CudaPlatform only supports four per-particle parameters for custom external forces");
    if (globalParamNames.size() > 8)
        throw OpenMMException("CudaPlatform only supports eight global parameters for custom external forces");
    if (gpu->psCustomExternalID != NULL)
        throw OpenMMException("CudaPlatform only supports a single CustomExternalForce per System");

    gpu->sim.customExternals = atomIndex.size();
    gpu->sim.customExternalParameters = paramNames.size();

    CUDAStream<int>* ids = new CUDAStream<int>(gpu->sim.customExternals, 1, "CustomExternalId");
    gpu->psCustomExternalID = ids;
    gpu->sim.pCustomExternalID = ids->_pDevData;
    CUDAStream<float4>* values = new CUDAStream<float4>(gpu->sim.customExternals, 1, "CustomExternalParams");
    gpu->psCustomExternalParams = values;
    gpu->sim.pCustomExternalParams = values->_pDevData;

    const int numEntries = (int) atomIndex.size();
    for (int entry = 0; entry < numEntries; ++entry) {
        (*ids)[entry] = atomIndex[entry];

        const vector<double>& given = atomParams[entry];
        float4& packed = (*values)[entry];
        if (given.size() > 0)
            packed.x = (float) given[0];
        if (given.size() > 1)
            packed.y = (float) given[1];
        if (given.size() > 2)
            packed.z = (float) given[2];
        if (given.size() > 3)
            packed.w = (float) given[3];
    }
    ids->Upload();
    values->Upload();

    // Create the Expressions: the coordinates first, then the per-particle parameters in order.

    vector<string> variables;
    variables.push_back("x");
    variables.push_back("y");
    variables.push_back("z");
    variables.insert(variables.end(), paramNames.begin(), paramNames.end());
    SetCustomExternalEnergyExpression(createExpression<256>(
            gpu, energyExp,
            Lepton::Parser::parse(energyExp).optimize().createProgram(),
            variables, globalParamNames, gpu->sim.customExpressionStackSize));
    SetCustomExternalForceExpressions(
            createExpression<256>(
                    gpu, energyExp,
                    Lepton::Parser::parse(energyExp).differentiate("x").optimize().createProgram(),
                    variables, globalParamNames, gpu->sim.customExpressionStackSize),
            createExpression<256>(
                    gpu, energyExp,
                    Lepton::Parser::parse(energyExp).differentiate("y").optimize().createProgram(),
                    variables, globalParamNames, gpu->sim.customExpressionStackSize),
            createExpression<256>(
                    gpu, energyExp,
                    Lepton::Parser::parse(energyExp).differentiate("z").optimize().createProgram(),
                    variables, globalParamNames, gpu->sim.customExpressionStackSize));
}
/**
 * Upload the per-atom parameters of a CustomNonbondedForce, record the ranges of the
 * tabulated functions, and build the energy and force expressions.
 *
 * The expression variables are the per-atom parameter names suffixed with "1" and then
 * with "2" (each group padded with empty names up to four entries), followed by "r".
 *
 * @param gpu               the context to update
 * @param parameters        per-atom parameter values (at most the first four are used)
 * @param exclusions        passed to setExclusions()
 * @param method            stored in sim.customNonbondedMethod
 * @param cutoffDistance    the cutoff; must match any non-zero cutoff already stored
 * @param energyExp         the energy expression
 * @param paramNames        names of the per-atom parameters (at most four)
 * @param globalParamNames  names of the global parameters (at most eight)
 * @throws OpenMMException if the cutoff conflicts or a parameter limit is exceeded
 */
extern "C"
void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double> >& parameters, const vector<vector<int> >& exclusions,
            CudaNonbondedMethod method, float cutoffDistance, const string& energyExp,
            const vector<string>& paramNames, const vector<string>& globalParamNames)
{
    if (gpu->sim.nonbondedCutoff != 0.0f && gpu->sim.nonbondedCutoff != cutoffDistance)
        throw OpenMMException("All nonbonded forces must use the same cutoff");
    if (paramNames.size() > 4)
        throw OpenMMException("CudaPlatform only supports four per-atom parameters for custom nonbonded forces");
    if (globalParamNames.size() > 8)
        throw OpenMMException("CudaPlatform only supports eight global parameters for custom nonbonded forces");
    gpu->sim.nonbondedCutoff = cutoffDistance;
    gpu->sim.nonbondedCutoffSqr = cutoffDistance*cutoffDistance;
    gpu->sim.customNonbondedMethod = method;
    gpu->sim.customParameters = paramNames.size();
    setExclusions(gpu, exclusions);
    gpu->psCustomParams = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "CustomParams");
    gpu->sim.pCustomParams = gpu->psCustomParams->_pDevData;
    for (int i = 0; i < (int) parameters.size(); i++) {
        if (parameters[i].size() > 0)
            (*gpu->psCustomParams)[i].x = (float) parameters[i][0];
        if (parameters[i].size() > 1)
            (*gpu->psCustomParams)[i].y = (float) parameters[i][1];
        if (parameters[i].size() > 2)
            (*gpu->psCustomParams)[i].z = (float) parameters[i][2];
        if (parameters[i].size() > 3)
            (*gpu->psCustomParams)[i].w = (float) parameters[i][3];
    }
    gpu->psCustomParams->Upload();

    // This class serves as a placeholder for custom functions in expressions.

    class FunctionPlaceholder : public Lepton::CustomFunction {
    public:
        int getNumArguments() const {
            return 1;
        }
        double evaluate(const double* arguments) const {
            return 0.0;
        }
        double evaluateDerivative(const double* arguments, const int* derivOrder) const {
            return 0.0;
        }
        CustomFunction* clone() const {
            return new FunctionPlaceholder();
        }
    };

    // Record the tabulated functions, which were previously set with calls to gpuSetTabulatedFunction().
    // The placeholder has automatic storage so that it is released even if parsing or
    // expression creation below throws; it is only needed for the duration of this call.

    FunctionPlaceholder placeholder;
    map<string, Lepton::CustomFunction*> functions;
    gpu->psTabulatedFunctionParams = new CUDAStream<float4>(MAX_TABULATED_FUNCTIONS, 1, "TabulatedFunctionRange");
    gpu->sim.pTabulatedFunctionParams = gpu->psTabulatedFunctionParams->_pDevData;
    for (int i = 0; i < MAX_TABULATED_FUNCTIONS; i++) {
        gpuTabulatedFunction& func = gpu->tabulatedFunctions[i];
        if (func.coefficients != NULL) {
            // (min, max, number of intervals per unit of the argument, index of the last interval)
            (*gpu->psTabulatedFunctionParams)[i] = make_float4((float) func.min, (float) func.max, (float) (func.coefficients->_length/(func.max-func.min)), (float) (func.coefficients->_length-1));
            functions[func.name] = &placeholder;
        }
    }
    gpu->psTabulatedFunctionParams->Upload();

    // Create the Expressions.

    vector<string> variables;
    for (int j = 1; j < 3; j++) {
        for (int i = 0; i < (int) paramNames.size(); i++) {
            stringstream name;
            name << paramNames[i] << j;
            variables.push_back(name.str());
        }
        // Pad each group to four slots so the second atom's parameters start at a fixed offset.
        for (int i = paramNames.size(); i < 4; i++)
            variables.push_back("");
    }
    variables.push_back("r");
    SetCustomNonbondedEnergyExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp, functions).optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
    SetCustomNonbondedForceExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp, functions).differentiate("r").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
}
/**
 * Build a 2048-entry table of erfc() and upload it to the device.
 *
 * Entry i holds erfc(i * (alphaEwald*nonbondedCutoff) / tableSize).  The table size and
 * the scale tableSize/(alphaEwald*nonbondedCutoff) are stored in the simulation
 * parameters.  Both sim.alphaEwald and sim.nonbondedCutoff are read here, so they
 * must be set before this is called.
 *
 * @param gpu  the context to update
 */
static void tabulateErfc(gpuContext gpu)
{
    const int numEntries = 2048;

    gpu->sim.tabulatedErfcSize = numEntries;
    gpu->sim.tabulatedErfcScale = numEntries/(gpu->sim.alphaEwald*gpu->sim.nonbondedCutoff);

    CUDAStream<float>* table = new CUDAStream<float>(numEntries, 1, "TabulatedErfc");
    gpu->psTabulatedErfc = table;
    gpu->sim.pTabulatedErfc = table->_pDevData;

    for (int entry = 0; entry < numEntries; ++entry) {
        (*table)[entry] = (float) erfc(entry*(gpu->sim.alphaEwald*gpu->sim.nonbondedCutoff)/numEntries);
    }
    table->Upload();
}
/**
 * Store the Ewald summation parameters, allocate the cos/sin sum buffer and
 * build the erfc() table.
 *
 * The buffer has (2*kmaxX-1)*(2*kmaxY-1)*(2*kmaxZ-1) float2 entries.
 *
 * @param gpu    the context to update
 * @param alpha  the Ewald alpha; also used to compute factorEwald = -1/(4*alpha^2)
 * @param kmaxx  stored in sim.kmaxX
 * @param kmaxy  stored in sim.kmaxY
 * @param kmaxz  stored in sim.kmaxZ
 */
extern "C"
void gpuSetEwaldParameters(gpuContext gpu, float alpha, int kmaxx, int kmaxy, int kmaxz)
{
    gpu->sim.alphaEwald  = alpha;
    gpu->sim.factorEwald = -1/(4*alpha*alpha);

    gpu->sim.kmaxX = kmaxx;
    gpu->sim.kmaxY = kmaxy;
    gpu->sim.kmaxZ = kmaxz;

    gpu->psEwaldCosSinSum = new CUDAStream<float2>(
            (gpu->sim.kmaxX*2-1) * (gpu->sim.kmaxY*2-1) * (gpu->sim.kmaxZ*2-1),
            1, "EwaldCosSinSum");
    gpu->sim.pEwaldCosSinSum = gpu->psEwaldCosSinSum->_pDevStream[0];

    // Needs alphaEwald, which was set above.
    tabulateErfc(gpu);
}
/**
 * Store the PME parameters, create the FFT plan, allocate the PME work buffers, build
 * the erfc() table and compute the B-spline moduli for each grid dimension.
 *
 * Changes relative to the previous implementation:
 *  - the fix-up of near-zero moduli used to read element i-1 for i == 0 and element i+1
 *    for i == ndata-1, both outside the array; the neighbours are now taken periodically;
 *  - the scratch array holding the spline samples is always large enough for the
 *    PME_ORDER values written to it, even for a grid no larger than PME_ORDER;
 *  - a failure of cufftPlan3d() is reported instead of being ignored;
 *  - locals that were computed but never read (the group counts and the spline
 *    derivative array) have been removed.
 *
 * @param gpu        the context to update
 * @param alpha      stored in sim.alphaEwald
 * @param gridSizeX  grid size along x
 * @param gridSizeY  grid size along y
 * @param gridSizeZ  grid size along z
 * @throws OpenMMException if the FFT plan cannot be created
 */
extern "C"
void gpuSetPMEParameters(gpuContext gpu, float alpha, int gridSizeX, int gridSizeY, int gridSizeZ)
{
    gpu->sim.alphaEwald = alpha;
    int3 gridSize = make_int3(gridSizeX, gridSizeY, gridSizeZ);
    gpu->sim.pmeGridSize = gridSize;
    int3 groupSize = make_int3(2, 4, 4);
    gpu->sim.pmeGroupSize = groupSize;
    if (cufftPlan3d(&gpu->fftplan, gridSize.x, gridSize.y, gridSize.z, CUFFT_C2C) != CUFFT_SUCCESS)
        throw OpenMMException("Error creating the FFT plan for PME");
    gpu->psPmeGrid = new CUDAStream<cufftComplex>(gridSize.x*gridSize.y*gridSize.z, 1, "PmeGrid");
    gpu->sim.pPmeGrid = gpu->psPmeGrid->_pDevData;
    gpu->psPmeBsplineModuli[0] = new CUDAStream<float>(gridSize.x, 1, "PmeBsplineModuli0");
    gpu->sim.pPmeBsplineModuli[0] = gpu->psPmeBsplineModuli[0]->_pDevData;
    gpu->psPmeBsplineModuli[1] = new CUDAStream<float>(gridSize.y, 1, "PmeBsplineModuli1");
    gpu->sim.pPmeBsplineModuli[1] = gpu->psPmeBsplineModuli[1]->_pDevData;
    gpu->psPmeBsplineModuli[2] = new CUDAStream<float>(gridSize.z, 1, "PmeBsplineModuli2");
    gpu->sim.pPmeBsplineModuli[2] = gpu->psPmeBsplineModuli[2]->_pDevData;
    gpu->psPmeBsplineTheta = new CUDAStream<float4>(PME_ORDER*gpu->natoms, 1, "PmeBsplineTheta");
    gpu->sim.pPmeBsplineTheta = gpu->psPmeBsplineTheta->_pDevData;
    gpu->psPmeBsplineDtheta = new CUDAStream<float4>(PME_ORDER*gpu->natoms, 1, "PmeBsplineDtheta");
    gpu->sim.pPmeBsplineDtheta = gpu->psPmeBsplineDtheta->_pDevData;
    gpu->psPmeAtomRange = new CUDAStream<int>(gridSize.x*gridSize.y*gridSize.z+1, 1, "PmeAtomRange");
    gpu->sim.pPmeAtomRange = gpu->psPmeAtomRange->_pDevData;
    gpu->psPmeAtomGridIndex = new CUDAStream<int2>(gpu->natoms, 1, "PmeAtomGridIndex");
    gpu->sim.pPmeAtomGridIndex = gpu->psPmeAtomGridIndex->_pDevData;
    tabulateErfc(gpu);

    // Initialize the b-spline moduli.

    int maxSize = max(max(gridSize.x, gridSize.y), gridSize.z);
    vector<double> data(PME_ORDER);
    data[PME_ORDER-1] = 0.0;
    data[1] = 0.0;
    data[0] = 1.0;
    for (int i = 3; i < PME_ORDER; i++)
    {
        double div = 1.0/(i-1.0);
        data[i-1] = 0.0;
        for (int j = 1; j < (i-1); j++)
            data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
        data[0] = div*data[0];
    }

    // Apply the final recursion step, which raises the spline to order PME_ORDER.

    double div = 1.0/(PME_ORDER-1);
    data[PME_ORDER-1] = 0.0;
    for (int i = 1; i < (PME_ORDER-1); i++)
        data[PME_ORDER-i-1] = div*(i*data[PME_ORDER-i-2]+(PME_ORDER-i)*data[PME_ORDER-i-1]);
    data[0] = div*data[0];

    // Entries 1..PME_ORDER receive the spline values and everything else stays zero.  The
    // array is sized to hold index PME_ORDER even when the grid is smaller than that.

    vector<double> bsplines_data(max(maxSize, (int) PME_ORDER+1), 0.0);
    for (int i = 1; i <= PME_ORDER; i++)
        bsplines_data[i] = data[i-1];

    // Evaluate the actual bspline moduli for X/Y/Z.

    for (int dim = 0; dim < 3; dim++)
    {
        int ndata = (dim == 0 ? gridSize.x : dim == 1 ? gridSize.y : gridSize.z);
        CUDAStream<float>& moduli = *gpu->psPmeBsplineModuli[dim];
        for (int i = 0; i < ndata; i++)
        {
            // Squared magnitude of the discrete Fourier transform of the spline samples.
            double sc = 0.0;
            double ss = 0.0;
            for (int j = 0; j < ndata; j++)
            {
                double arg = (2.0*M_PI*i*j)/ndata;
                sc += bsplines_data[j]*cos(arg);
                ss += bsplines_data[j]*sin(arg);
            }
            moduli[i] = (float) (sc*sc+ss*ss);
        }
        for (int i = 0; i < ndata; i++)
        {
            // Replace a vanishing modulus by the mean of its neighbours, wrapping around
            // at both ends so that no element outside [0, ndata) is read.
            if (moduli[i] < 1.0e-7)
                moduli[i] = (moduli[(i-1+ndata)%ndata]+moduli[(i+1)%ndata])*0.5f;
        }
        moduli.Upload();
    }
}
/**
 * Store the periodic box dimensions together with the quantities derived from them:
 * the inverse box lengths, 2*PI divided by each box length, and the box volume.
 *
 * @param gpu    the context to update
 * @param xsize  box length along x
 * @param ysize  box length along y
 * @param zsize  box length along z
 */
extern "C"
void gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize)
{
    // X axis
    gpu->sim.periodicBoxSizeX    = xsize;
    gpu->sim.invPeriodicBoxSizeX = 1.0f/xsize;
    gpu->sim.recipBoxSizeX       = 2.0f*PI/gpu->sim.periodicBoxSizeX;

    // Y axis
    gpu->sim.periodicBoxSizeY    = ysize;
    gpu->sim.invPeriodicBoxSizeY = 1.0f/ysize;
    gpu->sim.recipBoxSizeY       = 2.0f*PI/gpu->sim.periodicBoxSizeY;

    // Z axis
    gpu->sim.periodicBoxSizeZ    = zsize;
    gpu->sim.invPeriodicBoxSizeZ = 1.0f/zsize;
    gpu->sim.recipBoxSizeZ       = 2.0f*PI/gpu->sim.periodicBoxSizeZ;

    gpu->sim.cellVolume = gpu->sim.periodicBoxSizeX*gpu->sim.periodicBoxSizeY*gpu->sim.periodicBoxSizeZ;
}
/**
 * Load the OBC (GBSA) per-atom parameters and charges and upload them to the device.
 *
 * For each atom i, psObcData[i].x = radius[i] - dielectricOffset and
 * psObcData[i].y = scale[i] * psObcData[i].x; the charge goes to psPosq4[i].w.
 * Entries from radius.size() up to paddedNumberOfAtoms get placeholder values.
 * sim.preFactor is set to
 * 2 * electricConstant * (1/innerDielectric - 1/solventDielectric) * forceConversionFactor.
 *
 * @param gpu                the context to update
 * @param innerDielectric    the inner (solute) dielectric
 * @param solventDielectric  the solvent dielectric
 * @param radius             per-atom radii; its size is the number of atoms processed
 * @param scale              per-atom scale factors
 * @param charge             per-atom charges
 */
extern "C"
void gpuSetObcParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const vector<float>& radius, const vector<float>& scale, const vector<float>& charge)
{
    const unsigned int numAtoms = radius.size();

    gpu->bIncludeGBSA = true;
    for (unsigned int n = 0; n < numAtoms; ++n)
    {
        (*gpu->psObcData)[n].x = radius[n] - dielectricOffset;
        // Uses the offset radius just stored, not the raw radius.
        (*gpu->psObcData)[n].y = scale[n] * (*gpu->psObcData)[n].x;
        (*gpu->psPosq4)[n].w = charge[n];

#if (DUMP_PARAMETERS == 1)
        cout << n << " " << (*gpu->psObcData)[n].x << " " << (*gpu->psObcData)[n].y;
#endif
    }

    // Placeholder values for the padding entries.
    for (unsigned int pad = numAtoms; pad < gpu->sim.paddedNumberOfAtoms; ++pad)
    {
        (*gpu->psBornRadii)[pad] = 0.2f;
        (*gpu->psObcData)[pad].x = 0.01f;
        (*gpu->psObcData)[pad].y = 0.01f;
    }

    gpu->psBornRadii->Upload();
    gpu->psObcData->Upload();
    gpu->psPosq4->Upload();

    gpu->sim.preFactor = 2.0f*electricConstant*((1.0f/innerDielectric)-(1.0f/solventDielectric))*gpu->sim.forceConversionFactor;
}
/**
 * Load the GB/VI per-atom parameters and upload them to the device.
 *
 * For each atom i, psGBVIData[i] = (radius, scaledRadii, tau*gamma, 1) with
 * tau = 1/innerDielectric - 1/solventDielectric.  psObcData[i] is also filled with
 * (radius, 0.9*radius).  Entries from atom.size() up to paddedNumberOfAtoms get
 * placeholder values in psBornRadii and psGBVIData.  sim.preFactor is set to
 * 2 * electricConstant * tau * forceConversionFactor.
 *
 * Note that this function redefines DUMP_PARAMETERS to 0 for the remainder of the file.
 *
 * @param gpu                the context to update
 * @param innerDielectric    the inner (solute) dielectric
 * @param solventDielectric  the solvent dielectric
 * @param atom               its size is the number of atoms processed; the values are not read
 * @param radius             per-atom radii
 * @param gamma              per-atom gamma values
 * @param scaledRadii        per-atom scaled radii
 */
extern "C"
void gpuSetGBVIParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const vector<int>& atom, const vector<float>& radius, const vector<float>& gamma, const vector<float>& scaledRadii)
{
    const unsigned int numAtoms = atom.size();

    // Evaluated in single precision, then widened.
    const double tau = ((1.0f/innerDielectric)-(1.0f/solventDielectric));

    gpu->bIncludeGBVI = true;

#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 0
    for (unsigned int n = 0; n < numAtoms; ++n)
    {
        (*gpu->psGBVIData)[n].x = radius[n];
        (*gpu->psGBVIData)[n].y = scaledRadii[n];
        (*gpu->psGBVIData)[n].z = (float) (tau*gamma[n]);
        (*gpu->psGBVIData)[n].w = 1.0f;

        // The OBC data is filled in as well; an earlier note in this code says this should be removed.
        (*gpu->psObcData)[n].x = radius[n];
        (*gpu->psObcData)[n].y = 0.9f*radius[n];

#if (DUMP_PARAMETERS == 1)
        (void) fprintf(stderr, "GBVI param: %5u R=%14.7e scaledR=%14.7e gamma*tau=%14.7e bornRadiusScaleFactor=%14.7e\n",
                       n, (*gpu->psGBVIData)[n].x, (*gpu->psGBVIData)[n].y, (*gpu->psGBVIData)[n].z, (*gpu->psGBVIData)[n].w);
#endif
    }

    // Placeholder values for the padding entries.
    for (unsigned int pad = numAtoms; pad < gpu->sim.paddedNumberOfAtoms; ++pad)
    {
        (*gpu->psBornRadii)[pad] = 0.2f;
        (*gpu->psGBVIData)[pad].x = 0.01f;
        (*gpu->psGBVIData)[pad].y = 0.01f;
        (*gpu->psGBVIData)[pad].z = 0.01f;
        (*gpu->psGBVIData)[pad].w = 1.00f;
    }

    gpu->psBornRadii->Upload();
    gpu->psGBVIData->Upload();
    gpu->psObcData->Upload();

    gpu->sim.preFactor = 2.0f*electricConstant*((1.0f/innerDielectric)-(1.0f/solventDielectric))*gpu->sim.forceConversionFactor;

#if (DUMP_PARAMETERS == 1)
    (void) fprintf(stderr, "gpuSetGBVIParameters: preFactor=%14.6e elecCnstnt=%.4f frcCnvrsnFctr=%.4f tau=%.4f.\n",
                   gpu->sim.preFactor, 2.0f*electricConstant, gpu->sim.forceConversionFactor, ((1.0f/innerDielectric)-(1.0f/solventDielectric)));
#endif
}
/**
 * Mark a cluster as invalid and flag its central atom and all of its peripheral atoms
 * in invalidForShake.  Any still-valid cluster that is keyed in allClusters by one of
 * the peripheral atoms is invalidated recursively in the same way.
 *
 * @param cluster          the cluster to invalidate
 * @param allClusters      all clusters, keyed by atom index
 * @param invalidForShake  per-atom flags; set to true for every atom touched
 */
static void markShakeClusterInvalid(ShakeCluster& cluster, map<int, ShakeCluster>& allClusters, vector<bool>& invalidForShake)
{
    // Clearing the flag first stops the recursion from coming back to this cluster.
    cluster.valid = false;
    invalidForShake[cluster.centralID] = true;

    for (int k = 0; k < cluster.size; ++k) {
        const int peripheral = cluster.peripheralID[k];
        invalidForShake[peripheral] = true;

        map<int, ShakeCluster>::iterator linked = allClusters.find(peripheral);
        if (linked == allClusters.end())
            continue;
        if (!linked->second.valid)
            continue;
        markShakeClusterInvalid(linked->second, allClusters, invalidForShake);
    }
}
extern
"C"
void
gpuSetConstraintParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
distance
,
const
vector
<
float
>&
invMass1
,
const
vector
<
float
>&
invMass2
,
float
constraintTolerance
)
{
// Create a vector for recording which atoms are handled by SHAKE (or SETTLE).
vector
<
bool
>
isShakeAtom
(
gpu
->
natoms
,
false
);
// Find how many constraints each atom is involved in.
vector
<
int
>
constraintCount
(
gpu
->
natoms
,
0
);
for
(
int
i
=
0
;
i
<
(
int
)
atom1
.
size
();
i
++
)
{
constraintCount
[
atom1
[
i
]]
++
;
constraintCount
[
atom2
[
i
]]
++
;
}
// Identify clusters of three atoms that can be treated with SETTLE. First, for every
// atom that might be part of such a cluster, make a list of the two other atoms it is
// connected to.
vector
<
map
<
int
,
float
>
>
settleConstraints
(
gpu
->
natoms
);
for
(
int
i
=
0
;
i
<
(
int
)
atom1
.
size
();
i
++
)
{
if
(
constraintCount
[
atom1
[
i
]]
==
2
&&
constraintCount
[
atom2
[
i
]]
==
2
)
{
settleConstraints
[
atom1
[
i
]][
atom2
[
i
]]
=
distance
[
i
];
settleConstraints
[
atom2
[
i
]][
atom1
[
i
]]
=
distance
[
i
];
}
}
// Now remove the ones that don't actually form closed loops of three atoms.
vector
<
int
>
settleClusters
;
for
(
int
i
=
0
;
i
<
(
int
)
settleConstraints
.
size
();
i
++
)
{
if
(
settleConstraints
[
i
].
size
()
==
2
)
{
int
partner1
=
settleConstraints
[
i
].
begin
()
->
first
;
int
partner2
=
(
++
settleConstraints
[
i
].
begin
())
->
first
;
if
(
settleConstraints
[
partner1
].
size
()
!=
2
||
settleConstraints
[
partner2
].
size
()
!=
2
||
settleConstraints
[
partner1
].
find
(
partner2
)
==
settleConstraints
[
partner1
].
end
())
settleConstraints
[
i
].
clear
();
else
if
(
i
<
partner1
&&
i
<
partner2
)
settleClusters
.
push_back
(
i
);
}
else
settleConstraints
[
i
].
clear
();
}
// Record the actual SETTLE clusters.
CUDAStream
<
int4
>*
psSettleID
=
new
CUDAStream
<
int4
>
((
int
)
settleClusters
.
size
(),
1
,
"SettleID"
);
gpu
->
psSettleID
=
psSettleID
;
gpu
->
sim
.
pSettleID
=
psSettleID
->
_pDevStream
[
0
];
CUDAStream
<
float2
>*
psSettleParameter
=
new
CUDAStream
<
float2
>
((
int
)
settleClusters
.
size
(),
1
,
"SettleParameter"
);
gpu
->
psSettleParameter
=
psSettleParameter
;
gpu
->
sim
.
pSettleParameter
=
psSettleParameter
->
_pDevStream
[
0
];
gpu
->
sim
.
settleConstraints
=
settleClusters
.
size
();
for
(
int
i
=
0
;
i
<
(
int
)
settleClusters
.
size
();
i
++
)
{
int
atom1
=
settleClusters
[
i
];
int
atom2
=
settleConstraints
[
atom1
].
begin
()
->
first
;
int
atom3
=
(
++
settleConstraints
[
atom1
].
begin
())
->
first
;
float
dist12
=
settleConstraints
[
atom1
].
find
(
atom2
)
->
second
;
float
dist13
=
settleConstraints
[
atom1
].
find
(
atom3
)
->
second
;
float
dist23
=
settleConstraints
[
atom2
].
find
(
atom3
)
->
second
;
if
(
dist12
==
dist13
)
{
// atom1 is the central atom
(
*
psSettleID
)[
i
].
x
=
atom1
;
(
*
psSettleID
)[
i
].
y
=
atom2
;
(
*
psSettleID
)[
i
].
z
=
atom3
;
(
*
psSettleParameter
)[
i
].
x
=
dist12
;
(
*
psSettleParameter
)[
i
].
y
=
dist23
;
}
else
if
(
dist12
==
dist23
)
{
// atom2 is the central atom
(
*
psSettleID
)[
i
].
x
=
atom2
;
(
*
psSettleID
)[
i
].
y
=
atom1
;
(
*
psSettleID
)[
i
].
z
=
atom3
;
(
*
psSettleParameter
)[
i
].
x
=
dist12
;
(
*
psSettleParameter
)[
i
].
y
=
dist13
;
}
else
if
(
dist13
==
dist23
)
{
// atom3 is the central atom
(
*
psSettleID
)[
i
].
x
=
atom3
;
(
*
psSettleID
)[
i
].
y
=
atom1
;
(
*
psSettleID
)[
i
].
z
=
atom2
;
(
*
psSettleParameter
)[
i
].
x
=
dist13
;
(
*
psSettleParameter
)[
i
].
y
=
dist12
;
}
else
throw
OpenMMException
(
"Two of the three distances constrained with SETTLE must be the same."
);
isShakeAtom
[
atom1
]
=
true
;
isShakeAtom
[
atom2
]
=
true
;
isShakeAtom
[
atom3
]
=
true
;
}
psSettleID
->
Upload
();
psSettleParameter
->
Upload
();
gpu
->
sim
.
settle_threads_per_block
=
(
gpu
->
sim
.
settleConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
if
(
gpu
->
sim
.
settle_threads_per_block
>
gpu
->
sim
.
max_shake_threads_per_block
)
gpu
->
sim
.
settle_threads_per_block
=
gpu
->
sim
.
max_shake_threads_per_block
;
if
(
gpu
->
sim
.
settle_threads_per_block
<
1
)
gpu
->
sim
.
settle_threads_per_block
=
1
;
// Find clusters consisting of a central atom with up to three peripheral atoms.
map
<
int
,
ShakeCluster
>
clusters
;
vector
<
bool
>
invalidForShake
(
gpu
->
natoms
,
false
);
for
(
int
i
=
0
;
i
<
(
int
)
atom1
.
size
();
i
++
)
{
if
(
isShakeAtom
[
atom1
[
i
]])
continue
;
// This is being taken care of with SETTLE.
// Determine which is the central atom.
bool
firstIsCentral
;
if
(
constraintCount
[
atom1
[
i
]]
>
1
)
firstIsCentral
=
true
;
else
if
(
constraintCount
[
atom2
[
i
]]
>
1
)
firstIsCentral
=
false
;
else
if
(
atom1
[
i
]
<
atom2
[
i
])
firstIsCentral
=
true
;
else
firstIsCentral
=
false
;
int
centralID
,
peripheralID
;
float
centralInvMass
,
peripheralInvMass
;
if
(
firstIsCentral
)
{
centralID
=
atom1
[
i
];
peripheralID
=
atom2
[
i
];
centralInvMass
=
invMass1
[
i
];
peripheralInvMass
=
invMass2
[
i
];
}
else
{
centralID
=
atom2
[
i
];
peripheralID
=
atom1
[
i
];
centralInvMass
=
invMass2
[
i
];
peripheralInvMass
=
invMass1
[
i
];
}
// Add it to the cluster.
if
(
clusters
.
find
(
centralID
)
==
clusters
.
end
())
{
clusters
[
centralID
]
=
ShakeCluster
(
centralID
,
centralInvMass
);
}
ShakeCluster
&
cluster
=
clusters
[
centralID
];
cluster
.
addAtom
(
peripheralID
,
distance
[
i
],
peripheralInvMass
);
if
(
constraintCount
[
peripheralID
]
!=
1
||
invalidForShake
[
atom1
[
i
]]
||
invalidForShake
[
atom2
[
i
]])
{
markShakeClusterInvalid
(
cluster
,
clusters
,
invalidForShake
);
map
<
int
,
ShakeCluster
>::
iterator
otherCluster
=
clusters
.
find
(
peripheralID
);
if
(
otherCluster
!=
clusters
.
end
()
&&
otherCluster
->
second
.
valid
)
markShakeClusterInvalid
(
otherCluster
->
second
,
clusters
,
invalidForShake
);
}
}
int
validShakeClusters
=
0
;
for
(
map
<
int
,
ShakeCluster
>::
iterator
iter
=
clusters
.
begin
();
iter
!=
clusters
.
end
();
++
iter
)
{
ShakeCluster
&
cluster
=
iter
->
second
;
if
(
cluster
.
valid
)
{
cluster
.
valid
=
!
invalidForShake
[
cluster
.
centralID
];
for
(
int
i
=
0
;
i
<
cluster
.
size
;
i
++
)
if
(
invalidForShake
[
cluster
.
peripheralID
[
i
]])
cluster
.
valid
=
false
;
if
(
cluster
.
valid
)
++
validShakeClusters
;
}
}
// Fill in the Cuda streams.
CUDAStream
<
int4
>*
psShakeID
=
new
CUDAStream
<
int4
>
(
validShakeClusters
,
1
,
"ShakeID"
);
gpu
->
psShakeID
=
psShakeID
;
gpu
->
sim
.
pShakeID
=
psShakeID
->
_pDevStream
[
0
];
CUDAStream
<
float4
>*
psShakeParameter
=
new
CUDAStream
<
float4
>
(
validShakeClusters
,
1
,
"ShakeParameter"
);
gpu
->
psShakeParameter
=
psShakeParameter
;
gpu
->
sim
.
pShakeParameter
=
psShakeParameter
->
_pDevStream
[
0
];
gpu
->
sim
.
ShakeConstraints
=
validShakeClusters
;
int
index
=
0
;
for
(
map
<
int
,
ShakeCluster
>::
const_iterator
iter
=
clusters
.
begin
();
iter
!=
clusters
.
end
();
++
iter
)
{
const
ShakeCluster
&
cluster
=
iter
->
second
;
if
(
!
cluster
.
valid
)
continue
;
(
*
psShakeID
)[
index
].
x
=
cluster
.
centralID
;
(
*
psShakeID
)[
index
].
y
=
cluster
.
peripheralID
[
0
];
(
*
psShakeID
)[
index
].
z
=
cluster
.
size
>
1
?
cluster
.
peripheralID
[
1
]
:
-
1
;
(
*
psShakeID
)[
index
].
w
=
cluster
.
size
>
2
?
cluster
.
peripheralID
[
2
]
:
-
1
;
(
*
psShakeParameter
)[
index
].
x
=
cluster
.
centralInvMass
;
(
*
psShakeParameter
)[
index
].
y
=
0.5
f
/
(
cluster
.
centralInvMass
+
cluster
.
peripheralInvMass
);
(
*
psShakeParameter
)[
index
].
z
=
cluster
.
distance
*
cluster
.
distance
;
(
*
psShakeParameter
)[
index
].
w
=
cluster
.
peripheralInvMass
;
isShakeAtom
[
cluster
.
centralID
]
=
true
;
isShakeAtom
[
cluster
.
peripheralID
[
0
]]
=
true
;
if
(
cluster
.
size
>
1
)
isShakeAtom
[
cluster
.
peripheralID
[
1
]]
=
true
;
if
(
cluster
.
size
>
2
)
isShakeAtom
[
cluster
.
peripheralID
[
2
]]
=
true
;
++
index
;
}
psShakeID
->
Upload
();
psShakeParameter
->
Upload
();
gpu
->
sim
.
shakeTolerance
=
constraintTolerance
;
gpu
->
sim
.
shake_threads_per_block
=
(
gpu
->
sim
.
ShakeConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
if
(
gpu
->
sim
.
shake_threads_per_block
>
gpu
->
sim
.
max_shake_threads_per_block
)
gpu
->
sim
.
shake_threads_per_block
=
gpu
->
sim
.
max_shake_threads_per_block
;
if
(
gpu
->
sim
.
shake_threads_per_block
<
1
)
gpu
->
sim
.
shake_threads_per_block
=
1
;
// Find connected constraints for CCMA.
vector
<
int
>
ccmaConstraints
;
for
(
unsigned
i
=
0
;
i
<
atom1
.
size
();
i
++
)
if
(
!
isShakeAtom
[
atom1
[
i
]])
ccmaConstraints
.
push_back
(
i
);
// Record the connections between constraints.
int
numCCMA
=
(
int
)
ccmaConstraints
.
size
();
vector
<
vector
<
int
>
>
atomConstraints
(
gpu
->
natoms
);
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
atomConstraints
[
atom1
[
ccmaConstraints
[
i
]]].
push_back
(
i
);
atomConstraints
[
atom2
[
ccmaConstraints
[
i
]]].
push_back
(
i
);
}
vector
<
vector
<
int
>
>
linkedConstraints
(
numCCMA
);
for
(
unsigned
atom
=
0
;
atom
<
atomConstraints
.
size
();
atom
++
)
{
for
(
unsigned
i
=
0
;
i
<
atomConstraints
[
atom
].
size
();
i
++
)
for
(
unsigned
j
=
0
;
j
<
i
;
j
++
)
{
int
c1
=
atomConstraints
[
atom
][
i
];
int
c2
=
atomConstraints
[
atom
][
j
];
linkedConstraints
[
c1
].
push_back
(
c2
);
linkedConstraints
[
c2
].
push_back
(
c1
);
}
}
int
maxLinks
=
0
;
for
(
unsigned
i
=
0
;
i
<
linkedConstraints
.
size
();
i
++
)
maxLinks
=
max
(
maxLinks
,
(
int
)
linkedConstraints
[
i
].
size
());
int
maxAtomConstraints
=
0
;
for
(
unsigned
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
// Compute the constraint coupling matrix
vector
<
vector
<
int
>
>
atomAngles
(
gpu
->
natoms
);
for
(
int
i
=
0
;
i
<
(
int
)
gpu
->
sim
.
bond_angles
;
i
++
)
atomAngles
[(
*
gpu
->
psBondAngleID1
)[
i
].
y
].
push_back
(
i
);
vector
<
vector
<
pair
<
int
,
double
>
>
>
matrix
(
numCCMA
);
if
(
numCCMA
>
0
)
{
for
(
int
j
=
0
;
j
<
numCCMA
;
j
++
)
{
for
(
int
k
=
0
;
k
<
numCCMA
;
k
++
)
{
if
(
j
==
k
)
{
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
j
,
1.0
));
continue
;
}
double
scale
;
int
cj
=
ccmaConstraints
[
j
];
int
ck
=
ccmaConstraints
[
k
];
int
atomj0
=
atom1
[
cj
];
int
atomj1
=
atom2
[
cj
];
int
atomk0
=
atom1
[
ck
];
int
atomk1
=
atom2
[
ck
];
int
atoma
,
atomb
,
atomc
;
if
(
atomj0
==
atomk0
)
{
atoma
=
atomj1
;
atomb
=
atomj0
;
atomc
=
atomk1
;
scale
=
invMass1
[
cj
]
/
(
invMass1
[
cj
]
+
invMass2
[
cj
]);
}
else
if
(
atomj1
==
atomk1
)
{
atoma
=
atomj0
;
atomb
=
atomj1
;
atomc
=
atomk0
;
scale
=
invMass2
[
cj
]
/
(
invMass1
[
cj
]
+
invMass2
[
cj
]);
}
else
if
(
atomj0
==
atomk1
)
{
atoma
=
atomj1
;
atomb
=
atomj0
;
atomc
=
atomk0
;
scale
=
invMass1
[
cj
]
/
(
invMass1
[
cj
]
+
invMass2
[
cj
]);
}
else
if
(
atomj1
==
atomk0
)
{
atoma
=
atomj0
;
atomb
=
atomj1
;
atomc
=
atomk1
;
scale
=
invMass2
[
cj
]
/
(
invMass1
[
cj
]
+
invMass2
[
cj
]);
}
else
continue
;
// These constraints are not connected.
// Look for a third constraint forming a triangle with these two.
bool
foundConstraint
=
false
;
for
(
int
other
=
0
;
other
<
numCCMA
;
other
++
)
{
if
((
atom1
[
other
]
==
atoma
&&
atom2
[
other
]
==
atomc
)
||
(
atom1
[
other
]
==
atomc
&&
atom2
[
other
]
==
atoma
))
{
double
d1
=
distance
[
cj
];
double
d2
=
distance
[
ck
];
double
d3
=
distance
[
other
];
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
k
,
scale
*
(
d1
*
d1
+
d2
*
d2
-
d3
*
d3
)
/
(
2.0
*
d1
*
d2
)));
foundConstraint
=
true
;
break
;
}
}
if
(
!
foundConstraint
)
{
// We didn't find one, so look for an angle force field term.
const
vector
<
int
>&
angleCandidates
=
atomAngles
[
atomb
];
for
(
vector
<
int
>::
const_iterator
iter
=
angleCandidates
.
begin
();
iter
!=
angleCandidates
.
end
();
iter
++
)
{
int4
atoms
=
(
*
gpu
->
psBondAngleID1
)[
*
iter
];
if
((
atoms
.
x
==
atoma
&&
atoms
.
z
==
atomc
)
||
(
atoms
.
z
==
atoma
&&
atoms
.
x
==
atomc
))
{
double
angle
=
(
*
gpu
->
psBondAngleParameter
)[
*
iter
].
x
;
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
k
,
scale
*
cos
(
angle
*
PI
/
180.0
)));
break
;
}
}
}
}
}
// Invert it using QR.
vector
<
int
>
matrixRowStart
;
vector
<
int
>
matrixColIndex
;
vector
<
double
>
matrixValue
;
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
matrixRowStart
.
push_back
(
matrixValue
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
matrix
[
i
].
size
();
j
++
)
{
pair
<
int
,
double
>
element
=
matrix
[
i
][
j
];
matrixColIndex
.
push_back
(
element
.
first
);
matrixValue
.
push_back
(
element
.
second
);
}
}
matrixRowStart
.
push_back
(
matrixValue
.
size
());
int
*
qRowStart
,
*
qColIndex
,
*
rRowStart
,
*
rColIndex
;
double
*
qValue
,
*
rValue
;
int
result
=
QUERN_compute_qr
(
numCCMA
,
numCCMA
,
&
matrixRowStart
[
0
],
&
matrixColIndex
[
0
],
&
matrixValue
[
0
],
NULL
,
&
qRowStart
,
&
qColIndex
,
&
qValue
,
&
rRowStart
,
&
rColIndex
,
&
rValue
);
vector
<
double
>
rhs
(
numCCMA
);
matrix
.
clear
();
matrix
.
resize
(
numCCMA
);
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
// Extract column i of the inverse matrix.
for
(
int
j
=
0
;
j
<
numCCMA
;
j
++
)
rhs
[
j
]
=
(
i
==
j
?
1.0
:
0.0
);
result
=
QUERN_multiply_with_q_transpose
(
numCCMA
,
qRowStart
,
qColIndex
,
qValue
,
&
rhs
[
0
]);
result
=
QUERN_solve_with_r
(
numCCMA
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
for
(
int
j
=
0
;
j
<
numCCMA
;
j
++
)
{
double
value
=
rhs
[
j
]
*
distance
[
ccmaConstraints
[
i
]]
/
distance
[
ccmaConstraints
[
j
]];
if
(
abs
(
value
)
>
0.05
)
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
i
,
value
));
}
}
QUERN_free_result
(
qRowStart
,
qColIndex
,
qValue
);
QUERN_free_result
(
rRowStart
,
rColIndex
,
rValue
);
}
int
maxRowElements
=
0
;
for
(
unsigned
i
=
0
;
i
<
matrix
.
size
();
i
++
)
maxRowElements
=
max
(
maxRowElements
,
(
int
)
matrix
[
i
].
size
());
maxRowElements
++
;
// Sort the constraints.
vector
<
int
>
constraintOrder
(
numCCMA
);
for
(
int
i
=
0
;
i
<
numCCMA
;
++
i
)
constraintOrder
[
i
]
=
i
;
sort
(
constraintOrder
.
begin
(),
constraintOrder
.
end
(),
ConstraintOrderer
(
atom1
,
atom2
));
vector
<
int
>
inverseOrder
(
numCCMA
);
for
(
int
i
=
0
;
i
<
numCCMA
;
++
i
)
inverseOrder
[
constraintOrder
[
i
]]
=
i
;
for
(
int
i
=
0
;
i
<
(
int
)
matrix
.
size
();
++
i
)
for
(
int
j
=
0
;
j
<
(
int
)
matrix
[
i
].
size
();
++
j
)
matrix
[
i
][
j
].
first
=
inverseOrder
[
matrix
[
i
][
j
].
first
];
// Fill in the CUDA streams.
CUDAStream
<
int2
>*
psCcmaAtoms
=
new
CUDAStream
<
int2
>
(
numCCMA
,
1
,
"CcmaAtoms"
);
gpu
->
psCcmaAtoms
=
psCcmaAtoms
;
gpu
->
sim
.
pCcmaAtoms
=
psCcmaAtoms
->
_pDevData
;
CUDAStream
<
float4
>*
psCcmaDistance
=
new
CUDAStream
<
float4
>
(
numCCMA
,
1
,
"CcmaDistance"
);
gpu
->
psCcmaDistance
=
psCcmaDistance
;
gpu
->
sim
.
pCcmaDistance
=
psCcmaDistance
->
_pDevData
;
CUDAStream
<
int
>*
psCcmaAtomConstraints
=
new
CUDAStream
<
int
>
(
gpu
->
natoms
*
maxAtomConstraints
,
1
,
"CcmaAtomConstraints"
);
gpu
->
psCcmaAtomConstraints
=
psCcmaAtomConstraints
;
gpu
->
sim
.
pCcmaAtomConstraints
=
psCcmaAtomConstraints
->
_pDevData
;
CUDAStream
<
int
>*
psCcmaNumAtomConstraints
=
new
CUDAStream
<
int
>
(
gpu
->
natoms
,
1
,
"CcmaAtomConstraintsIndex"
);
gpu
->
psCcmaNumAtomConstraints
=
psCcmaNumAtomConstraints
;
gpu
->
sim
.
pCcmaNumAtomConstraints
=
psCcmaNumAtomConstraints
->
_pDevData
;
CUDAStream
<
float
>*
psCcmaDelta1
=
new
CUDAStream
<
float
>
(
numCCMA
,
1
,
"CcmaDelta1"
);
gpu
->
psCcmaDelta1
=
psCcmaDelta1
;
gpu
->
sim
.
pCcmaDelta1
=
psCcmaDelta1
->
_pDevData
;
CUDAStream
<
float
>*
psCcmaDelta2
=
new
CUDAStream
<
float
>
(
numCCMA
,
1
,
"CcmaDelta2"
);
gpu
->
psCcmaDelta2
=
psCcmaDelta2
;
gpu
->
sim
.
pCcmaDelta2
=
psCcmaDelta2
->
_pDevData
;
CUDAStream
<
float
>*
psCcmaReducedMass
=
new
CUDAStream
<
float
>
(
numCCMA
,
1
,
"CcmaReducedMass"
);
gpu
->
psCcmaReducedMass
=
psCcmaReducedMass
;
gpu
->
sim
.
pCcmaReducedMass
=
psCcmaReducedMass
->
_pDevData
;
CUDAStream
<
unsigned
int
>*
psConstraintMatrixColumn
=
new
CUDAStream
<
unsigned
int
>
(
numCCMA
*
maxRowElements
,
1
,
"ConstraintMatrixColumn"
);
gpu
->
psConstraintMatrixColumn
=
psConstraintMatrixColumn
;
gpu
->
sim
.
pConstraintMatrixColumn
=
psConstraintMatrixColumn
->
_pDevData
;
CUDAStream
<
float
>*
psConstraintMatrixValue
=
new
CUDAStream
<
float
>
(
numCCMA
*
maxRowElements
,
1
,
"ConstraintMatrixValue"
);
gpu
->
psConstraintMatrixValue
=
psConstraintMatrixValue
;
gpu
->
sim
.
pConstraintMatrixValue
=
psConstraintMatrixValue
->
_pDevData
;
cudaHostAlloc
((
void
**
)
&
gpu
->
ccmaConvergedHostMarker
,
sizeof
(
int
),
cudaHostAllocMapped
);
cudaHostGetDevicePointer
((
void
**
)
&
gpu
->
sim
.
ccmaConvergedDeviceMarker
,
(
void
*
)
gpu
->
ccmaConvergedHostMarker
,
0
);
cudaEventCreate
(
&
gpu
->
ccmaEvent
);
gpu
->
sim
.
ccmaConstraints
=
numCCMA
;
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
int
index
=
constraintOrder
[
i
];
int
c
=
ccmaConstraints
[
index
];
(
*
psCcmaAtoms
)[
i
].
x
=
atom1
[
c
];
(
*
psCcmaAtoms
)[
i
].
y
=
atom2
[
c
];
(
*
psCcmaDistance
)[
i
].
w
=
distance
[
c
];
(
*
psCcmaReducedMass
)[
i
]
=
0.5
f
/
(
invMass1
[
c
]
+
invMass2
[
c
]);
for
(
unsigned
int
j
=
0
;
j
<
matrix
[
index
].
size
();
j
++
)
{
(
*
psConstraintMatrixColumn
)[
i
+
j
*
numCCMA
]
=
matrix
[
index
][
j
].
first
;
(
*
psConstraintMatrixValue
)[
i
+
j
*
numCCMA
]
=
(
float
)
matrix
[
index
][
j
].
second
;
}
(
*
psConstraintMatrixColumn
)[
i
+
matrix
[
index
].
size
()
*
numCCMA
]
=
numCCMA
;
}
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
{
(
*
psCcmaNumAtomConstraints
)[
i
]
=
atomConstraints
[
i
].
size
();
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
{
bool
forward
=
(
atom1
[
ccmaConstraints
[
atomConstraints
[
i
][
j
]]]
==
i
);
(
*
psCcmaAtomConstraints
)[
i
+
j
*
gpu
->
natoms
]
=
(
forward
?
inverseOrder
[
atomConstraints
[
i
][
j
]]
+
1
:
-
inverseOrder
[
atomConstraints
[
i
][
j
]]
-
1
);
}
}
psCcmaAtoms
->
Upload
();
psCcmaDistance
->
Upload
();
psCcmaReducedMass
->
Upload
();
psCcmaAtomConstraints
->
Upload
();
psCcmaNumAtomConstraints
->
Upload
();
psConstraintMatrixColumn
->
Upload
();
psConstraintMatrixValue
->
Upload
();
gpu
->
sim
.
ccma_threads_per_block
=
(
gpu
->
sim
.
ccmaConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
if
(
gpu
->
sim
.
ccma_threads_per_block
>
gpu
->
sim
.
threads_per_block
)
gpu
->
sim
.
ccma_threads_per_block
=
gpu
->
sim
.
threads_per_block
;
if
(
gpu
->
sim
.
ccma_threads_per_block
<
gpu
->
sim
.
blocks
)
gpu
->
sim
.
ccma_threads_per_block
=
gpu
->
sim
.
blocks
;
}
extern "C"
int gpuAllocateInitialBuffers(gpuContext gpu)
{
    // Allocates the initial set of CUDAStream buffers for a context and stores
    // their device pointers in gpu->sim.  Reads gpu->natoms, gpu->sim.blocks and
    // gpu->sim.random_threads_per_block, so those must be set before calling.
    // Always returns 1.

    // Atom counts.  The padded count is the atom count rounded up via GRID/GRIDBITS
    // (a multiple of GRID, assuming GRID == 1 << GRIDBITS).
    gpu->sim.atoms                  = gpu->natoms;
    gpu->sim.paddedNumberOfAtoms    = ((gpu->sim.atoms + GRID - 1) >> GRIDBITS) << GRIDBITS;
    gpu->sim.degreesOfFreedom       = 3 * gpu->sim.atoms - 6;
    gpu->gpAtomTable                = NULL;
    gpu->gAtomTypes                 = 0;

    // "Posq" stream; the stride values are taken from it (computed once).
    gpu->psPosq4                    = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "Posq");
    gpu->sim.pPosq                  = gpu->psPosq4->_pDevStream[0];
    gpu->sim.stride                 = gpu->psPosq4->_stride;
    gpu->sim.stride2                = 2 * gpu->sim.stride;
    gpu->sim.stride3                = 3 * gpu->sim.stride;
    gpu->sim.stride4                = 4 * gpu->sim.stride;

    // Remaining streams sized by the padded atom count.
    gpu->psPosqP4                   = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "PosqP");
    gpu->sim.pPosqP                 = gpu->psPosqP4->_pDevStream[0];
    gpu->psOldPosq4                 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "OldPosq");
    gpu->sim.pOldPosq               = gpu->psOldPosq4->_pDevStream[0];
    gpu->psVelm4                    = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "Velm");
    gpu->sim.pVelm4                 = gpu->psVelm4->_pDevStream[0];
    gpu->psBornRadii                = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, 1, "BornRadii");
    gpu->sim.pBornRadii             = gpu->psBornRadii->_pDevStream[0];
    gpu->psObcChain                 = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, 1, "ObcChain");
    gpu->sim.pObcChain              = gpu->psObcChain->_pDevStream[0];
    gpu->psSigEps2                  = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "SigEps2");
    gpu->sim.pAttr                  = gpu->psSigEps2->_pDevStream[0];
    gpu->psObcData                  = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "ObcData");
    gpu->sim.pObcData               = gpu->psObcData->_pDevStream[0];
    gpu->psGBVIData                 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "GBVIData");
    gpu->sim.pGBVIData              = gpu->psGBVIData->_pDevStream[0];

    // Step size starts at (0, 0); the integrator setters test x == 0 to detect
    // that no step size has been chosen yet.
    gpu->psStepSize                 = new CUDAStream<float2>(1, 1, "StepSize");
    gpu->sim.pStepSize              = gpu->psStepSize->_pDevStream[0];
    (*gpu->psStepSize)[0]           = make_float2(0.0f, 0.0f);
    gpu->psStepSize->Upload();

    // Three Langevin parameters; left unset here.
    gpu->psLangevinParameters       = new CUDAStream<float>(3, 1, "LangevinParameters");
    gpu->sim.pLangevinParameters    = gpu->psLangevinParameters->_pDevStream[0];

    gpu->pAtomSymbol                = new unsigned char[gpu->natoms];

    // Atom index stream initialised to the identity mapping.
    gpu->psAtomIndex                = new CUDAStream<int>(gpu->sim.paddedNumberOfAtoms, 1, "AtomIndex");
    gpu->sim.pAtomIndex             = gpu->psAtomIndex->_pDevStream[0];
    for (int i = 0; i < (int) gpu->sim.paddedNumberOfAtoms; i++)
        (*gpu->psAtomIndex)[i] = i;
    gpu->psAtomIndex->Upload();

    gpu->posCellOffsets.resize(gpu->natoms, make_int3(0, 0, 0));
    gpu->sim.outputBuffers          = 0;

    // Determine randoms
    gpu->seed                       = 1;
    gpu->sim.randomFrames           = 20;
    gpu->sim.randomIterations       = gpu->sim.randomFrames;
    gpu->sim.randoms                = gpu->sim.randomFrames * gpu->sim.paddedNumberOfAtoms;
    gpu->sim.totalRandoms           = gpu->sim.randoms + gpu->sim.paddedNumberOfAtoms;
    gpu->psRandom4                  = new CUDAStream<float4>(gpu->sim.totalRandoms, 1, "Random4");
    gpu->psRandom2                  = new CUDAStream<float2>(gpu->sim.totalRandoms, 1, "Random2");
    gpu->psRandomPosition           = new CUDAStream<int>(gpu->sim.blocks, 1, "RandomPosition");
    gpu->psRandomSeed               = new CUDAStream<uint4>(gpu->sim.blocks * gpu->sim.random_threads_per_block, 1, "RandomSeed");
    gpu->sim.pRandom4               = gpu->psRandom4->_pDevStream[0];
    gpu->sim.pRandom2               = gpu->psRandom2->_pDevStream[0];
    gpu->sim.pRandomPosition        = gpu->psRandomPosition->_pDevStream[0];
    gpu->sim.pRandomSeed            = gpu->psRandomSeed->_pDevStream[0];

    // Allocate and clear linear momentum buffer
    gpu->psLinearMomentum           = new CUDAStream<float4>(gpu->sim.blocks, 1, "LinearMomentum");
    gpu->sim.pLinearMomentum        = gpu->psLinearMomentum->_pDevStream[0];
    for (int i = 0; i < (int) gpu->sim.blocks; i++)
    {
        float4& momentum = (*gpu->psLinearMomentum)[i];
        momentum.x = 0.0f;
        momentum.y = 0.0f;
        momentum.z = 0.0f;
        momentum.w = 0.0f;
    }
    gpu->psLinearMomentum->Upload();

    return 1;
}
extern "C"
void gpuSetPositions(gpuContext gpu, const vector<float>& x, const vector<float>& y, const vector<float>& z)
{
    // Stores x/y/z for the first gpu->natoms atoms into the Posq stream (the w
    // component is not touched), uploads the stream, and flags the Born radii
    // as needing recalculation.  No bounds check is made on x, y or z.
    for (int atom = 0; atom < gpu->natoms; ++atom)
    {
        float4& posq = (*gpu->psPosq4)[atom];
        posq.x = x[atom];
        posq.y = y[atom];
        posq.z = z[atom];
    }
    gpu->psPosq4->Upload();

    // set flag to recalculate Born radii
    gpu->bRecalculateBornRadii = true;
}
extern "C"
void gpuSetVelocities(gpuContext gpu, const vector<float>& x, const vector<float>& y, const vector<float>& z)
{
    // Stores the x/y/z velocity components for the first gpu->natoms atoms into
    // the Velm stream (w is not touched) and uploads it.  No bounds check is
    // made on x, y or z.
    for (int atom = 0; atom < gpu->natoms; ++atom)
    {
        float4& velm = (*gpu->psVelm4)[atom];
        velm.x = x[atom];
        velm.y = y[atom];
        velm.z = z[atom];
    }
    gpu->psVelm4->Upload();
}
extern "C"
void gpuSetMass(gpuContext gpu, const vector<float>& mass)
{
    // Writes 1/mass for each of the first gpu->natoms atoms into the w component
    // of the Velm stream, records 1/(sum of masses) in sim.inverseTotalMass, and
    // uploads the stream.  Masses are not checked for zero.
    float massSum = 0.0f;
    for (int atom = 0; atom < gpu->natoms; ++atom)
    {
        const float atomMass = mass[atom];
        (*gpu->psVelm4)[atom].w = 1.0f / atomMass;
        massSum += atomMass;
    }
    gpu->sim.inverseTotalMass = 1.0f / massSum;
    gpu->psVelm4->Upload();
}
extern "C"
void gpuInitializeRandoms(gpuContext gpu)
{
    // Resets every per-block random position to 0, fills the per-thread seed
    // stream from an RNG seeded with a value derived from gpu->seed, uploads
    // both streams, then calls gpuSetConstants() and kGenerateRandoms().
    for (int block = 0; block < (int) gpu->sim.blocks; ++block)
        (*gpu->psRandomPosition)[block] = 0;

    int seed = gpu->seed | ((gpu->seed ^ 0xffffffff) << 16);
#if 0
    srand(seed);
    for (int i = 0; i < (int) (gpu->sim.blocks * gpu->sim.random_threads_per_block); i++)
    {
        (*gpu->psRandomSeed)[i].x = rand();
        (*gpu->psRandomSeed)[i].y = rand();
        (*gpu->psRandomSeed)[i].z = rand();
        (*gpu->psRandomSeed)[i].w = rand();
    }
#else
    RNG rng(seed);
    const int numSeeds = (int) (gpu->sim.blocks * gpu->sim.random_threads_per_block);
    for (int thread = 0; thread < numSeeds; ++thread)
    {
        // Components are drawn in x, y, z, w order so the sequence is unchanged.
        uint4& threadSeed = (*gpu->psRandomSeed)[thread];
        threadSeed.x = rng.rand_int();
        threadSeed.y = rng.rand_int();
        threadSeed.z = rng.rand_int();
        threadSeed.w = rng.rand_int();
    }
#endif
    gpu->psRandomPosition->Upload();
    gpu->psRandomSeed->Upload();
    gpuSetConstants(gpu);
    kGenerateRandoms(gpu);
}
extern
"C"
bool
gpuIsAvailable
()
{
int
deviceCount
;
cudaGetDeviceCount
(
&
deviceCount
);
return
(
deviceCount
>
0
);
}
extern
"C"
void
*
gpuInit
(
int
numAtoms
,
unsigned
int
device
,
bool
useBlockingSync
)
{
gpuContext
gpu
=
new
_gpuContext
;
int
LRFSize
=
0
;
int
SMCount
=
0
;
int
SMMajor
=
0
;
int
SMMinor
=
0
;
// Select which device to use
int
currentDevice
;
cudaError_t
status
=
cudaGetDevice
(
&
currentDevice
);
RTERROR
(
status
,
"Error getting CUDA device"
)
if
(
device
!=
currentDevice
)
cudaSetDevice
(
device
);
// Ignore errors
status
=
cudaGetDevice
(
&
gpu
->
device
);
RTERROR
(
status
,
"Error getting CUDA device"
)
status
=
cudaSetDeviceFlags
(
cudaDeviceMapHost
+
(
useBlockingSync
?
cudaDeviceBlockingSync
:
cudaDeviceScheduleAuto
));
RTERROR
(
status
,
"Error setting device flags"
)
gpu
->
useBlockingSync
=
useBlockingSync
;
// Determine kernel call configuration
cudaDeviceProp
deviceProp
;
cudaGetDeviceProperties
(
&
deviceProp
,
currentDevice
);
// Determine SM version
if
(
deviceProp
.
major
==
1
)
{
switch
(
deviceProp
.
minor
)
{
case
0
:
case
1
:
gpu
->
sm_version
=
SM_10
;
gpu
->
sim
.
workUnitsPerSM
=
G8X_NONBOND_WORKUNITS_PER_SM
;
break
;
default:
gpu
->
sm_version
=
SM_12
;
gpu
->
sim
.
workUnitsPerSM
=
GT2XX_NONBOND_WORKUNITS_PER_SM
;
break
;
}
}
else
{
gpu
->
sm_version
=
SM_20
;
gpu
->
sim
.
workUnitsPerSM
=
GF1XX_NONBOND_WORKUNITS_PER_SM
;
}
if
(
deviceProp
.
regsPerBlock
==
8192
)
{
gpu
->
sim
.
nonbond_threads_per_block
=
G8X_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
bornForce2_threads_per_block
=
G8X_BORNFORCE2_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_shake_threads_per_block
=
G8X_SHAKE_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_update_threads_per_block
=
G8X_UPDATE_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_localForces_threads_per_block
=
G8X_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
G8X_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
G8X_RANDOM_THREADS_PER_BLOCK
;
gpu
->
blocksPerSM
=
G8X_BLOCKS_PER_SM
;
}
else
if
(
deviceProp
.
regsPerBlock
<=
16384
)
{
gpu
->
sim
.
nonbond_threads_per_block
=
GT2XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
bornForce2_threads_per_block
=
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_shake_threads_per_block
=
GT2XX_SHAKE_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_update_threads_per_block
=
GT2XX_UPDATE_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_localForces_threads_per_block
=
GT2XX_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
GT2XX_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
GT2XX_RANDOM_THREADS_PER_BLOCK
;
gpu
->
blocksPerSM
=
GT2XX_BLOCKS_PER_SM
;
}
else
{
gpu
->
sim
.
nonbond_threads_per_block
=
GF1XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
bornForce2_threads_per_block
=
GF1XX_BORNFORCE2_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_shake_threads_per_block
=
GF1XX_SHAKE_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_update_threads_per_block
=
GF1XX_UPDATE_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_localForces_threads_per_block
=
GF1XX_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
GF1XX_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
GF1XX_RANDOM_THREADS_PER_BLOCK
;
gpu
->
blocksPerSM
=
GF1XX_BLOCKS_PER_SM
;
}
gpu
->
sim
.
nonbond_blocks
=
deviceProp
.
multiProcessorCount
*
gpu
->
blocksPerSM
;
gpu
->
sim
.
bornForce2_blocks
=
deviceProp
.
multiProcessorCount
*
gpu
->
blocksPerSM
;
gpu
->
sim
.
blocks
=
deviceProp
.
multiProcessorCount
;
gpu
->
sharedMemoryPerBlock
=
deviceProp
.
sharedMemPerBlock
;
gpu
->
sim
.
shake_threads_per_block
=
gpu
->
sim
.
max_shake_threads_per_block
;
gpu
->
sim
.
localForces_threads_per_block
=
gpu
->
sim
.
max_localForces_threads_per_block
;
gpu
->
natoms
=
numAtoms
;
gpuAllocateInitialBuffers
(
gpu
);
gpu
->
iterations
=
0
;
gpu
->
sim
.
update_threads_per_block
=
(
gpu
->
natoms
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
if
(
gpu
->
sim
.
update_threads_per_block
>
gpu
->
sim
.
max_update_threads_per_block
)
gpu
->
sim
.
update_threads_per_block
=
gpu
->
sim
.
max_update_threads_per_block
;
if
(
gpu
->
sim
.
update_threads_per_block
<
gpu
->
psLangevinParameters
->
_length
)
gpu
->
sim
.
update_threads_per_block
=
gpu
->
psLangevinParameters
->
_length
;
gpu
->
sim
.
bf_reduce_threads_per_block
=
gpu
->
sim
.
update_threads_per_block
;
gpu
->
sim
.
bsf_reduce_threads_per_block
=
(
gpu
->
sim
.
stride4
+
gpu
->
natoms
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
gpu
->
sim
.
bsf_reduce_threads_per_block
=
((
gpu
->
sim
.
bsf_reduce_threads_per_block
+
(
GRID
-
1
))
/
GRID
)
*
GRID
;
if
(
gpu
->
sim
.
bsf_reduce_threads_per_block
>
gpu
->
sim
.
threads_per_block
)
gpu
->
sim
.
bsf_reduce_threads_per_block
=
gpu
->
sim
.
threads_per_block
;
if
(
gpu
->
sim
.
bsf_reduce_threads_per_block
<
1
)
gpu
->
sim
.
bsf_reduce_threads_per_block
=
1
;
// Initialize constants to reasonable values
gpu
->
sim
.
probeRadius
=
probeRadius
;
gpu
->
sim
.
surfaceAreaFactor
=
surfaceAreaFactor
;
gpu
->
sim
.
electricConstant
=
electricConstant
;
gpu
->
sim
.
nonbondedMethod
=
NO_CUTOFF
;
gpu
->
sim
.
nonbondedCutoff
=
0.0
f
;
gpu
->
sim
.
nonbondedCutoffSqr
=
0.0
f
;
gpu
->
sim
.
bigFloat
=
99999999.0
f
;
gpu
->
sim
.
forceConversionFactor
=
forceConversionFactor
;
gpu
->
sim
.
preFactor
=
2.0
f
*
electricConstant
*
((
1.0
f
/
defaultInnerDielectric
)
-
(
1.0
f
/
defaultSolventDielectric
))
*
gpu
->
sim
.
forceConversionFactor
;
gpu
->
sim
.
dielectricOffset
=
dielectricOffset
;
gpu
->
sim
.
alphaOBC
=
alphaOBC
;
gpu
->
sim
.
betaOBC
=
betaOBC
;
gpu
->
sim
.
gammaOBC
=
gammaOBC
;
gpu
->
sim
.
maxShakeIterations
=
15
;
gpu
->
sim
.
shakeTolerance
=
1.0e-04
f
*
2.0
f
;
gpu
->
sim
.
InvMassJ
=
9.920635e-001
f
;
gpu
->
grid
=
GRID
;
gpu
->
bCalculateCM
=
false
;
gpu
->
bRemoveCM
=
false
;
gpu
->
bRecalculateBornRadii
=
true
;
gpu
->
bIncludeGBSA
=
false
;
gpu
->
bIncludeGBVI
=
false
;
gpuInitializeRandoms
(
gpu
);
// To be determined later
gpu
->
psLJ14ID
=
NULL
;
gpu
->
psForce4
=
NULL
;
gpu
->
psEnergy
=
NULL
;
gpu
->
sim
.
pForce4
=
NULL
;
gpu
->
psBornForce
=
NULL
;
gpu
->
sim
.
pBornForce
=
NULL
;
gpu
->
psBornSum
=
NULL
;
gpu
->
sim
.
pBornSum
=
NULL
;
gpu
->
psBondID
=
NULL
;
gpu
->
psBondParameter
=
NULL
;
gpu
->
psBondAngleID1
=
NULL
;
gpu
->
psBondAngleID2
=
NULL
;
gpu
->
psBondAngleParameter
=
NULL
;
gpu
->
psDihedralID1
=
NULL
;
gpu
->
psDihedralID2
=
NULL
;
gpu
->
psDihedralParameter
=
NULL
;
gpu
->
psRbDihedralID1
=
NULL
;
gpu
->
psRbDihedralID2
=
NULL
;
gpu
->
psRbDihedralParameter1
=
NULL
;
gpu
->
psRbDihedralParameter2
=
NULL
;
gpu
->
psLJ14ID
=
NULL
;
gpu
->
psLJ14Parameter
=
NULL
;
gpu
->
psCustomParams
=
NULL
;
gpu
->
psCustomBondID
=
NULL
;
gpu
->
psCustomBondParams
=
NULL
;
gpu
->
psCustomAngleID1
=
NULL
;
gpu
->
psCustomAngleID2
=
NULL
;
gpu
->
psCustomAngleParams
=
NULL
;
gpu
->
psCustomTorsionID1
=
NULL
;
gpu
->
psCustomTorsionID2
=
NULL
;
gpu
->
psCustomTorsionParams
=
NULL
;
gpu
->
psCustomExternalID
=
NULL
;
gpu
->
psCustomExternalParams
=
NULL
;
gpu
->
psEwaldCosSinSum
=
NULL
;
gpu
->
psTabulatedErfc
=
NULL
;
gpu
->
psPmeGrid
=
NULL
;
gpu
->
psPmeBsplineModuli
[
0
]
=
NULL
;
gpu
->
psPmeBsplineModuli
[
1
]
=
NULL
;
gpu
->
psPmeBsplineModuli
[
2
]
=
NULL
;
gpu
->
psPmeBsplineTheta
=
NULL
;
gpu
->
psPmeBsplineDtheta
=
NULL
;
gpu
->
psPmeAtomRange
=
NULL
;
gpu
->
psPmeAtomGridIndex
=
NULL
;
gpu
->
psShakeID
=
NULL
;
gpu
->
psShakeParameter
=
NULL
;
gpu
->
psSettleID
=
NULL
;
gpu
->
psSettleParameter
=
NULL
;
gpu
->
psExclusion
=
NULL
;
gpu
->
psExclusionIndex
=
NULL
;
gpu
->
psWorkUnit
=
NULL
;
gpu
->
psInteractingWorkUnit
=
NULL
;
gpu
->
psInteractionFlag
=
NULL
;
gpu
->
psInteractionCount
=
NULL
;
gpu
->
psGridBoundingBox
=
NULL
;
gpu
->
psGridCenter
=
NULL
;
gpu
->
psCcmaAtoms
=
NULL
;
gpu
->
psCcmaDistance
=
NULL
;
gpu
->
psCcmaAtomConstraints
=
NULL
;
gpu
->
psCcmaNumAtomConstraints
=
NULL
;
gpu
->
psCcmaDelta1
=
NULL
;
gpu
->
psCcmaDelta2
=
NULL
;
gpu
->
psCcmaReducedMass
=
NULL
;
gpu
->
psConstraintMatrixColumn
=
NULL
;
gpu
->
psConstraintMatrixValue
=
NULL
;
gpu
->
psTabulatedFunctionParams
=
NULL
;
for
(
int
i
=
0
;
i
<
MAX_TABULATED_FUNCTIONS
;
i
++
)
gpu
->
tabulatedFunctions
[
i
].
coefficients
=
NULL
;
gpu
->
sim
.
customExpressionStackSize
=
0
;
gpu
->
sim
.
customBonds
=
0
;
gpu
->
sim
.
customAngles
=
0
;
gpu
->
sim
.
customTorsions
=
0
;
// Initialize output buffer before reading parameters
gpu
->
pOutputBufferCounter
=
new
unsigned
int
[
gpu
->
sim
.
paddedNumberOfAtoms
];
memset
(
gpu
->
pOutputBufferCounter
,
0
,
gpu
->
sim
.
paddedNumberOfAtoms
*
sizeof
(
unsigned
int
));
return
(
void
*
)
gpu
;
}
extern "C"
void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature, float errorTol)
{
    // Records the Langevin integrator settings in gpu->sim, uploads the three
    // derived scale factors (velocity, force, noise) to the LangevinParameters
    // stream, and updates the StepSize stream.
    gpu->sim.deltaT         = deltaT;
    gpu->sim.oneOverDeltaT  = 1.0f / deltaT;
    gpu->sim.errorTol       = errorTol;
    gpu->sim.tau            = tau;
    gpu->sim.T              = temperature;
    gpu->sim.kT             = BOLTZ * gpu->sim.T;

    // Scale factors derived from the time step and tau.
    const double velocityScale = exp(-deltaT / tau);
    const double forceScale    = (1 - velocityScale) * tau;
    const double noiseScale    = sqrt(2 * gpu->sim.kT / tau) * sqrt(0.5 * (1 - velocityScale * velocityScale) * tau);
    (*gpu->psLangevinParameters)[0] = (float) velocityScale;
    (*gpu->psLangevinParameters)[1] = (float) forceScale;
    (*gpu->psLangevinParameters)[2] = (float) noiseScale;
    gpu->psLangevinParameters->Upload();

    // x is only overwritten while it is still 0; y always takes the new deltaT.
    gpu->psStepSize->Download();
    float2& stepSize = (*gpu->psStepSize)[0];
    if (stepSize.x == 0)
        stepSize.x = deltaT;
    stepSize.y = deltaT;
    gpu->psStepSize->Upload();
}
extern "C"
void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT, float errorTol)
{
    // Records the Verlet integrator time step and error tolerance in gpu->sim
    // and updates the StepSize stream.
    gpu->sim.deltaT         = deltaT;
    gpu->sim.oneOverDeltaT  = 1.0f / deltaT;
    gpu->sim.errorTol       = errorTol;

    // x is only overwritten while it is still 0; y always takes the new deltaT.
    gpu->psStepSize->Download();
    float2& stepSize = (*gpu->psStepSize)[0];
    if (stepSize.x == 0)
        stepSize.x = deltaT;
    stepSize.y = deltaT;
    gpu->psStepSize->Upload();
}
extern "C"
void gpuSetBrownianIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature)
{
    // Records the Brownian integrator settings and the derived tau*deltaT and
    // noise amplitude in gpu->sim, then updates the StepSize stream.
    gpu->sim.deltaT         = deltaT;
    gpu->sim.oneOverDeltaT  = 1.0f / deltaT;
    gpu->sim.tau            = tau;
    gpu->sim.tauDeltaT      = gpu->sim.deltaT * gpu->sim.tau;
    gpu->sim.T              = temperature;
    gpu->sim.kT             = BOLTZ * gpu->sim.T;
    gpu->sim.noiseAmplitude = sqrt(2.0f * gpu->sim.kT * deltaT * tau);

    // x is only overwritten while it is still 0; y always takes the new deltaT.
    gpu->psStepSize->Download();
    float2& stepSize = (*gpu->psStepSize)[0];
    if (stepSize.x == 0)
        stepSize.x = deltaT;
    stepSize.y = deltaT;
    gpu->psStepSize->Upload();
}
extern "C"
void gpuSetAndersenThermostatParameters(gpuContext gpu, float temperature, float collisionFrequency)
{
    // Stores the thermostat collision frequency and temperature in gpu->sim,
    // along with kT = BOLTZ * T.
    gpu->sim.collisionFrequency = collisionFrequency;
    gpu->sim.T                  = temperature;
    gpu->sim.kT                 = BOLTZ * gpu->sim.T;
}
/**
 * Release everything owned by a gpuContext and then the context itself.
 *
 * Host arrays are released with delete[], stream objects with delete, the
 * cuFFT plan / CUDA event / compaction plan through their own destroy calls.
 * The context pointer is invalid after this call.
 *
 * @param gpu  context to destroy
 */
extern "C"
void gpuShutDown(gpuContext gpu)
{
    // ---- Host-side arrays -------------------------------------------------
    delete[] gpu->pOutputBufferCounter;
    delete[] gpu->gpAtomTable;
    delete[] gpu->pAtomSymbol;

    // ---- Core per-atom streams ---------------------------------------------
    delete gpu->psPosq4;
    delete gpu->psPosqP4;
    delete gpu->psOldPosq4;
    delete gpu->psVelm4;
    delete gpu->psForce4;
    delete gpu->psEnergy;
    delete gpu->psSigEps2;

    // ---- Optional custom-force streams -------------------------------------
    // Deleting a null pointer is a no-op, so a lone stream needs no guard.
    delete gpu->psCustomParams;
    // The ID streams below are only released when the matching parameter
    // stream exists, exactly as the parameter stream gates them.
    if (gpu->psCustomBondParams)
    {
        delete gpu->psCustomBondID;
        delete gpu->psCustomBondParams;
    }
    if (gpu->psCustomAngleParams)
    {
        delete gpu->psCustomAngleID1;
        delete gpu->psCustomAngleID2;
        delete gpu->psCustomAngleParams;
    }
    if (gpu->psCustomTorsionParams)
    {
        delete gpu->psCustomTorsionID1;
        delete gpu->psCustomTorsionID2;
        delete gpu->psCustomTorsionParams;
    }
    if (gpu->psCustomExternalParams)
    {
        delete gpu->psCustomExternalID;
        delete gpu->psCustomExternalParams;
    }

    // ---- Ewald / PME ---------------------------------------------------------
    delete gpu->psEwaldCosSinSum;
    if (gpu->psPmeGrid)
    {
        delete gpu->psPmeGrid;
        for (int axis = 0; axis < 3; axis++)
            delete gpu->psPmeBsplineModuli[axis];
        delete gpu->psPmeBsplineTheta;
        delete gpu->psPmeBsplineDtheta;
        delete gpu->psPmeAtomRange;
        delete gpu->psPmeAtomGridIndex;
        // The FFT plan is only torn down together with the PME grid.
        cufftDestroy(gpu->fftplan);
    }
    delete gpu->psTabulatedErfc;

    // ---- Implicit solvent ----------------------------------------------------
    delete gpu->psObcData;
    delete gpu->psGBVIData;
    delete gpu->psObcChain;
    delete gpu->psBornForce;
    delete gpu->psBornRadii;
    delete gpu->psBornSum;

    // ---- Bonded terms --------------------------------------------------------
    delete gpu->psBondID;
    delete gpu->psBondParameter;
    delete gpu->psBondAngleID1;
    delete gpu->psBondAngleID2;
    delete gpu->psBondAngleParameter;
    delete gpu->psDihedralID1;
    delete gpu->psDihedralID2;
    delete gpu->psDihedralParameter;
    delete gpu->psRbDihedralID1;
    delete gpu->psRbDihedralID2;
    delete gpu->psRbDihedralParameter1;
    delete gpu->psRbDihedralParameter2;
    delete gpu->psLJ14ID;
    delete gpu->psLJ14Parameter;

    // ---- SHAKE / SETTLE ------------------------------------------------------
    delete gpu->psShakeID;
    delete gpu->psShakeParameter;
    delete gpu->psSettleID;
    delete gpu->psSettleParameter;

    // ---- Nonbonded work lists --------------------------------------------------
    delete gpu->psExclusion;
    delete gpu->psExclusionIndex;
    delete gpu->psWorkUnit;
    delete gpu->psInteractingWorkUnit;
    delete gpu->psInteractionFlag;
    delete gpu->psInteractionCount;

    // ---- Integration, random numbers, bookkeeping ------------------------------
    delete gpu->psStepSize;
    delete gpu->psLangevinParameters;
    delete gpu->psRandom4;
    delete gpu->psRandom2;
    delete gpu->psRandomPosition;
    delete gpu->psRandomSeed;
    delete gpu->psLinearMomentum;
    delete gpu->psAtomIndex;
    delete gpu->psGridBoundingBox;
    delete gpu->psGridCenter;

    // ---- CCMA ----------------------------------------------------------------
    delete gpu->psCcmaAtoms;
    delete gpu->psCcmaDistance;
    delete gpu->psCcmaAtomConstraints;
    delete gpu->psCcmaNumAtomConstraints;
    delete gpu->psCcmaDelta1;
    delete gpu->psCcmaDelta2;
    delete gpu->psCcmaReducedMass;
    cudaEventDestroy(gpu->ccmaEvent);
    delete gpu->psConstraintMatrixColumn;
    delete gpu->psConstraintMatrixValue;

    // ---- Tabulated functions -----------------------------------------------------
    delete gpu->psTabulatedFunctionParams;
    for (int i = 0; i < MAX_TABULATED_FUNCTIONS; i++)
    {
        // Unused slots hold a null pointer, for which delete does nothing.
        delete gpu->tabulatedFunctions[i].coefficients;
    }

    // ---- Stream compaction -----------------------------------------------------
    if (gpu->compactPlan.valid)
    {
        destroyCompactionPlan(gpu->compactPlan);
    }

    // Wrap up: free the context itself, then shut the CUDA thread down.
    delete gpu;
    cudaThreadExit();
}
/**
 * Choose the number of output buffers, allocate the force / energy / Born
 * accumulation streams, compute the bonded-term parameter offsets, and
 * re-index the output-buffer slots stored in the bonded ID streams.
 *
 * @param gpu  context to configure; its bonded ID and parameter streams and
 *             pOutputBufferCounter must already be populated
 * @return     always 1
 */
extern "C"
int gpuBuildOutputBuffers(gpuContext gpu)
{
    cudaGmxSimulation& sim = gpu->sim;

    // Select the number of output buffers to use. Start from the per-warp
    // layout and fall back to the per-atom-block layout when that is smaller.
    gpu->bOutputBufferPerWarp = true;
    sim.nonbondOutputBuffers  = sim.nonbond_blocks*sim.nonbond_threads_per_block/GRID;
    if (sim.nonbondOutputBuffers >= sim.paddedNumberOfAtoms/GRID)
    {
        // For small systems, it is more efficient to have one output buffer
        // per block of GRID atoms instead of one per warp.
        gpu->bOutputBufferPerWarp = false;
        sim.nonbondOutputBuffers  = sim.paddedNumberOfAtoms/GRID;
    }
    if (sim.outputBuffers < sim.nonbondOutputBuffers)
    {
        sim.outputBuffers = sim.nonbondOutputBuffers;
    }

    // The buffer count must also cover the largest per-atom counter value.
    unsigned int bufferCount       = sim.outputBuffers;
    const unsigned int* counters   = gpu->pOutputBufferCounter;
    for (unsigned int atom = 0; atom < sim.paddedNumberOfAtoms; atom++)
    {
        if (counters[atom] > bufferCount)
            bufferCount = counters[atom];
    }
    sim.outputBuffers       = bufferCount;
    sim.energyOutputBuffers = max(sim.nonbond_threads_per_block, sim.localForces_threads_per_block)*sim.blocks;

    // Allocate the accumulation streams and publish their device pointers.
    gpu->psForce4     = new CUDAStream<float4>(sim.paddedNumberOfAtoms, bufferCount, "Force");
    gpu->psEnergy     = new CUDAStream<float>(sim.energyOutputBuffers, 1, "Energy");
    gpu->psBornForce  = new CUDAStream<float>(sim.paddedNumberOfAtoms, sim.nonbondOutputBuffers, "BornForce");
    gpu->psBornSum    = new CUDAStream<float>(sim.paddedNumberOfAtoms, sim.nonbondOutputBuffers, "BornSum");
    sim.pForce4       = gpu->psForce4->_pDevStream[0];
    sim.pEnergy       = gpu->psEnergy->_pDevStream[0];
    sim.pBornForce    = gpu->psBornForce->_pDevStream[0];
    sim.pBornSum      = gpu->psBornSum->_pDevStream[0];

    // Determine local energy parameter offsets for bonded interactions.
    // Each offset is the running sum of the preceding parameter strides.
    sim.bond_offset        = gpu->psBondParameter->_stride;
    sim.bond_angle_offset  = sim.bond_offset        + gpu->psBondAngleParameter->_stride;
    sim.dihedral_offset    = sim.bond_angle_offset  + gpu->psDihedralParameter->_stride;
    sim.rb_dihedral_offset = sim.dihedral_offset    + gpu->psRbDihedralParameter1->_stride;
    sim.LJ14_offset        = sim.rb_dihedral_offset + gpu->psLJ14Parameter->_stride;

    // Threads per block for the local forces: work per block rounded up to a
    // multiple of 16, then clamped to the range [1, max].
    sim.localForces_threads_per_block = (max(sim.LJ14_offset, sim.customBonds)/sim.blocks + 15) & 0xfffffff0;
    if (sim.localForces_threads_per_block > sim.max_localForces_threads_per_block)
    {
        sim.localForces_threads_per_block = sim.max_localForces_threads_per_block;
    }
    if (sim.localForces_threads_per_block < 1)
    {
        sim.localForces_threads_per_block = 1;
    }

    // Flip local force output buffers: every stored slot s becomes
    // (bufferCount - 1) - s.
    const int flip = bufferCount - 1;

    const int numBonds = (int) sim.bonds;
    for (int b = 0; b < numBonds; b++)
    {
        (*gpu->psBondID)[b].z = flip - (*gpu->psBondID)[b].z;
        (*gpu->psBondID)[b].w = flip - (*gpu->psBondID)[b].w;
    }

    const int numAngles = (int) sim.bond_angles;
    for (int a = 0; a < numAngles; a++)
    {
        (*gpu->psBondAngleID1)[a].w = flip - (*gpu->psBondAngleID1)[a].w;
        (*gpu->psBondAngleID2)[a].x = flip - (*gpu->psBondAngleID2)[a].x;
        (*gpu->psBondAngleID2)[a].y = flip - (*gpu->psBondAngleID2)[a].y;
    }

    const int numDihedrals = (int) sim.dihedrals;
    for (int d = 0; d < numDihedrals; d++)
    {
        (*gpu->psDihedralID2)[d].x = flip - (*gpu->psDihedralID2)[d].x;
        (*gpu->psDihedralID2)[d].y = flip - (*gpu->psDihedralID2)[d].y;
        (*gpu->psDihedralID2)[d].z = flip - (*gpu->psDihedralID2)[d].z;
        (*gpu->psDihedralID2)[d].w = flip - (*gpu->psDihedralID2)[d].w;
    }

    const int numRbDihedrals = (int) sim.rb_dihedrals;
    for (int d = 0; d < numRbDihedrals; d++)
    {
        (*gpu->psRbDihedralID2)[d].x = flip - (*gpu->psRbDihedralID2)[d].x;
        (*gpu->psRbDihedralID2)[d].y = flip - (*gpu->psRbDihedralID2)[d].y;
        (*gpu->psRbDihedralID2)[d].z = flip - (*gpu->psRbDihedralID2)[d].z;
        (*gpu->psRbDihedralID2)[d].w = flip - (*gpu->psRbDihedralID2)[d].w;
    }

    const int numLJ14 = (int) sim.LJ14s;
    for (int p = 0; p < numLJ14; p++)
    {
        (*gpu->psLJ14ID)[p].z = flip - (*gpu->psLJ14ID)[p].z;
        (*gpu->psLJ14ID)[p].w = flip - (*gpu->psLJ14ID)[p].w;
    }

    // Push the re-indexed ID streams to the device.
    gpu->psBondID->Upload();
    gpu->psBondAngleID1->Upload();
    gpu->psBondAngleID2->Upload();
    gpu->psDihedralID2->Upload();
    gpu->psRbDihedralID2->Upload();
    gpu->psLJ14ID->Upload();
    return 1;
}
/**
 * Allocate the nonbonded work-unit streams and fill the work list with one
 * entry per pair of atom blocks (upper triangle including the diagonal).
 *
 * Each work-list entry packs the pair as (x << 17) | (y << 2) with y <= x;
 * the low bits are left clear here.
 *
 * @param gpu  context to configure
 * @return     number of work units (block pairs) created
 */
extern "C"
int gpuBuildThreadBlockWorkList(gpuContext gpu)
{
    cudaGmxSimulation& sim   = gpu->sim;
    const unsigned int atoms = sim.paddedNumberOfAtoms;
    const unsigned int grid  = gpu->grid;
    const unsigned int dim   = (atoms + (grid - 1))/grid;   // atom blocks, rounded up
    const unsigned int cells = dim*(dim + 1)/2;             // block pairs in the upper triangle

    // Allocate the per-work-unit streams and publish their device pointers.
    gpu->psWorkUnit            = new CUDAStream<unsigned int>(cells, 1u, "WorkUnit");
    sim.pWorkUnit              = gpu->psWorkUnit->_pDevStream[0];
    unsigned int* pWorkList    = gpu->psWorkUnit->_pSysData;

    gpu->psInteractingWorkUnit = new CUDAStream<unsigned int>(cells, 1u, "InteractingWorkUnit");
    sim.pInteractingWorkUnit   = gpu->psInteractingWorkUnit->_pDevStream[0];

    gpu->psInteractionFlag     = new CUDAStream<unsigned int>(cells, 1u, "InteractionFlag");
    sim.pInteractionFlag       = gpu->psInteractionFlag->_pDevStream[0];

    gpu->psInteractionCount    = new CUDAStream<size_t>(1, 1u, "InteractionCount");
    sim.pInteractionCount      = gpu->psInteractionCount->_pDevStream[0];

    // One bounding box and one center per atom block.
    gpu->psGridBoundingBox     = new CUDAStream<float4>(dim, 1u, "GridBoundingBox");
    sim.pGridBoundingBox       = gpu->psGridBoundingBox->_pDevStream[0];

    gpu->psGridCenter          = new CUDAStream<float4>(dim, 1u, "GridCenter");
    sim.pGridCenter            = gpu->psGridCenter->_pDevStream[0];

    sim.nonbond_workBlock      = sim.nonbond_threads_per_block/GRID;
    sim.bornForce2_workBlock   = sim.bornForce2_threads_per_block/GRID;
    sim.workUnits              = cells;

    // Initialize the plan for doing stream compaction.
    planCompaction(gpu->compactPlan);

    // Increase block count if necessary for extra large molecules that would
    // otherwise overflow the SM workunit buffers
//    int minimumBlocks = (cells + gpu->sim.workUnitsPerSM - 1) / gpu->sim.workUnitsPerSM;
//    if ((int) gpu->sim.nonbond_blocks < minimumBlocks)
//    {
//        gpu->sim.nonbond_blocks = gpu->sim.nonbond_blocks * ((minimumBlocks + gpu->sim.nonbond_blocks - 1) / gpu->sim.nonbond_blocks);
//    }
//    if ((int) gpu->sim.bornForce2_blocks < minimumBlocks)
//    {
//        gpu->sim.bornForce2_blocks = gpu->sim.bornForce2_blocks * ((minimumBlocks + gpu->sim.bornForce2_blocks - 1) / gpu->sim.bornForce2_blocks);
//    }

    // Even split of the work units over the blocks, plus the leftover.
    sim.nbWorkUnitsPerBlock           = cells/sim.nonbond_blocks;
    sim.nbWorkUnitsPerBlockRemainder  = cells - sim.nonbond_blocks*sim.nbWorkUnitsPerBlock;
    sim.bf2WorkUnitsPerBlock          = cells/sim.bornForce2_blocks;
    sim.bf2WorkUnitsPerBlockRemainder = cells - sim.bornForce2_blocks*sim.bf2WorkUnitsPerBlock;

    // Launch shape for the interaction kernels: 64 threads per block, enough
    // blocks to cover every work unit, capped at 8 * sim.blocks.
    sim.interaction_threads_per_block = 64;
    sim.interaction_blocks = (sim.workUnits + sim.interaction_threads_per_block - 1)/sim.interaction_threads_per_block;
    if (sim.interaction_blocks > 8*sim.blocks)
    {
        sim.interaction_blocks = 8*sim.blocks;
    }

    // Decrease thread count for extra small molecules to spread computation
    // across entire chip
    int activeWorkUnits = sim.nonbond_blocks*sim.nonbond_workBlock;
    if (activeWorkUnits > (int) cells)
    {
        const int balancedWorkBlock   = (cells + sim.nonbond_blocks - 1)/sim.nonbond_blocks;
        sim.nonbond_threads_per_block = balancedWorkBlock*GRID;
        sim.nonbond_workBlock         = balancedWorkBlock;
    }
    activeWorkUnits = sim.bornForce2_blocks*sim.bornForce2_workBlock;
    if (activeWorkUnits > (int) cells)
    {
        const int balancedWorkBlock      = (cells + sim.bornForce2_blocks - 1)/sim.bornForce2_blocks;
        sim.bornForce2_threads_per_block = balancedWorkBlock*GRID;
        sim.bornForce2_workBlock         = balancedWorkBlock;
    }

    // Enumerate the block pairs row by row: for each y, every x >= y.
    unsigned int next = 0;
    for (unsigned int y = 0; y < dim; y++)
    {
        for (unsigned int x = y; x < dim; x++)
        {
            pWorkList[next++] = (x << 17) | (y << 2);
        }
    }

    // Initially every work unit counts as interacting.
    (*gpu->psInteractionCount)[0] = sim.workUnits;
    gpu->psInteractionCount->Upload();
    gpu->psWorkUnit->Upload();
    gpuSetConstants(gpu);
    return cells;
}
/**
 * Index of the work unit covering a pair of atom blocks.
 *
 * Work units are stored row by row over the upper triangle (for each row y,
 * every column x >= y), so the pair is ordered before the index is computed.
 *
 * @param blockA  one atom-block index
 * @param blockB  the other atom-block index
 * @param dim     number of atom blocks per side
 */
static int exclusionCellIndex(int blockA, int blockB, unsigned int dim)
{
    const int row = (blockA > blockB ? blockB : blockA);
    const int col = (blockA > blockB ? blockA : blockB);
    return col + row*dim - row*(row + 1)/2;
}

/**
 * Build the exclusion data used by the nonbonded kernels.
 *
 * Steps:
 *  1. Set bit 0 of every work unit that contains an excluded pair or touches
 *     the block holding padding atoms.
 *  2. Give each flagged work unit a slot of `grid` mask words in the
 *     exclusion stream and record the slot start in the exclusion index.
 *  3. Start from all-ones masks and clear one bit per excluded pair and per
 *     pair involving a padding atom.
 *  4. Upload the exclusion, exclusion-index and work-unit streams and refresh
 *     the device constants.
 *
 * Bit masks are built with unsigned arithmetic so that clearing bit 31 never
 * shifts a signed 1 into the sign bit.
 *
 * @param gpu  context whose work list has already been built and whose
 *             `exclusions` table is filled in
 */
extern "C"
void gpuBuildExclusionList(gpuContext gpu)
{
    const unsigned int atoms = gpu->sim.paddedNumberOfAtoms;
    const unsigned int grid  = gpu->grid;
    // Same rounding as the work list itself, so the cell index always matches
    // the layout that list was built with.
    const unsigned int dim   = (atoms + (grid - 1))/grid;
    unsigned int* pWorkList  = gpu->psWorkUnit->_pSysData;

    // Mark which work units have exclusions.
    for (int atom1 = 0; atom1 < (int) gpu->exclusions.size(); ++atom1)
    {
        const int x = atom1/grid;
        const std::vector<int>& excluded = gpu->exclusions[atom1];
        for (int j = 0; j < (int) excluded.size(); ++j)
        {
            const int y = excluded[j]/grid;
            pWorkList[exclusionCellIndex(x, y, dim)] |= 1;
        }
    }

    // Every work unit touching the block that contains padding atoms is
    // flagged too, because the padding pairs are excluded further down.
    if ((int) gpu->sim.paddedNumberOfAtoms > gpu->natoms)
    {
        const int lastBlock = gpu->natoms/grid;
        for (int i = 0; i < (int) gpu->sim.workUnits; ++i)
        {
            const int x = pWorkList[i] >> 17;
            const int y = (pWorkList[i] >> 2) & 0x7FFF;
            if (x == lastBlock || y == lastBlock)
                pWorkList[i] |= 1;
        }
    }

    // Build a list of indexes for the work units with exclusions.
    CUDAStream<unsigned int>* psExclusionIndex = new CUDAStream<unsigned int>(gpu->sim.workUnits, 1u, "ExclusionIndex");
    gpu->psExclusionIndex         = psExclusionIndex;
    unsigned int* pExclusionIndex = psExclusionIndex->_pSysData;
    gpu->sim.pExclusionIndex      = psExclusionIndex->_pDevData;
    int numWithExclusions = 0;
    for (int i = 0; i < (int) psExclusionIndex->_length; ++i)
    {
        // Only flagged work units receive a slot; other entries are not written.
        if ((pWorkList[i] & 1) == 1)
            pExclusionIndex[i] = (numWithExclusions++)*grid;
    }

    // Record the exclusion data: one mask word per atom of each flagged work
    // unit, all bits set (nothing excluded) to begin with.
    CUDAStream<unsigned int>* psExclusion = new CUDAStream<unsigned int>(numWithExclusions*grid, 1u, "Exclusion");
    gpu->psExclusion         = psExclusion;
    unsigned int* pExclusion = psExclusion->_pSysData;
    gpu->sim.pExclusion      = psExclusion->_pDevData;
    for (int i = 0; i < (int) psExclusion->_length; ++i)
        pExclusion[i] = 0xFFFFFFFF;

    for (int atom1 = 0; atom1 < (int) gpu->exclusions.size(); ++atom1)
    {
        const int x       = atom1/grid;
        const int offset1 = atom1 - x*grid;
        const std::vector<int>& excluded = gpu->exclusions[atom1];
        for (int j = 0; j < (int) excluded.size(); ++j)
        {
            const int atom2   = excluded[j];
            const int y       = atom2/grid;
            const int offset2 = atom2 - y*grid;
            const int cell    = exclusionCellIndex(x, y, dim);
            // The mask word is selected by the atom in the larger block and
            // the cleared bit by the atom in the smaller one.
            if (x > y)
                pExclusion[pExclusionIndex[cell] + offset1] &= ~(1u << offset2);
            else
                pExclusion[pExclusionIndex[cell] + offset2] &= ~(1u << offset1);
        }
    }

    // Mark all interactions that involve a padding atom as being excluded.
    for (int atom1 = gpu->natoms; atom1 < (int) atoms; ++atom1)
    {
        const int x       = atom1/grid;
        const int offset1 = atom1 - x*grid;
        for (int atom2 = 0; atom2 < (int) atoms; ++atom2)
        {
            const int y       = atom2/grid;
            const int offset2 = atom2 - y*grid;
            const int cell    = exclusionCellIndex(x, y, dim);
            // On the diagonal (x == y) both orientations are cleared.
            if (x >= y)
                pExclusion[pExclusionIndex[cell] + offset1] &= ~(1u << offset2);
            if (y >= x)
                pExclusion[pExclusionIndex[cell] + offset2] &= ~(1u << offset1);
        }
    }

    psExclusion->Upload();
    psExclusionIndex->Upload();
    gpu->psWorkUnit->Upload();
    gpuSetConstants(gpu);
}
/**
 * Hand the context to every per-kernel "Set...Sim" routine, in a fixed order.
 *
 * NOTE(review): each routine presumably copies gpu->sim into that kernel
 * module's device constants -- confirm in the individual .cu files.
 *
 * @param gpu  context to propagate
 * @return     always 1
 */
extern "C"
int gpuSetConstants(gpuContext gpu)
{
    // Nonbonded and custom forces.
    SetCalculateCDLJForcesSim(gpu);
    SetCalculateCDLJObcGbsaForces1Sim(gpu);
    SetCalculateCustomNonbondedForcesSim(gpu);
    SetCalculateCustomBondForcesSim(gpu);
    SetCalculateCustomAngleForcesSim(gpu);
    SetCalculateCustomTorsionForcesSim(gpu);
    SetCalculateCustomExternalForcesSim(gpu);
    SetCalculateLocalForcesSim(gpu);

    // Implicit solvent (OBC and GB/VI).
    SetCalculateObcGbsaBornSumSim(gpu);
    SetCalculateGBVIBornSumSim(gpu);
    SetCalculateObcGbsaForces2Sim(gpu);
    SetCalculateGBVIForces2Sim(gpu);

    // Thermostat, PME and force reduction.
    SetCalculateAndersenThermostatSim(gpu);
    SetCalculatePMESim(gpu);
    SetForcesSim(gpu);

    // Constraints, integrators and random numbers.
    SetShakeHSim(gpu);
    SetLangevinUpdateSim(gpu);
    SetVerletUpdateSim(gpu);
    SetBrownianUpdateSim(gpu);
    SetSettleSim(gpu);
    SetCCMASim(gpu);
    SetRandomSim(gpu);

    return 1;
}
/**
 * Tag an atom, and every atom reachable from it through still-untagged
 * neighbours, as belonging to one molecule.
 *
 * The traversal uses an explicit stack instead of recursion, so its depth is
 * limited by heap memory rather than by the call stack; a long chain of
 * bonded atoms can no longer overflow the stack.
 *
 * @param atom          index of the atom to start from; it is tagged unconditionally
 * @param molecule      tag to assign
 * @param atomMolecule  per-atom tag table; -1 marks an atom that is not yet tagged
 * @param atomBonds     per-atom list of neighbouring atom indices
 */
static void tagAtomsInMolecule(int atom, int molecule, std::vector<int>& atomMolecule, std::vector<std::vector<int> >& atomBonds)
{
    atomMolecule[atom] = molecule;
    std::vector<int> pending(1, atom);
    while (!pending.empty())
    {
        const int current = pending.back();
        pending.pop_back();
        const std::vector<int>& neighbors = atomBonds[current];
        for (std::vector<int>::size_type i = 0; i < neighbors.size(); i++)
        {
            const int neighbor = neighbors[i];
            if (atomMolecule[neighbor] == -1)
            {
                // Tag on discovery so an atom is never queued twice.
                atomMolecule[neighbor] = molecule;
                pending.push_back(neighbor);
            }
        }
    }
}
/**
 * Partition the atoms into molecules, group identical molecules together, and
 * store the result in gpu->moleculeGroups.
 *
 * Two atoms belong to the same molecule when they are linked through a
 * particle group reported by any entry of gpu->forces or through a SHAKE,
 * SETTLE or CCMA constraint. Two molecules are considered identical when
 * their atoms, constraints and force groups match one-to-one after shifting
 * by the difference between their first atom indices.
 *
 * @param gpu  context providing the forces, constraint streams and the host
 *             copy of psVelm4; receives the molecule groups
 */
static void findMoleculeGroups(gpuContext gpu)
{
    const int numForces = (int) gpu->forces.size();

    // First make a list of constraints for future use.
    vector<Constraint> constraints;

    // SHAKE: the x atom is paired with y and, when present, with z and w
    // (-1 marks an unused slot); all pairs share one squared distance.
    const int numShake = (int) gpu->sim.ShakeConstraints;
    for (int s = 0; s < numShake; s++)
    {
        const int center      = (*gpu->psShakeID)[s].x;
        const int partners[3] = { (*gpu->psShakeID)[s].y, (*gpu->psShakeID)[s].z, (*gpu->psShakeID)[s].w };
        const float distSqr   = (*gpu->psShakeParameter)[s].z;
        constraints.push_back(Constraint(center, partners[0], distSqr));
        for (int p = 1; p < 3; p++)
        {
            if (partners[p] != -1)
                constraints.push_back(Constraint(center, partners[p], distSqr));
        }
    }

    // SETTLE: three atoms, with distances 1-2 and 1-3 equal and 2-3 separate.
    const int numSettle = (int) gpu->sim.settleConstraints;
    for (int s = 0; s < numSettle; s++)
    {
        const int first    = (*gpu->psSettleID)[s].x;
        const int second   = (*gpu->psSettleID)[s].y;
        const int third    = (*gpu->psSettleID)[s].z;
        const float dist12 = (*gpu->psSettleParameter)[s].x;
        const float dist23 = (*gpu->psSettleParameter)[s].y;
        constraints.push_back(Constraint(first, second, dist12*dist12));
        constraints.push_back(Constraint(first, third, dist12*dist12));
        constraints.push_back(Constraint(second, third, dist23*dist23));
    }

    // CCMA: plain atom pairs.
    const int numCcma = (int) gpu->sim.ccmaConstraints;
    for (int c = 0; c < numCcma; c++)
    {
        const int first     = (*gpu->psCcmaAtoms)[c].x;
        const int second    = (*gpu->psCcmaAtoms)[c].y;
        const float distSqr = (*gpu->psCcmaDistance)[c].w;
        constraints.push_back(Constraint(first, second, distSqr));
    }

    // Next make a list of every other atom to which each atom is connected by
    // a force particle group or a constraint.
    const int numAtoms = gpu->natoms;
    vector<vector<int> > atomBonds(numAtoms);
    for (int f = 0; f < numForces; f++)
    {
        for (int g = 0; g < gpu->forces[f]->getNumParticleGroups(); g++)
        {
            vector<int> members;
            gpu->forces[f]->getParticlesInGroup(g, members);
            const int numMembers = (int) members.size();
            // Every member of a group is linked to every other member.
            for (int a = 0; a < numMembers; a++)
            {
                for (int b = 0; b < numMembers; b++)
                {
                    if (a != b)
                        atomBonds[members[a]].push_back(members[b]);
                }
            }
        }
    }
    for (int c = 0; c < (int) constraints.size(); c++)
    {
        const int first  = constraints[c].atom1;
        const int second = constraints[c].atom2;
        atomBonds[first].push_back(second);
        atomBonds[second].push_back(first);
    }

    // Now tag atoms by which molecule they belong to.
    vector<int> atomMolecule(numAtoms, -1);
    int numMolecules = 0;
    for (int atom = 0; atom < numAtoms; atom++)
    {
        if (atomMolecule[atom] == -1)
        {
            tagAtomsInMolecule(atom, numMolecules, atomMolecule, atomBonds);
            numMolecules++;
        }
    }
    vector<vector<int> > atomIndices(numMolecules);
    for (int atom = 0; atom < numAtoms; atom++)
        atomIndices[atomMolecule[atom]].push_back(atom);

    // Construct a description of each molecule.
    vector<Molecule> molecules(numMolecules);
    for (int m = 0; m < numMolecules; m++)
    {
        Molecule& molecule = molecules[m];
        molecule.atoms = atomIndices[m];
        molecule.groups.resize(gpu->forces.size());
    }
    for (int f = 0; f < numForces; f++)
    {
        for (int g = 0; g < gpu->forces[f]->getNumParticleGroups(); g++)
        {
            // A group is filed under the molecule that owns its first particle.
            vector<int> members;
            gpu->forces[f]->getParticlesInGroup(g, members);
            molecules[atomMolecule[members[0]]].groups[f].push_back(g);
        }
    }
    for (int c = 0; c < (int) constraints.size(); c++)
    {
        // A constraint is filed under the molecule that owns its first atom.
        molecules[atomMolecule[constraints[c].atom1]].constraints.push_back(c);
    }

    // Sort them into groups of identical molecules.
    vector<Molecule> uniqueMolecules;
    vector<vector<int> > moleculeInstances;
    float4* velm = gpu->psVelm4->_pSysData;
    for (int molIndex = 0; molIndex < (int) molecules.size(); molIndex++)
    {
        Molecule& candidate = molecules[molIndex];

        // See if it is identical to another molecule.
        bool isNew = true;
        for (int u = 0; u < (int) uniqueMolecules.size() && isNew; u++)
        {
            Molecule& reference = uniqueMolecules[u];
            bool identical = (candidate.atoms.size() == reference.atoms.size() &&
                              candidate.constraints.size() == reference.constraints.size());

            // See if the atoms are identical: same relative index, same .w
            // component in the velocity stream, and indistinguishable to
            // every force.
            const int atomOffset = reference.atoms[0] - candidate.atoms[0];
            for (int a = 0; a < (int) candidate.atoms.size() && identical; a++)
            {
                const int candidateAtom = candidate.atoms[a];
                const int referenceAtom = reference.atoms[a];
                if (candidateAtom != referenceAtom - atomOffset || velm[candidateAtom].w != velm[referenceAtom].w)
                    identical = false;
                for (int f = 0; f < numForces; f++)
                {
                    if (!gpu->forces[f]->areParticlesIdentical(candidateAtom, referenceAtom))
                        identical = false;
                }
            }

            // See if the constraints are identical.
            for (int c = 0; c < (int) candidate.constraints.size() && identical; c++)
            {
                const Constraint& mine   = constraints[candidate.constraints[c]];
                const Constraint& theirs = constraints[reference.constraints[c]];
                if (mine.atom1 != theirs.atom1 - atomOffset ||
                    mine.atom2 != theirs.atom2 - atomOffset ||
                    mine.distance2 != theirs.distance2)
                    identical = false;
            }

            // See if the force groups are identical.
            for (int f = 0; f < numForces && identical; f++)
            {
                if (candidate.groups[f].size() != reference.groups[f].size())
                    identical = false;
                for (int g = 0; g < (int) candidate.groups[f].size() && identical; g++)
                {
                    if (!gpu->forces[f]->areGroupsIdentical(candidate.groups[f][g], reference.groups[f][g]))
                        identical = false;
                }
            }

            if (identical)
            {
                // Another instance of an already known molecule type.
                moleculeInstances[u].push_back(candidate.atoms[0]);
                isNew = false;
            }
        }
        if (isNew)
        {
            // First molecule of its kind: it becomes the template and its
            // own first instance.
            uniqueMolecules.push_back(candidate);
            moleculeInstances.push_back(vector<int>(1, candidate.atoms[0]));
        }
    }

    // Publish the groups: instance start atoms plus atom offsets relative to
    // the first atom of the template molecule.
    gpu->moleculeGroups.resize(moleculeInstances.size());
    for (int g = 0; g < (int) moleculeInstances.size(); g++)
    {
        gpuMoleculeGroup& group      = gpu->moleculeGroups[g];
        const vector<int>& templateAtoms = uniqueMolecules[g].atoms;
        group.instances = moleculeInstances[g];
        group.atoms.resize(templateAtoms.size());
        for (int a = 0; a < (int) templateAtoms.size(); a++)
            group.atoms[a] = templateAtoms[a] - templateAtoms[0];
    }
}
/**
 * Reorder atoms so that molecules close together in space are also close
 * together in the atom arrays.
 *
 * Only whole molecules are moved, and a molecule only ever trades places with
 * another instance of the same molecule group. Nothing happens when there are
 * no atoms or the nonbonded cutoff is zero. With periodic boundary conditions
 * each molecule is first translated so that its center lies inside the
 * primary box, and gpu->posCellOffsets records the shift.
 *
 * Positions, velocities, the atom index map and the cell offsets are permuted
 * together, then the three streams are uploaded.
 *
 * @param gpu  context whose atoms are reordered
 */
extern "C"
void gpuReorderAtoms(gpuContext gpu)
{
    if (gpu->natoms == 0 || gpu->sim.nonbondedCutoffSqr == 0.0)
        return;
    if (gpu->moleculeGroups.size() == 0)
        findMoleculeGroups(gpu);

    const bool periodic = (gpu->sim.nonbondedMethod == PERIODIC ||
                           gpu->sim.nonbondedMethod == EWALD ||
                           gpu->sim.nonbondedMethod == PARTICLE_MESH_EWALD);

    // Find the range of positions and the number of bins along each axis.
    const int numAtoms = gpu->natoms;
    gpu->psPosq4->Download();
    gpu->psVelm4->Download();
    float4* posq = gpu->psPosq4->_pSysData;
    float4* velm = gpu->psVelm4->_pSysData;
    float minx = posq[0].x, maxx = posq[0].x;
    float miny = posq[0].y, maxy = posq[0].y;
    float minz = posq[0].z, maxz = posq[0].z;
    if (periodic)
    {
        // The periodic box itself is the range.
        minx = miny = minz = 0.0;
        maxx = gpu->sim.periodicBoxSizeX;
        maxy = gpu->sim.periodicBoxSizeY;
        maxz = gpu->sim.periodicBoxSizeZ;
    }
    else
    {
        // Otherwise use the bounding box of the atoms.
        for (int a = 1; a < numAtoms; a++)
        {
            const float4& p = posq[a];
            minx = min(minx, p.x);
            maxx = max(maxx, p.x);
            miny = min(miny, p.y);
            maxy = max(maxy, p.y);
            minz = min(minz, p.z);
            maxz = max(maxz, p.z);
        }
    }

    // Loop over each group of identical molecules and reorder them.
    vector<int> originalIndex(numAtoms);
    vector<float4> newPosq(numAtoms);
    vector<float4> newVelm(numAtoms);
    vector<int3> newCellOffsets(numAtoms);
    for (int g = 0; g < (int) gpu->moleculeGroups.size(); g++)
    {
        gpuMoleculeGroup& mol  = gpu->moleculeGroups[g];
        const int numMolecules = mol.instances.size();
        vector<int>& atoms     = mol.atoms;
        const int atomsPerMol  = (int) atoms.size();

        // Find the center of each molecule (plain average of its atoms).
        vector<float3> molPos(numMolecules);
        for (int m = 0; m < numMolecules; m++)
        {
            float3& center = molPos[m];
            center.x = 0.0f;
            center.y = 0.0f;
            center.z = 0.0f;
            for (int k = 0; k < atomsPerMol; k++)
            {
                const int atom = atoms[k] + mol.instances[m];
                center.x += posq[atom].x;
                center.y += posq[atom].y;
                center.z += posq[atom].z;
            }
            center.x /= atoms.size();
            center.y /= atoms.size();
            center.z /= atoms.size();
        }

        if (periodic)
        {
            // Move each molecule position into the same box.
            for (int m = 0; m < numMolecules; m++)
            {
                const int xcell = (int) floor(molPos[m].x/gpu->sim.periodicBoxSizeX);
                const int ycell = (int) floor(molPos[m].y/gpu->sim.periodicBoxSizeY);
                const int zcell = (int) floor(molPos[m].z/gpu->sim.periodicBoxSizeZ);
                const float dx  = xcell*gpu->sim.periodicBoxSizeX;
                const float dy  = ycell*gpu->sim.periodicBoxSizeY;
                const float dz  = zcell*gpu->sim.periodicBoxSizeZ;
                if (dx != 0.0f || dy != 0.0f || dz != 0.0f)
                {
                    molPos[m].x -= dx;
                    molPos[m].y -= dy;
                    molPos[m].z -= dz;
                    // Shift every atom of the molecule by the same amount and
                    // record the shift in the cell offsets.
                    for (int k = 0; k < atomsPerMol; k++)
                    {
                        const int atom = atoms[k] + mol.instances[m];
                        posq[atom].x -= dx;
                        posq[atom].y -= dy;
                        posq[atom].z -= dz;
                        gpu->posCellOffsets[atom].x -= xcell;
                        gpu->posCellOffsets[atom].y -= ycell;
                        gpu->posCellOffsets[atom].z -= zcell;
                    }
                }
            }
        }

        // Select a bin for each molecule, then sort them by bin.
        // For small systems, a simple zigzag curve works better than a Hilbert curve.
        const bool useHilbert = (numMolecules > 5000 || atoms.size() > 8);
        float binWidth;
        if (useHilbert)
            binWidth = (float) (max(max(maxx-minx, maxy-miny), maxz-minz)/255.0);
        else
            binWidth = (float) (0.2*sqrt(gpu->sim.nonbondedCutoffSqr));
        const int xbins = 1 + (int) ((maxx-minx)/binWidth);
        const int ybins = 1 + (int) ((maxy-miny)/binWidth);
        vector<pair<int, int> > molBins(numMolecules);
        bitmask_t coords[3];
        for (int m = 0; m < numMolecules; m++)
        {
            const int x = (int) ((molPos[m].x-minx)/binWidth);
            const int y = (int) ((molPos[m].y-miny)/binWidth);
            const int z = (int) ((molPos[m].z-minz)/binWidth);
            int bin;
            if (useHilbert)
            {
                // Position along a 3-D Hilbert curve with 8 bits per axis.
                coords[0] = x;
                coords[1] = y;
                coords[2] = z;
                bin = (int) hilbert_c2i(3, 8, coords);
            }
            else
            {
                // Zigzag: the direction along y alternates with z and the
                // direction along x alternates with y.
                const int yodd = y & 1;
                const int zodd = z & 1;
                bin  = z*xbins*ybins;
                bin += (zodd ? ybins-y : y)*xbins;
                bin += (yodd ? xbins-x : x);
            }
            molBins[m].first  = bin;
            molBins[m].second = m;
        }
        sort(molBins.begin(), molBins.end());

        // Reorder the atoms: the m-th instance slot receives the molecule that
        // sorted into position m.
        for (int m = 0; m < numMolecules; m++)
        {
            const int oldBase = mol.instances[molBins[m].second];
            const int newBase = mol.instances[m];
            for (int k = 0; k < atomsPerMol; k++)
            {
                const int oldIndex = oldBase + atoms[k];
                const int newIndex = newBase + atoms[k];
                originalIndex[newIndex]  = (*gpu->psAtomIndex)[oldIndex];
                newPosq[newIndex]        = posq[oldIndex];
                newVelm[newIndex]        = velm[oldIndex];
                newCellOffsets[newIndex] = gpu->posCellOffsets[oldIndex];
            }
        }
    }

    // Update the streams.
    for (int a = 0; a < numAtoms; a++)
    {
        posq[a] = newPosq[a];
        velm[a] = newVelm[a];
        (*gpu->psAtomIndex)[a] = originalIndex[a];
        gpu->posCellOffsets[a] = newCellOffsets[a];
    }
    gpu->psPosq4->Upload();
    gpu->psVelm4->Upload();
    gpu->psAtomIndex->Upload();
}
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
a2674e83
#ifndef __GPUTYPES_H__
Vim:
Warning:
Output
is
not
to
a
terminal
#define __GPUTYPES_H__
[
?
1049
h
[
?
1
h
=
[
1
;
59
r
[
?
12
;
25
h
[
?
12l
[
?
25
h
[
27
m
[
m
[
H
[
2
J
[
?
25l
[
59
;
1
H
"svn-commit.tmp"
15L
,
601
C
[
1
;
1
H
[
33
m
1
2
[
m
[
32
m
--
This
line
,
and
those
below
,
will
be
ignored
--
[
m
/* -------------------------------------------------------------------------- *
[
33
m
3
* OpenMM *
4
[
m
[
1
m
[
35
mM
plugins
/
amoeba
/
platforms
/
cuda
/
src
/
AmoebaCudaKernelFactory
.
cpp
[
m
* -------------------------------------------------------------------------- *
[
33
m
5
[
m
[
1
m
[
35
mM
plugins
/
freeEnergy
/
platforms
/
reference
/
src
/
gbsa
/
CpuGBVISoftcore
.
cpp
[
m
* This is part of the OpenMM molecular simulation toolkit originating from *
[
33
m
6
[
m
[
1
m
[
35
mM
openmmapi
/
include
/
openmm
/
GBVIForce
.
h
[
m
* Simbios, the NIH National Center for Physics-Based Simulation of *
[
33
m
7
[
m
[
1
m
[
35
mM
openmmapi
/
src
/
GBVIForce
.
cpp
[
m
* Biological Structures at Stanford, funded under the NIH Roadmap for *
[
33
m
8
[
m
[
1
m
[
35
mM
olla
/
src
/
Platform
.
cpp
[
m
* Medical Research, grant U54 GM072970. See https://simtk.org. *
[
33
m
9
[
m
[
1
m
[
35
mM
platforms
/
opencl
/
src
/
OpenCLContext
.
h
[
m
* *
[
33
m
10
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
CudaKernels
.
cpp
[
m
* Portions copyright (c) 2009 Stanford University and the Authors. *
[
33
m
11
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
kCalculateGBVIBornSum
.
cu
[
m
* Authors: Scott Le Grand, Peter Eastman *
[
33
m
12
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
gputypes
.
h
[
m
* Contributors: *
[
33
m
13
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
cudatypes
.
h
[
m
* *
[
33
m
14
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
kForces
.
cu
[
m
* This program is free software: you can redistribute it and/or modify *
[
33
m
15
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
gpu
.
cpp
[
m
* it under the terms of the GNU Lesser General Public License as published *
[
1
m
[
34
m
~
[
17
;
1
H
~
[
18
;
1
H
~
[
19
;
1
H
~
[
20
;
1
H
~
[
21
;
1
H
~
[
22
;
1
H
~
[
23
;
1
H
~
[
24
;
1
H
~
[
25
;
1
H
~
[
26
;
1
H
~
[
27
;
1
H
~
[
28
;
1
H
~
[
29
;
1
H
~
[
30
;
1
H
~
[
31
;
1
H
~
[
32
;
1
H
~
[
33
;
1
H
~
[
34
;
1
H
~
[
35
;
1
H
~
[
36
;
1
H
~
[
37
;
1
H
~
[
38
;
1
H
~
[
39
;
1
H
~
[
40
;
1
H
~
[
41
;
1
H
~
[
42
;
1
H
~
[
43
;
1
H
~
[
44
;
1
H
~
[
45
;
1
H
~
[
46
;
1
H
~
[
47
;
1
H
~
[
48
;
1
H
~
[
49
;
1
H
~
[
50
;
1
H
~
[
51
;
1
H
~
[
52
;
1
H
~
[
53
;
1
H
~
[
54
;
1
H
~
[
55
;
1
H
~
[
56
;
1
H
~
[
57
;
1
H
~
[
58
;
1
H
~
[
m
[
59
;
217
H1
,
0
-
1
[
9
CAll
[
1
;
5
H
[
?
12l
[
?
25
h
[
?
25l
[
59
;
1
H
[
K
[
59
;
1
H
:
[
?
12l
[
?
25
hq
!
[
?
25l
[
59
;
1
H
[
K
[
59
;
1
H
[
?
1l
>
[
?
12l
[
?
25
h
[
?
1049l
* by the Free Software Foundation, either version 3 of the License, or *
Log
message
unchanged
or
not
specified
* (at your option) any later version. *
a
)
bort
,
c
)
ontinue
,
e
)
dit
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudatypes.h"
#include "cudaCompact.h"
#include <vector>
#include "windowsExportCuda.h"
namespace
OpenMM
{
class
CudaForceInfo
;
}
// One entry of the atom-type table that _gpuContext::gpAtomTable points to.
// NOTE(review): `r` is presumably a radius -- confirm against the code that
// fills the table; nothing in this header establishes its meaning or units.
struct gpuAtomType {
    std::string name;   // type name
    char symbol;        // one-character symbol
    float r;
};
// Two integer index lists describing a group of molecules.
// NOTE(review): presumably `atoms` holds the atom indices of one representative
// molecule and `instances` identifies each molecule sharing that layout --
// confirm against the code that populates _gpuContext::moleculeGroups.
struct gpuMoleculeGroup {
    std::vector<int> atoms;
    std::vector<int> instances;
};
// Host-side record for one tabulated function slot (_gpuContext keeps an array
// of MAX_TABULATED_FUNCTIONS of these).
//
// The default constructor leaves the slot in a well-defined "empty" state:
// no coefficient stream and a zero-width [min, max] range. Previously only
// `coefficients` was initialized, so `min`/`max` held indeterminate values
// until the slot was filled in.
struct gpuTabulatedFunction {
    // Initializers are listed in member declaration order.
    gpuTabulatedFunction() : min(0.0), max(0.0), coefficients(NULL) {
    }
    std::string name;                   // function name
    double min, max;                    // range bounds supplied with the table
    CUDAStream<float4>* coefficients;   // NULL until a table is assigned
};
enum
SM_VERSION
{
SM_10
,
SM_11
,
SM_12
,
SM_20
};
/* Pointer to this structure will be given
 * to gromacs functions.
 *
 * Host-side state for one GPU simulation: scalar settings and flags, the
 * cudaGmxSimulation block (`sim`) that the kernel files copy into their
 * __constant__ cSim, and pointers to the CUDAStream buffers. The public
 * handle type `gpuContext` (declared right after the struct) is a pointer
 * to this struct. */
struct _gpuContext {
    //Cache this here so that it doesn't
    //have to be repeatedly passed around
    int natoms;
    int device;
    bool useBlockingSync;
    gpuAtomType* gpAtomTable;
    int gAtomTypes;
    unsigned int blocksPerSM;
    unsigned int sharedMemoryPerBlock;
    cudaGmxSimulation sim;
    unsigned int* pOutputBufferCounter;
    std::vector<OpenMM::CudaForceInfo*> forces;
    std::vector<std::vector<int> > exclusions;
    unsigned char* pAtomSymbol;
    std::vector<gpuMoleculeGroup> moleculeGroups;
    gpuTabulatedFunction tabulatedFunctions[MAX_TABULATED_FUNCTIONS];
    std::vector<int3> posCellOffsets;
    int iterations;
    float epsfac;
    float solventDielectric;
    float soluteDielectric;
    int grid;

    // Boolean switches.
    bool bCalculateCM;
    bool bRemoveCM;
    bool bRecalculateBornRadii;     // cleared by kReduceGBVIBornSum() after it launches its kernel
    bool bOutputBufferPerWarp;      // selects the "ByWarp" kernel variants in the launchers
    bool bIncludeGBSA;
    bool bIncludeGBVI;
    bool tabulatedFunctionsChanged;

    unsigned long seed;
    SM_VERSION sm_version;
    compactionPlan compactPlan;
    cufftHandle fftplan;

    // Stream buffers.
    CUDAStream<float4>* psPosq4;
    CUDAStream<float4>* psPosqP4;
    CUDAStream<float4>* psOldPosq4;
    CUDAStream<float4>* psVelm4;
    CUDAStream<float4>* psForce4;
    CUDAStream<float>* psEnergy;                    // Energy output buffer
    CUDAStream<float2>* psSigEps2;
    CUDAStream<float4>* psCustomParams;             // Atom parameters for custom nonbonded force
    CUDAStream<int4>* psCustomBondID;               // Atom indices for custom bonds
    CUDAStream<float4>* psCustomBondParams;         // Parameters for custom bonds
    CUDAStream<int4>* psCustomAngleID1;             // Atom indices for custom angles
    CUDAStream<int2>* psCustomAngleID2;             // Atom indices for custom angles
    CUDAStream<float4>* psCustomAngleParams;        // Parameters for custom angles
    CUDAStream<int4>* psCustomTorsionID1;           // Atom indices for custom torsions
    CUDAStream<int4>* psCustomTorsionID2;           // Atom indices for custom torsions
    CUDAStream<float4>* psCustomTorsionParams;      // Parameters for custom torsions
    CUDAStream<int>* psCustomExternalID;            // Atom indices for custom external force
    CUDAStream<float4>* psCustomExternalParams;     // Parameters for custom external force
    CUDAStream<float4>* psTabulatedFunctionParams;  // The min, max, and spacing for each tabulated function
    CUDAStream<float2>* psEwaldCosSinSum;
    CUDAStream<float>* psTabulatedErfc;             // Tabulated values for erfc()
    CUDAStream<cufftComplex>* psPmeGrid;            // Grid points for particle mesh Ewald
    CUDAStream<float>* psPmeBsplineModuli[3];
    CUDAStream<float4>* psPmeBsplineTheta;
    CUDAStream<float4>* psPmeBsplineDtheta;
    CUDAStream<int>* psPmeAtomRange;                // The range of sorted atoms at each grid point
    CUDAStream<int2>* psPmeAtomGridIndex;           // The grid point each atom is at
    CUDAStream<float2>* psObcData;
    CUDAStream<float4>* psGBVIData;
    CUDAStream<float>* psObcChain;
    CUDAStream<float>* psBornForce;
    CUDAStream<float>* psBornRadii;
    CUDAStream<float>* psBornSum;
    CUDAStream<int4>* psBondID;
    CUDAStream<float2>* psBondParameter;
    CUDAStream<int4>* psBondAngleID1;
    CUDAStream<int2>* psBondAngleID2;
    CUDAStream<float2>* psBondAngleParameter;
    CUDAStream<int4>* psDihedralID1;
    CUDAStream<int4>* psDihedralID2;
    CUDAStream<float4>* psDihedralParameter;
    CUDAStream<int4>* psRbDihedralID1;
    CUDAStream<int4>* psRbDihedralID2;
    CUDAStream<float4>* psRbDihedralParameter1;
    CUDAStream<float2>* psRbDihedralParameter2;
    CUDAStream<int4>* psLJ14ID;
    CUDAStream<float4>* psLJ14Parameter;
    CUDAStream<int4>* psShakeID;
    CUDAStream<float4>* psShakeParameter;
    CUDAStream<int4>* psSettleID;
    CUDAStream<float2>* psSettleParameter;
    CUDAStream<unsigned int>* psExclusion;
    CUDAStream<unsigned int>* psExclusionIndex;
    CUDAStream<unsigned int>* psWorkUnit;
    CUDAStream<unsigned int>* psInteractingWorkUnit;
    CUDAStream<unsigned int>* psInteractionFlag;
    CUDAStream<size_t>* psInteractionCount;
    CUDAStream<float2>* psStepSize;                 // The size of the previous and current time steps
    CUDAStream<float>* psLangevinParameters;        // Parameters used for Langevin integration
    CUDAStream<float4>* psRandom4;                  // Pointer to sets of 4 random numbers for MD integration
    CUDAStream<float2>* psRandom2;                  // Pointer to sets of 2 random numbers for MD integration
    CUDAStream<uint4>* psRandomSeed;                // Pointer to each random seed
    CUDAStream<int>* psRandomPosition;              // Pointer to random number positions
    CUDAStream<float4>* psLinearMomentum;           // Pointer to total linear momentum per CTA
    CUDAStream<int>* psAtomIndex;                   // The original index of each atom
    CUDAStream<float4>* psGridBoundingBox;          // The size of each grid cell
    CUDAStream<float4>* psGridCenter;               // The center and radius for each grid cell
    CUDAStream<int2>* psCcmaAtoms;                  // The atoms connected by each CCMA constraint
    CUDAStream<float4>* psCcmaDistance;             // The displacement vector (x, y, z) and constraint distance (w) for each CCMA constraint
    CUDAStream<int>* psCcmaAtomConstraints;         // The indices of constraints involving each atom
    CUDAStream<int>* psCcmaNumAtomConstraints;      // The number of constraints involving each atom
    CUDAStream<float>* psCcmaDelta1;                // Workspace for CCMA
    CUDAStream<float>* psCcmaDelta2;                // Workspace for CCMA
    int* ccmaConvergedHostMarker;                   // Host memory used to communicate that CCMA has converged
    cudaEvent_t ccmaEvent;                          // Used to optimize communication during CCMA
    CUDAStream<float>* psCcmaReducedMass;           // The reduced mass for each CCMA constraint
    CUDAStream<float>* psRigidClusterMatrix;        // The inverse constraint matrix for each rigid cluster
    CUDAStream<unsigned int>* psRigidClusterConstraintIndex; // The index of each cluster in the stream containing cluster constraints.
    CUDAStream<unsigned int>* psRigidClusterMatrixIndex;     // The index of each cluster in the stream containing cluster matrices.
    CUDAStream<unsigned int>* psConstraintMatrixColumn;      // The column of each element in the constraint matrix.
    CUDAStream<float>* psConstraintMatrixValue;     // The value of each element in the constraint matrix.
};

// Handle type taken by every gpu* function declared in this header.
typedef struct _gpuContext* gpuContext;
// Function prototypes
//
// All entry points are declared extern "C" (unmangled symbol names) and, with
// the exception of gpuIsAvailable() and gpuInit(), take the gpuContext handle
// as their first argument. The ones marked OPENMMCUDA_EXPORT carry the export
// macro from windowsExportCuda.h. The comments below describe only what the
// signatures show; see the definitions for the exact semantics.

extern "C" bool gpuIsAvailable();

// --- Standard bonded terms: parallel per-term vectors of atom indices and parameters.
extern "C" void gpuSetBondParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<float>& length, const std::vector<float>& k);
extern "C" void gpuSetBondAngleParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3, const std::vector<float>& angle, const std::vector<float>& k);
extern "C" void gpuSetDihedralParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3, const std::vector<int>& atom4, const std::vector<float>& k, const std::vector<float>& phase, const std::vector<int>& periodicity);
extern "C" void gpuSetRbDihedralParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3, const std::vector<int>& atom4, const std::vector<float>& c0, const std::vector<float>& c1, const std::vector<float>& c2, const std::vector<float>& c3, const std::vector<float>& c4, const std::vector<float>& c5);
extern "C" void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<float>& c6, const std::vector<float>& c12, const std::vector<float>& q1, const std::vector<float>& q2);

// --- Nonbonded setup.
extern "C" void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const std::vector<int>& atom, const std::vector<float>& c6, const std::vector<float>& c12, const std::vector<float>& q, const std::vector<char>& symbol, const std::vector<std::vector<int> >& exclusions, CudaNonbondedMethod method);
extern "C" void gpuSetNonbondedCutoff(gpuContext gpu, float cutoffDistance, float solventDielectric);

// --- Tabulated functions and custom forces (energy given as an expression string).
extern "C" void gpuSetTabulatedFunction(gpuContext gpu, int index, const std::string& name, const std::vector<double>& values, double min, double max);
extern "C" void gpuSetCustomBondParameters(gpuContext gpu, const std::vector<int>& bondAtom1, const std::vector<int>& bondAtom2, const std::vector<std::vector<double> >& bondParams, const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);
extern "C" void gpuSetCustomAngleParameters(gpuContext gpu, const std::vector<int>& angleAtom1, const std::vector<int>& angleAtom2, const std::vector<int>& angleAtom3, const std::vector<std::vector<double> >& angleParams, const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);
extern "C" void gpuSetCustomTorsionParameters(gpuContext gpu, const std::vector<int>& torsionAtom1, const std::vector<int>& torsionAtom2, const std::vector<int>& torsionAtom3, const std::vector<int>& torsionAtom4, const std::vector<std::vector<double> >& torsionParams, const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);
extern "C" void gpuSetCustomExternalParameters(gpuContext gpu, const std::vector<int>& atomIndex, const std::vector<std::vector<double> >& atomParams, const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);
extern "C" void gpuSetCustomNonbondedParameters(gpuContext gpu, const std::vector<std::vector<double> >& parameters, const std::vector<std::vector<int> >& exclusions, CudaNonbondedMethod method, float cutoffDistance, const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);

// --- Long-range electrostatics and periodic box.
extern "C" void gpuSetEwaldParameters(gpuContext gpu, float alpha, int kmaxx, int kmaxy, int kmaxz);
extern "C" void gpuSetPMEParameters(gpuContext gpu, float alpha, int gridSizeX, int gridSizeY, int gridSizeZ);
extern "C" void OPENMMCUDA_EXPORT gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize);

// --- Implicit solvent (OBC and GB/VI).
extern "C" void gpuSetObcParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const std::vector<float>& radius, const std::vector<float>& scale, const std::vector<float>& charge);
extern "C" void gpuSetGBVIParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const std::vector<int>& atom, const std::vector<float>& radius, const std::vector<float>& gammas, const std::vector<float>& scaledRadii);

// --- Constraints.
extern "C" void gpuSetConstraintParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<float>& distance, const std::vector<float>& invMass1, const std::vector<float>& invMass2, float constraintTolerance);

// --- Buffers and per-atom state.
extern "C" int gpuAllocateInitialBuffers(gpuContext gpu);
extern "C" void gpuSetPositions(gpuContext gpu, const std::vector<float>& x, const std::vector<float>& y, const std::vector<float>& z);
extern "C" void gpuSetVelocities(gpuContext gpu, const std::vector<float>& x, const std::vector<float>& y, const std::vector<float>& z);
extern "C" void gpuSetMass(gpuContext gpu, const std::vector<float>& mass);
extern "C" void OPENMMCUDA_EXPORT gpuInitializeRandoms(gpuContext gpu);

// Returns void* rather than gpuContext; device defaults to 0 and
// useBlockingSync to false when omitted.
extern "C" OPENMMCUDA_EXPORT void* gpuInit(int numAtoms, unsigned int device = 0, bool useBlockingSync = false);

// --- Integrators and thermostat.
extern "C" void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature, float errorTol);
extern "C" void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT, float errorTol);
extern "C" void gpuSetBrownianIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature);
extern "C" void gpuSetAndersenThermostatParameters(gpuContext gpu, float temperature, float collisionFrequency);

// --- Lifecycle and work-list construction.
extern "C" void gpuShutDown(gpuContext gpu);
extern "C" int gpuBuildOutputBuffers(gpuContext gpu);
extern "C" int gpuBuildThreadBlockWorkList(gpuContext gpu);
extern "C" void gpuBuildExclusionList(gpuContext gpu);
extern "C" int OPENMMCUDA_EXPORT gpuSetConstants(gpuContext gpu);
extern "C" void gpuReorderAtoms(gpuContext gpu);
extern "C" void setExclusions(gpuContext gpu, const std::vector<std::vector<int> >& exclusions);
#endif //__GPUTYPES_H__
...
...
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
View file @
a2674e83
/* -------------------------------------------------------------------------- *
Vim:
Warning:
Output
is
not
to
a
terminal
* OpenMM *
[
?
1049h
[
?
1h
=
[
1
;
59
r
[
?
12
;
25h
[
?
12l
[
?
25h
[
27
m
[
m
[
H
[
2
J
[
?
25l
[
59
;
1
H
"svn-commit.tmp"
15L
,
601
C
[
1
;
1
H
[
33
m
1
* -------------------------------------------------------------------------- *
2
[
m
[
32
m
--
This
line
,
and
those
below
,
will
be
ignored
--
[
m
* This is part of the OpenMM molecular simulation toolkit originating from *
[
33
m
3
* Simbios, the NIH National Center for Physics-Based Simulation of *
4
[
m
[
1
m
[
35
mM
plugins
/
amoeba
/
platforms
/
cuda
/
src
/
AmoebaCudaKernelFactory
.
cpp
[
m
* Biological Structures at Stanford, funded under the NIH Roadmap for *
[
33
m
5
[
m
[
1
m
[
35
mM
plugins
/
freeEnergy
/
platforms
/
reference
/
src
/
gbsa
/
CpuGBVISoftcore
.
cpp
[
m
* Medical Research, grant U54 GM072970. See https://simtk.org. *
[
33
m
6
[
m
[
1
m
[
35
mM
openmmapi
/
include
/
openmm
/
GBVIForce
.
h
[
m
* *
[
33
m
7
[
m
[
1
m
[
35
mM
openmmapi
/
src
/
GBVIForce
.
cpp
[
m
* Portions copyright (c) 2009 Stanford University and the Authors. *
[
33
m
8
[
m
[
1
m
[
35
mM
olla
/
src
/
Platform
.
cpp
[
m
* Authors: Scott Le Grand, Peter Eastman *
[
33
m
9
[
m
[
1
m
[
35
mM
platforms
/
opencl
/
src
/
OpenCLContext
.
h
[
m
* Contributors: *
[
33
m
10
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
CudaKernels
.
cpp
[
m
* *
[
33
m
11
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
kCalculateGBVIBornSum
.
cu
[
m
* Permission is hereby granted, free of charge, to any person obtaining a *
[
33
m
12
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
gputypes
.
h
[
m
* copy of this software and associated documentation files (the "Software"), *
[
33
m
13
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
cudatypes
.
h
[
m
* to deal in the Software without restriction, including without limitation *
[
33
m
14
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
kForces
.
cu
[
m
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
[
33
m
15
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
gpu
.
cpp
[
m
* and/or sell copies of the Software, and to permit persons to whom the *
[
1
m
[
34
m
~
[
17
;
1
H
~
[
18
;
1
H
~
[
19
;
1
H
~
[
20
;
1
H
~
[
21
;
1
H
~
[
22
;
1
H
~
[
23
;
1
H
~
[
24
;
1
H
~
[
25
;
1
H
~
[
26
;
1
H
~
[
27
;
1
H
~
[
28
;
1
H
~
[
29
;
1
H
~
[
30
;
1
H
~
[
31
;
1
H
~
[
32
;
1
H
~
[
33
;
1
H
~
[
34
;
1
H
~
[
35
;
1
H
~
[
36
;
1
H
~
[
37
;
1
H
~
[
38
;
1
H
~
[
39
;
1
H
~
[
40
;
1
H
~
[
41
;
1
H
~
[
42
;
1
H
~
[
43
;
1
H
~
[
44
;
1
H
~
[
45
;
1
H
~
[
46
;
1
H
~
[
47
;
1
H
~
[
48
;
1
H
~
[
49
;
1
H
~
[
50
;
1
H
~
[
51
;
1
H
~
[
52
;
1
H
~
[
53
;
1
H
~
[
54
;
1
H
~
[
55
;
1
H
~
[
56
;
1
H
~
[
57
;
1
H
~
[
58
;
1
H
~
[
m
[
59
;
217
H1
,
0
-
1
[
9
CAll
[
1
;
5
H
[
?
12l
[
?
25h
[
?
25l
[
59
;
1
H
[
K
[
59
;
1
H
:
[
?
12l
[
?
25h
q
!
[
?
25l
[
59
;
1
H
[
K
[
59
;
1
H
[
?
1l
>
[
?
12l
[
?
25h
[
?
1049l
* Software is furnished to do so, subject to the following conditions: *
Log
message
unchanged
or
not
specified
* *
a
)
bort
,
c
)
ontinue
,
e
)
dit
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using
namespace
std
;
#include "gputypes.h"
#define UNROLLXX 0
#define UNROLLXY 0
struct
Atom
{
float
x
;
float
y
;
float
z
;
float
r
;
float
sr
;
float
sum
;
float
gamma
;
};
// File-local copy of the simulation parameters in CUDA constant memory.
// Written by SetCalculateGBVIBornSumSim() and read by the kernels in this
// translation unit.
static __constant__ cudaGmxSimulation cSim;
// Copies the host-side simulation block (gpu->sim) into this file's
// __constant__ cSim so the GB/VI Born-sum kernels see the current parameters.
// The copy status is handed to RTERROR (defined elsewhere) together with a
// message naming this function, so a failure can be traced to this
// translation unit rather than to a generic "SetSim".
void SetCalculateGBVIBornSumSim(gpuContext gpu)
{
    cudaError_t status;
    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyToSymbol: SetCalculateGBVIBornSumSim copy to cSim failed");
}
// Copies this file's __constant__ cSim back into the host-side simulation
// block (gpu->sim). The copy status is handed to RTERROR (defined elsewhere).
// The message now names this function; it previously said "SetSim", which
// pointed at the wrong direction of the transfer.
void GetCalculateGBVIBornSumSim(gpuContext gpu)
{
    cudaError_t status;
    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyFromSymbol: GetCalculateGBVIBornSumSim copy from cSim failed");
}
// The kernel body lives in kCalculateGBVIBornSum.h and is included six times.
// Before each inclusion METHOD_NAME is redefined to paste a different infix
// into the generated identifiers, and feature macros select the variant. The
// launcher kCalculateGBVIBornSum() below calls the resulting kernels
// (kCalculateGBVI{N2,Cutoff,Periodic}[ByWarp]BornSum_kernel).
//
// The sequence is order-dependent: USE_CUTOFF is never #undef'd, so it is
// still defined for the two periodic variants, and every macro set here
// remains defined after the last #include.

// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateGBVIBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateGBVIBornSum.h"

// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateGBVIBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateGBVIBornSum.h"

// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateGBVIBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVIBornSum.h"
// Collapses the per-buffer partial Born sums of each atom into a single value
// and converts it to a Born radius.
//
// Each thread starts at its global index and advances by the total number of
// launched threads until all cSim.atoms entries are processed. For an atom,
// the cSim.nonbondOutputBuffers partial sums are read from cSim.pBornSum at
// intervals of cSim.stride, and the radius is
//     (1 / R^3 - sum)^(-1/3),   R = cSim.pGBVIData[atom].x
// written to cSim.pBornRadii.
__global__ void kReduceGBVIBornSum_kernel()
{
    const unsigned int totalThreads = gridDim.x * blockDim.x;

    for (unsigned int atomIdx = blockIdx.x * blockDim.x + threadIdx.x;
         atomIdx < cSim.atoms;
         atomIdx += totalThreads)
    {
        const float4 params   = cSim.pGBVIData[atomIdx];
        float*       pPartial = cSim.pBornSum + atomIdx;

        // Accumulate this atom's contribution from every output buffer.
        float bornSum = 0.0f;
        for (int buffer = 0; buffer < cSim.nonbondOutputBuffers; buffer++)
        {
            bornSum  += *pPartial;
            pPartial += cSim.stride;
        }

        // Turn the accumulated sum into a Born radius.
        const float invRadius = 1.0f / params.x;
        bornSum = invRadius * invRadius * invRadius - bornSum;
        cSim.pBornRadii[atomIdx] = pow(bornSum, (-1.0f / 3.0f));
    }
}
// Host launcher for kReduceGBVIBornSum_kernel: gpu->sim.blocks blocks of 384
// threads. After queuing the kernel it clears gpu->bRecalculateBornRadii and
// then invokes LAUNCHERROR (defined elsewhere) with this function's name.
//
// Setting GBVI_DEBUG to 1 downloads the GB/VI parameters, Born sums and
// positions to the host and dumps one line per atom to stderr before the
// kernel is launched.
void kReduceGBVIBornSum(gpuContext gpu)
{
#define GBVI_DEBUG 0
#if ( GBVI_DEBUG == 1 )
    gpu->psGBVIData->Download();
    gpu->psBornSum->Download();
    gpu->psPosq4->Download();

    const char* modelName = gpu->bIncludeGBVI ? "GBVI" : "Obc";
    (void) fprintf(stderr, "\nkReduceGBVIBornSum: Post BornSum %s Born radii & params\n", modelName);

    for (int atomIdx = 0; atomIdx < gpu->natoms; atomIdx++) {
        const float4& gbvi = gpu->psGBVIData->_pSysStream[0][atomIdx];
        const float4& posq = gpu->psPosq4->_pSysStream[0][atomIdx];
        (void) fprintf(stderr, "%d bSum=%14.6e param[%14.6e %14.6e %14.6e] x[%14.6f %14.6f %14.6f %14.6f]\n",
                       atomIdx,
                       gpu->psBornSum->_pSysStream[0][atomIdx],
                       gbvi.x, gbvi.y, gbvi.z,
                       posq.x, posq.y, posq.z, posq.w);
    }
#endif
#undef GBVI_DEBUG

    const unsigned int threadsPerBlock = 384;
    kReduceGBVIBornSum_kernel<<<gpu->sim.blocks, threadsPerBlock>>>();
    gpu->bRecalculateBornRadii = false;
    LAUNCHERROR("kReduceGBVIBornSum");
}
// Host launcher for the GB/VI Born-sum kernels generated from
// kCalculateGBVIBornSum.h.
//
// The kernel variant is chosen from gpu->sim.nonbondedMethod (NO_CUTOFF,
// CUTOFF, PERIODIC) and gpu->bOutputBufferPerWarp. Every launch uses
// gpu->sim.nonbond_blocks blocks of gpu->sim.nonbond_threads_per_block
// threads. Dynamic shared memory is sizeof(Atom) per thread for the N^2
// kernels and sizeof(Atom) + sizeof(float) per thread for the cutoff and
// periodic kernels. The N^2 kernels receive gpu->sim.pWorkUnit, the others
// gpu->sim.pInteractingWorkUnit. Any other nonbondedMethod value launches
// nothing. LAUNCHERROR (defined elsewhere) is invoked afterwards in all cases.
//
// Changes relative to the previous version (runtime behaviour with the debug
// switch off is unchanged):
//  * The NO_CUTOFF case has its own scope. The optional debug block declares
//    and initializes a local, and without the scope enabling it would not
//    compile, because the later case labels would jump past that initialization.
//  * The debug printf casts its bool and size_t arguments to match "%u".
void kCalculateGBVIBornSum(gpuContext gpu)
{
    // Dynamic shared-memory sizes, computed exactly as in the original launches.
    const size_t n2SharedBytes     = sizeof(Atom) * gpu->sim.nonbond_threads_per_block;
    const size_t cutoffSharedBytes = (sizeof(Atom) + sizeof(float)) * gpu->sim.nonbond_threads_per_block;

    switch (gpu->sim.nonbondedMethod)
    {
        case NO_CUTOFF:
        {
// Set GBVI to 1 to dump the work-unit header and the first/last `maxPrint`
// atoms' Born sums, parameters and positions to stderr before launching.
#define GBVI 0
#if GBVI == 1
            int maxPrint = 10;
            gpu->psWorkUnit->Download();
            fprintf(stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=%u wu=%u %u shrd=%u\n",
                    static_cast<unsigned int>(gpu->bOutputBufferPerWarp),
                    gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, gpu->sim.workUnits,
                    gpu->psWorkUnit->_pSysStream[0][0],
                    static_cast<unsigned int>(n2SharedBytes));

            gpu->psGBVIData->Download();
            gpu->psBornSum->Download();
            gpu->psPosq4->Download();
            (void) fprintf(stderr, "\nkCalculateGBVIBornSum: pre BornSum %s Born radii & params\n",
                           (gpu->bIncludeGBVI ? "GBVI" : "Obc"));
            for (int ii = 0; ii < gpu->natoms; ii++) {
                (void) fprintf(stderr, "%d bSum=%14.6e param[%14.6e %14.6e %14.6e] x[%14.6f %14.6f %14.6f %14.6f]\n",
                               ii,
                               gpu->psBornSum->_pSysStream[0][ii],
                               gpu->psGBVIData->_pSysStream[0][ii].x,
                               gpu->psGBVIData->_pSysStream[0][ii].y,
                               gpu->psGBVIData->_pSysStream[0][ii].z,
                               gpu->psPosq4->_pSysStream[0][ii].x, gpu->psPosq4->_pSysStream[0][ii].y,
                               gpu->psPosq4->_pSysStream[0][ii].z, gpu->psPosq4->_pSysStream[0][ii].w);
                // After the first maxPrint atoms, skip ahead to the last maxPrint.
                if ((ii == maxPrint) && (ii < (gpu->natoms - maxPrint))) {
                    ii = gpu->natoms - maxPrint;
                }
            }
#endif
#undef GBVI

            if (gpu->bOutputBufferPerWarp) {
                kCalculateGBVIN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        n2SharedBytes>>>(gpu->sim.pWorkUnit);
            } else {
                kCalculateGBVIN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        n2SharedBytes>>>(gpu->sim.pWorkUnit);
            }
            break;
        }

        case CUTOFF:
            if (gpu->bOutputBufferPerWarp)
                kCalculateGBVICutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        cutoffSharedBytes>>>(gpu->sim.pInteractingWorkUnit);
            else
                kCalculateGBVICutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        cutoffSharedBytes>>>(gpu->sim.pInteractingWorkUnit);
            break;

        case PERIODIC:
            if (gpu->bOutputBufferPerWarp)
                kCalculateGBVIPeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        cutoffSharedBytes>>>(gpu->sim.pInteractingWorkUnit);
            else
                kCalculateGBVIPeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        cutoffSharedBytes>>>(gpu->sim.pInteractingWorkUnit);
            break;
    }
    LAUNCHERROR("kCalculateGBVIBornSum");
}
...
...
platforms/cuda/src/kernels/kForces.cu
View file @
a2674e83
/* -------------------------------------------------------------------------- *
Vim:
Warning:
Output
is
not
to
a
terminal
* OpenMM *
[
?
1049h
[
?
1h
=
[
1
;
59
r
[
?
12
;
25h
[
?
12l
[
?
25h
[
27
m
[
m
[
H
[
2
J
[
?
25l
[
59
;
1
H
"svn-commit.tmp"
15L
,
601
C
[
1
;
1
H
[
33
m
1
* -------------------------------------------------------------------------- *
2
[
m
[
32
m
--
This
line
,
and
those
below
,
will
be
ignored
--
[
m
* This is part of the OpenMM molecular simulation toolkit originating from *
[
33
m
3
* Simbios, the NIH National Center for Physics-Based Simulation of *
4
[
m
[
1
m
[
35
mM
plugins
/
amoeba
/
platforms
/
cuda
/
src
/
AmoebaCudaKernelFactory
.
cpp
[
m
* Biological Structures at Stanford, funded under the NIH Roadmap for *
[
33
m
5
[
m
[
1
m
[
35
mM
plugins
/
freeEnergy
/
platforms
/
reference
/
src
/
gbsa
/
CpuGBVISoftcore
.
cpp
[
m
* Medical Research, grant U54 GM072970. See https://simtk.org. *
[
33
m
6
[
m
[
1
m
[
35
mM
openmmapi
/
include
/
openmm
/
GBVIForce
.
h
[
m
* *
[
33
m
7
[
m
[
1
m
[
35
mM
openmmapi
/
src
/
GBVIForce
.
cpp
[
m
* Portions copyright (c) 2009 Stanford University and the Authors. *
[
33
m
8
[
m
[
1
m
[
35
mM
olla
/
src
/
Platform
.
cpp
[
m
* Authors: Scott Le Grand, Peter Eastman *
[
33
m
9
[
m
[
1
m
[
35
mM
platforms
/
opencl
/
src
/
OpenCLContext
.
h
[
m
* Contributors: *
[
33
m
10
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
CudaKernels
.
cpp
[
m
* *
[
33
m
11
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
kCalculateGBVIBornSum
.
cu
[
m
* This program is free software: you can redistribute it and/or modify *
[
33
m
12
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
gputypes
.
h
[
m
* it under the terms of the GNU Lesser General Public License as published *
[
33
m
13
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
cudatypes
.
h
[
m
* by the Free Software Foundation, either version 3 of the License, or *
[
33
m
14
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
kForces
.
cu
[
m
* (at your option) any later version. *
[
33
m
15
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
gpu
.
cpp
[
m
* *
[
1
m
[
34
m
~
[
17
;
1
H
~
[
18
;
1
H
~
[
19
;
1
H
~
[
20
;
1
H
~
[
21
;
1
H
~
[
22
;
1
H
~
[
23
;
1
H
~
[
24
;
1
H
~
[
25
;
1
H
~
[
26
;
1
H
~
[
27
;
1
H
~
[
28
;
1
H
~
[
29
;
1
H
~
[
30
;
1
H
~
[
31
;
1
H
~
[
32
;
1
H
~
[
33
;
1
H
~
[
34
;
1
H
~
[
35
;
1
H
~
[
36
;
1
H
~
[
37
;
1
H
~
[
38
;
1
H
~
[
39
;
1
H
~
[
40
;
1
H
~
[
41
;
1
H
~
[
42
;
1
H
~
[
43
;
1
H
~
[
44
;
1
H
~
[
45
;
1
H
~
[
46
;
1
H
~
[
47
;
1
H
~
[
48
;
1
H
~
[
49
;
1
H
~
[
50
;
1
H
~
[
51
;
1
H
~
[
52
;
1
H
~
[
53
;
1
H
~
[
54
;
1
H
~
[
55
;
1
H
~
[
56
;
1
H
~
[
57
;
1
H
~
[
58
;
1
H
~
[
m
[
59
;
217
H1
,
0
-
1
[
9
CAll
[
1
;
5
H
[
?
12l
[
?
25h
[
?
25l
[
59
;
1
H
[
K
[
59
;
1
H
:
[
?
12l
[
?
25h
q
!
[
?
25l
[
59
;
1
H
[
K
[
59
;
1
H
[
?
1l
>
[
?
12l
[
?
25h
[
?
1049l
* This program is distributed in the hope that it will be useful, *
Log
message
unchanged
or
not
specified
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
a
)
bort
,
c
)
ontinue
,
e
)
dit
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using
namespace
std
;
#include "gputypes.h"
#define FABS(a) ((a) > 0.0f ? (a) : -(a))
// File-local copy of the simulation parameters in CUDA constant memory.
// Written by SetForcesSim() and read by the kernels in this translation unit.
static __constant__ cudaGmxSimulation cSim;
// Uploads the host-side simulation block (gpu->sim) into this file's
// __constant__ cSim and passes the copy status to RTERROR (defined elsewhere).
void OPENMMCUDA_EXPORT SetForcesSim(gpuContext gpu)
{
    cudaError_t status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyToSymbol: SetForcesSim copy to cSim failed");
}
// Reads this file's __constant__ cSim back into the host-side simulation
// block (gpu->sim) and passes the copy status to RTERROR (defined elsewhere).
void GetForcesSim(gpuContext gpu)
{
    cudaError_t status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyFromSymbol: GetForcesSim copy from cSim failed");
}
// Zeroes the first cSim.stride * cSim.outputBuffers float4 entries of
// cSim.pForce4. Each thread starts at its global index and advances by the
// total number of launched threads.
__global__
__launch_bounds__(384, 1)
void kClearForces_kernel()
{
    const unsigned int totalThreads = gridDim.x * blockDim.x;

    for (unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
         idx < cSim.stride * cSim.outputBuffers;
         idx += totalThreads)
    {
        cSim.pForce4[idx] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
    }
}
// Host launcher for kClearForces_kernel: gpu->sim.blocks blocks of 384
// threads, followed by LAUNCHERROR (defined elsewhere).
void OPENMMCUDA_EXPORT kClearForces(gpuContext gpu)
{
    // Same value as the kernel's __launch_bounds__ limit.
    const unsigned int threadsPerBlock = 384;
    kClearForces_kernel<<<gpu->sim.blocks, threadsPerBlock>>>();
    LAUNCHERROR("kClearForces");
}
// Zeroes the Born-sum, Born-force and force buffers in one pass.
//
// Phase 1 covers indices below cSim.stride * cSim.nonbondOutputBuffers and
// clears pBornSum, pBornForce and pForce4 together. Phase 2 continues from
// wherever the thread's index ended up and clears only pForce4 up to
// cSim.stride * cSim.outputBuffers. Threads advance by the total number of
// launched threads in both phases.
__global__
__launch_bounds__(384, 1)
void kClearBornSumAndForces_kernel()
{
    const unsigned int totalThreads = gridDim.x * blockDim.x;
    unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Phase 1: range shared by the Born buffers and the force buffer.
    for (; idx < cSim.stride * cSim.nonbondOutputBuffers; idx += totalThreads)
    {
        cSim.pBornSum[idx]   = 0.0f;
        cSim.pBornForce[idx] = 0.0f;
        cSim.pForce4[idx]    = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
    }

    // Phase 2: any remaining force entries beyond the nonbonded buffers.
    for (; idx < cSim.stride * cSim.outputBuffers; idx += totalThreads)
    {
        cSim.pForce4[idx] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
    }
}
// Host launcher for kClearBornSumAndForces_kernel: gpu->sim.blocks blocks of
// 384 threads, followed by LAUNCHERROR (defined elsewhere).
void kClearBornSumAndForces(gpuContext gpu)
{
    // Same value as the kernel's __launch_bounds__ limit.
    const unsigned int threadsPerBlock = 384;
    kClearBornSumAndForces_kernel<<<gpu->sim.blocks, threadsPerBlock>>>();
    LAUNCHERROR("kClearBornSumAndForces");
}
// Zeroes the first cSim.energyOutputBuffers floats of cSim.pEnergy. Each
// thread starts at its global index and advances by the total number of
// launched threads.
__global__
__launch_bounds__(384, 1)
void kClearEnergy_kernel()
{
    const unsigned int totalThreads = gridDim.x * blockDim.x;

    for (unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
         idx < cSim.energyOutputBuffers;
         idx += totalThreads)
    {
        ((float*) cSim.pEnergy)[idx] = 0.0f;
    }
}
// Host launcher for kClearEnergy_kernel: gpu->sim.blocks blocks of 384
// threads, followed by LAUNCHERROR (defined elsewhere).
void kClearEnergy(gpuContext gpu)
{
    // Same value as the kernel's __launch_bounds__ limit.
    const unsigned int threadsPerBlock = 384;
    kClearEnergy_kernel<<<gpu->sim.blocks, threadsPerBlock>>>();
    LAUNCHERROR("kClearEnergy");
}
// Reduces the per-buffer force and Born-sum partials, then derives the OBC
// Born radius and chain-rule term for every atom.
//
// Phase 1 (forces): cSim.pForce4 is treated as a flat float array. For each
// float index below cSim.stride4, the cSim.outputBuffers partial values
// (spaced cSim.stride4 floats apart) are summed and the total is written
// back into the first buffer's slot.
//
// Phase 2 (Born sums): the thread index carries on from phase 1, so the atom
// handled is (pos - cSim.stride4). The cSim.nonbondOutputBuffers partials
// (spaced cSim.stride floats apart) are summed and stored back in pBornSum,
// then the Born radius and obcChain values are written to pBornRadii and
// pObcChain.
//
// Both reductions consume the partials four, then two, then one at a time;
// the grouping of the additions is part of the floating-point result and is
// kept exactly as it was.
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceBornSumAndForces_kernel()
{
    const unsigned int totalThreads = gridDim.x * blockDim.x;
    unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);

    // Phase 1: reduce forces.
    for (; pos < cSim.stride4; pos += totalThreads)
    {
        float* const pSlot  = (float*) cSim.pForce4 + pos;
        float*       pRead  = pSlot;
        float        forceTotal = 0.0f;
        int          remaining  = cSim.outputBuffers;

        for (; remaining >= 4; remaining -= 4)
        {
            float f1 = *pRead;
            pRead += cSim.stride4;
            float f2 = *pRead;
            pRead += cSim.stride4;
            float f3 = *pRead;
            pRead += cSim.stride4;
            float f4 = *pRead;
            pRead += cSim.stride4;
            forceTotal += f1 + f2 + f3 + f4;
        }
        if (remaining >= 2)
        {
            float f1 = *pRead;
            pRead += cSim.stride4;
            float f2 = *pRead;
            pRead += cSim.stride4;
            forceTotal += f1 + f2;
            remaining -= 2;
        }
        if (remaining > 0)
        {
            forceTotal += *pRead;
        }

        *pSlot = forceTotal;
    }

    // Phase 2: reduce Born sums. pos is at least cSim.stride4 here.
    for (; pos - cSim.stride4 < cSim.atoms; pos += totalThreads)
    {
        const unsigned int atomIdx = pos - cSim.stride4;
        float*  pRead = cSim.pBornSum + atomIdx;
        float2  atom  = cSim.pObcData[atomIdx];
        float   sum   = 0.0f;

        // Gather this atom's partial Born sums.
        int remaining = cSim.nonbondOutputBuffers;
        for (; remaining >= 4; remaining -= 4)
        {
            float f1 = *pRead;
            pRead += cSim.stride;
            float f2 = *pRead;
            pRead += cSim.stride;
            float f3 = *pRead;
            pRead += cSim.stride;
            float f4 = *pRead;
            pRead += cSim.stride;
            sum += f1 + f2 + f3 + f4;
        }
        if (remaining >= 2)
        {
            float f1 = *pRead;
            pRead += cSim.stride;
            float f2 = *pRead;
            pRead += cSim.stride;
            sum += f1 + f2;
            remaining -= 2;
        }
        if (remaining > 0)
        {
            sum += *pRead;
        }

        // Store the reduced sum, then compute the Born radius and OBC term.
        cSim.pBornSum[atomIdx] = sum;
        sum *= 0.5f * atom.x;
        float sum2           = sum * sum;
        float sum3           = sum * sum2;
        float tanhSum        = tanh(cSim.alphaOBC * sum - cSim.betaOBC * sum2 + cSim.gammaOBC * sum3);
        float nonOffsetRadii = atom.x + cSim.dielectricOffset;
        float bornRadius     = 1.0f / (1.0f / atom.x - tanhSum / nonOffsetRadii);
        float obcChain       = atom.x * (cSim.alphaOBC - 2.0f * cSim.betaOBC * sum + 3.0f * cSim.gammaOBC * sum2);
        obcChain             = (1.0f - tanhSum * tanhSum) * obcChain / nonOffsetRadii;

        cSim.pBornRadii[atomIdx] = bornRadius;
        cSim.pObcChain[atomIdx]  = obcChain;
    }
}
// Host launcher for kReduceBornSumAndForces_kernel: gpu->sim.blocks blocks of
// gpu->sim.bsf_reduce_threads_per_block threads, followed by LAUNCHERROR
// (defined elsewhere).
void kReduceBornSumAndForces(gpuContext gpu)
{
    kReduceBornSumAndForces_kernel<<<gpu->sim.blocks,
                                     gpu->sim.bsf_reduce_threads_per_block>>>();
    LAUNCHERROR("kReduceBornSumAndForces");
}
// Collapses the per-buffer force contributions into a single value per slot.
//
// cSim.pForce4 is addressed as a flat float array. For every float index below
// cSim.stride4 handled by this thread, the kernel adds up cSim.outputBuffers
// values spaced cSim.stride4 floats apart and stores the total back into the
// first of them.
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceForces_kernel()
{
    // Each thread begins at its global thread index and then advances by the
    // total number of launched threads (gridDim.x * blockDim.x).
    for (unsigned int slot = (blockIdx.x * blockDim.x + threadIdx.x);
         slot < cSim.stride4;
         slot += gridDim.x * blockDim.x)
    {
        float  accum     = 0.0f;
        float* pSrc      = (float*) cSim.pForce4 + slot;
        int    remaining = cSim.outputBuffers;

        // Consume the buffers four at a time. The partial sum is formed first
        // and only then added to the accumulator, which fixes the rounding order.
        for (; remaining >= 4; remaining -= 4)
        {
            float a = *pSrc;
            pSrc   += cSim.stride4;
            float b = *pSrc;
            pSrc   += cSim.stride4;
            float c = *pSrc;
            pSrc   += cSim.stride4;
            float d = *pSrc;
            pSrc   += cSim.stride4;
            accum  += a + b + c + d;
        }

        // At most three buffers are left: handle a pair, then a single one.
        if (remaining >= 2)
        {
            float a    = *pSrc;
            pSrc      += cSim.stride4;
            float b    = *pSrc;
            pSrc      += cSim.stride4;
            accum     += a + b;
            remaining -= 2;
        }
        if (remaining > 0)
        {
            accum += *pSrc;
        }

        // The reduced value overwrites the entry of the first buffer.
        pSrc  = (float*) cSim.pForce4 + slot;
        *pSrc = accum;
    }
}
// Exported host-side entry point that launches kReduceForces_kernel.
//
// gpu : context whose `sim` member supplies the launch configuration
//       (`blocks` thread blocks of `bsf_reduce_threads_per_block` threads each).
//
// LAUNCHERROR is invoked right after the launch, tagged with this function's name.
void OPENMMCUDA_EXPORT kReduceForces(gpuContext gpu)
{
    kReduceForces_kernel<<<gpu->sim.blocks,
                           gpu->sim.bsf_reduce_threads_per_block>>>();
    LAUNCHERROR("kReduceForces");
}
// Returns the sum of the first gpu->sim.energyOutputBuffers entries of
// gpu->psEnergy, accumulated in double precision.
//
// psEnergy->Download() is called first; presumably this copies the device-side
// energy buffer to host memory (confirm against the type of psEnergy).
double kReduceEnergy(gpuContext gpu)
{
    gpu->psEnergy->Download();

    double total = 0.0;
    int    entry = 0;
    // Entries are added in ascending index order.
    while (entry < gpu->sim.energyOutputBuffers)
    {
        total += (*gpu->psEnergy)[entry];
        ++entry;
    }
    return total;
}
// Reduces the per-buffer Born force contributions for the OBC/GBSA model and
// folds in the surface-area term.
//
// For every atom handled by this thread the kernel:
//   1. adds up cSim.nonbondOutputBuffers values of cSim.pBornForce spaced
//      cSim.stride floats apart,
//   2. adds saTerm / bornRadius, where
//      saTerm = surfaceAreaFactor * r * r * ((radius + dielectricOffset) / bornRadius)^6
//      and r = radius + dielectricOffset + probeRadius (radius is pObcData[atom].x),
//   3. scales the result by bornRadius^2 * obcChain and writes it back to
//      cSim.pBornForce[atom].
// The thread's summed saTerm, divided by -6, is added to its cSim.pEnergy slot.
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
#endif
void kReduceObcGbsaBornForces_kernel()
{
    float saEnergy = 0.0f;

    // Each thread begins at its global thread index and then advances by the
    // total number of launched threads (gridDim.x * blockDim.x).
    for (unsigned int atom = (blockIdx.x * blockDim.x + threadIdx.x);
         atom < cSim.atoms;
         atom += gridDim.x * blockDim.x)
    {
        float  bornRadius = cSim.pBornRadii[atom];
        float  obcChain   = cSim.pObcChain[atom];
        float2 obcData    = cSim.pObcData[atom];
        float  accum      = 0.0f;
        float* pSrc       = cSim.pBornForce + atom;
        int    remaining  = cSim.nonbondOutputBuffers;

        // Consume the buffers four at a time. The partial sum is formed first
        // and only then added to the accumulator, which fixes the rounding order.
        for (; remaining >= 4; remaining -= 4)
        {
            float a = *pSrc;
            pSrc   += cSim.stride;
            float b = *pSrc;
            pSrc   += cSim.stride;
            float c = *pSrc;
            pSrc   += cSim.stride;
            float d = *pSrc;
            pSrc   += cSim.stride;
            accum  += a + b + c + d;
        }

        // At most three buffers are left: handle a pair, then a single one.
        if (remaining >= 2)
        {
            float a    = *pSrc;
            pSrc      += cSim.stride;
            float b    = *pSrc;
            pSrc      += cSim.stride;
            accum     += a + b;
            remaining -= 2;
        }
        if (remaining > 0)
        {
            accum += *pSrc;
        }

        // Surface-area contribution for this atom.
        float r      = (obcData.x + cSim.dielectricOffset + cSim.probeRadius);
        float ratio6 = pow((obcData.x + cSim.dielectricOffset) / bornRadius, 6.0f);
        float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;

        accum    += saTerm / bornRadius;
        accum    *= bornRadius * bornRadius * obcChain;
        saEnergy += saTerm;

        // The reduced value overwrites the entry of the first buffer.
        pSrc  = cSim.pBornForce + atom;
        *pSrc = accum;
    }

    // Correct for the surface area factor of -6 before accumulating the energy.
    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += saEnergy / -6.0f;
}
// Reduces the per-buffer Born force contributions for the GB/VI model.
//
// For every atom handled by this thread the kernel:
//   1. adds up cSim.nonbondOutputBuffers values of cSim.pBornForce spaced
//      cSim.stride floats apart,
//   2. adds (3 * gbviData.z * ratio^3) / bornRadius, where
//      ratio = gbviData.x / bornRadius and gbviData = cSim.pGBVIData[atom],
//   3. scales the result by (1/3) * bornRadius^4 and writes it back to
//      cSim.pBornForce[atom].
// The thread's energy, i.e. the sum of -gbviData.z * ratio^3 over its atoms,
// is added to its cSim.pEnergy slot.
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
#endif
void kReduceGBVIBornForces_kernel()
{
    float threadEnergy = 0.0f;

    // Each thread begins at its global thread index and then advances by the
    // total number of launched threads (gridDim.x * blockDim.x).
    for (unsigned int atom = (blockIdx.x * blockDim.x + threadIdx.x);
         atom < cSim.atoms;
         atom += gridDim.x * blockDim.x)
    {
        float  bornRadius = cSim.pBornRadii[atom];
        float4 gbviData   = cSim.pGBVIData[atom];
        float  accum      = 0.0f;
        float* pSrc       = cSim.pBornForce + atom;
        int    remaining  = cSim.nonbondOutputBuffers;

        // Consume the buffers four at a time. The partial sum is formed first
        // and only then added to the accumulator, which fixes the rounding order.
        for (; remaining >= 4; remaining -= 4)
        {
            float a = *pSrc;
            pSrc   += cSim.stride;
            float b = *pSrc;
            pSrc   += cSim.stride;
            float c = *pSrc;
            pSrc   += cSim.stride;
            float d = *pSrc;
            pSrc   += cSim.stride;
            accum  += a + b + c + d;
        }

        // At most three buffers are left: handle a pair, then a single one.
        if (remaining >= 2)
        {
            float a    = *pSrc;
            pSrc      += cSim.stride;
            float b    = *pSrc;
            pSrc      += cSim.stride;
            accum     += a + b;
            remaining -= 2;
        }
        if (remaining > 0)
        {
            accum += *pSrc;
        }

        // 'cavity' term: contributes to both the thread energy and the force.
        float ratio  = (gbviData.x / bornRadius);
        float ratio3 = ratio * ratio * ratio;
        threadEnergy -= gbviData.z * ratio3;
        accum        += (3.0f * gbviData.z * ratio3) / bornRadius;

        float br2 = bornRadius * bornRadius;
        accum    *= (1.0f / 3.0f) * br2 * br2;

        // The reduced value overwrites the entry of the first buffer.
        pSrc  = cSim.pBornForce + atom;
        *pSrc = accum;
    }

    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += threadEnergy;
}
// Host-side dispatcher for the Born force reduction.
//
// gpu : context whose flags select the kernel and whose `sim` member supplies
//       the launch configuration.
//
// When gpu->bIncludeGBSA is set the OBC/GBSA kernel is launched; otherwise,
// when gpu->bIncludeGBVI is set, the GB/VI kernel is launched. If neither flag
// is set nothing is launched. LAUNCHERROR follows each launch, tagged with the
// name of the kernel's reduction.
void kReduceObcGbsaBornForces(gpuContext gpu)
{
    // GBSA takes precedence over GB/VI when both flags are set.
    if (gpu->bIncludeGBSA)
    {
        kReduceObcGbsaBornForces_kernel<<<gpu->sim.blocks,
                                          gpu->sim.bsf_reduce_threads_per_block>>>();
        LAUNCHERROR("kReduceObcGbsaBornForces");
        return;
    }

    if (gpu->bIncludeGBVI)
    {
        kReduceGBVIBornForces_kernel<<<gpu->sim.blocks,
                                       gpu->sim.bsf_reduce_threads_per_block>>>();
        LAUNCHERROR("kReduceGBVIBornForces");
    }
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment