Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cd874b2b
Commit
cd874b2b
authored
Feb 22, 2017
by
peastman
Browse files
Merged changes from main branch
parents
a783b996
b84e22ba
Changes
112
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
908 additions
and
290 deletions
+908
-290
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+44
-5
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+57
-3
platforms/cpu/src/CpuSETTLE.cpp
platforms/cpu/src/CpuSETTLE.cpp
+21
-51
platforms/cpu/staticTarget/CMakeLists.txt
platforms/cpu/staticTarget/CMakeLists.txt
+1
-2
platforms/cpu/tests/TestCpuDispersionPME.cpp
platforms/cpu/tests/TestCpuDispersionPME.cpp
+37
-0
platforms/cuda/include/CudaContext.h
platforms/cuda/include/CudaContext.h
+13
-1
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+63
-6
platforms/cuda/include/CudaParallelKernels.h
platforms/cuda/include/CudaParallelKernels.h
+9
-0
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+43
-10
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+369
-191
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+4
-0
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+4
-0
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+45
-1
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+67
-9
platforms/cuda/src/kernels/utilities.cu
platforms/cuda/src/kernels/utilities.cu
+7
-0
platforms/cuda/staticTarget/CMakeLists.txt
platforms/cuda/staticTarget/CMakeLists.txt
+1
-2
platforms/cuda/tests/TestCudaDispersionPME.cpp
platforms/cuda/tests/TestCudaDispersionPME.cpp
+36
-0
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+12
-0
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+65
-8
platforms/opencl/include/OpenCLParallelKernels.h
platforms/opencl/include/OpenCLParallelKernels.h
+10
-1
No files found.
platforms/cpu/src/CpuNonbondedForceVec4.cpp
View file @
cd874b2b
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#include "SimTKOpenMMUtilities.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
#include "CpuNonbondedForceVec4.h"
#include <algorithm>
#include <algorithm>
#include <iostream>
using
namespace
std
;
using
namespace
std
;
using
namespace
OpenMM
;
using
namespace
OpenMM
;
...
@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Determine whether we need to apply periodic boundary conditions.
// Determine whether we need to apply periodic boundary conditions.
PeriodicType
periodicType
;
PeriodicType
periodicType
;
fvec4
blockCenter
;
fvec4
blockCenter
;
if
(
!
periodic
)
{
if
(
!
periodic
)
{
...
@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
...
@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
template
<
int
PERIODIC_TYPE
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
...
@@ -278,6 +277,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -278,6 +277,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
C6s
(
C6params
[
blockAtom
[
0
]],
C6params
[
blockAtom
[
1
]],
C6params
[
blockAtom
[
2
]],
C6params
[
blockAtom
[
3
]]);
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
...
@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4
sig2
=
inverseR
*
sig
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
fvec4
eps
=
blockAtomEpsilon
*
atomEpsilon
;
fvec4
epsSig6
=
eps
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
if
(
useSwitch
)
{
...
@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
energy
*=
switchValue
;
}
}
if
(
ljpme
)
{
fvec4
C6ij
=
C6s
*
C6params
[
atom
];
fvec4
inverseR2
=
inverseR
*
inverseR
;
fvec4
mysig2
=
sig
*
sig
;
fvec4
mysig6
=
mysig2
*
mysig2
*
mysig2
;
fvec4
emult
=
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
exptermsApprox
(
r
);
fvec4
potentialShift
=
eps
*
(
1.0
f
-
mysig6
*
inverseRcut6
)
*
mysig6
*
inverseRcut6
-
C6ij
*
inverseRcut6Expterm
;
dEdR
+=
6.0
f
*
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
dExptermsApprox
(
r
);
energy
+=
emult
+
potentialShift
;
}
}
}
else
{
else
{
energy
=
0.0
f
;
energy
=
0.0
f
;
...
@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
...
@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
transpose
(
t1
,
t2
,
t3
,
t4
);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
return
coeff1
*
t1
+
coeff2
*
t2
;
}
}
fvec4
CpuNonbondedForceVec4
::
exptermsApprox
(
const
fvec4
&
r
)
{
fvec4
r1
=
r
*
exptermsDXInv
;
ivec4
index
=
min
(
floor
(
r1
),
NUM_TABLE_POINTS
);
fvec4
coeff2
=
r1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
exptermsTable
[
index
[
0
]]);
fvec4
t2
(
&
exptermsTable
[
index
[
1
]]);
fvec4
t3
(
&
exptermsTable
[
index
[
2
]]);
fvec4
t4
(
&
exptermsTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
fvec4
CpuNonbondedForceVec4
::
dExptermsApprox
(
const
fvec4
&
r
)
{
fvec4
r1
=
r
*
exptermsDXInv
;
ivec4
index
=
min
(
floor
(
r1
),
NUM_TABLE_POINTS
);
fvec4
coeff2
=
r1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
dExptermsTable
[
index
[
0
]]);
fvec4
t2
(
&
dExptermsTable
[
index
[
1
]]);
fvec4
t3
(
&
dExptermsTable
[
index
[
2
]]);
fvec4
t4
(
&
dExptermsTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
cd874b2b
...
@@ -27,6 +27,7 @@
...
@@ -27,6 +27,7 @@
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/hardware.h"
#include <algorithm>
#include <algorithm>
#include <iostream>
using
namespace
std
;
using
namespace
std
;
using
namespace
OpenMM
;
using
namespace
OpenMM
;
...
@@ -81,7 +82,6 @@ enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, Periodic
...
@@ -81,7 +82,6 @@ enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, Periodic
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Determine whether we need to apply periodic boundary conditions.
// Determine whether we need to apply periodic boundary conditions.
PeriodicType
periodicType
;
PeriodicType
periodicType
;
fvec4
blockCenter
;
fvec4
blockCenter
;
if
(
!
periodic
)
{
if
(
!
periodic
)
{
...
@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
blockAtomCharge
*=
ONE_4PI_EPS0
;
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
C6s
(
C6params
[
blockAtom
[
0
]],
C6params
[
blockAtom
[
1
]],
C6params
[
blockAtom
[
2
]],
C6params
[
blockAtom
[
3
]],
C6params
[
blockAtom
[
4
]],
C6params
[
blockAtom
[
5
]],
C6params
[
blockAtom
[
6
]],
C6params
[
blockAtom
[
7
]]);
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
...
@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec8
sig2
=
inverseR
*
sig
;
fvec8
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
fvec8
eps
=
blockAtomEpsilon
*
atomEpsilon
;
fvec8
epsSig6
=
eps
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
if
(
useSwitch
)
{
...
@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
energy
*=
switchValue
;
}
}
if
(
ljpme
)
{
fvec8
C6ij
=
C6s
*
C6params
[
atom
];
fvec8
inverseR2
=
inverseR
*
inverseR
;
fvec8
mysig2
=
sig
*
sig
;
fvec8
mysig6
=
mysig2
*
mysig2
*
mysig2
;
fvec8
emult
=
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
exptermsApprox
(
r
);
fvec8
potentialShift
=
eps
*
(
1.0
f
-
mysig6
*
inverseRcut6
)
*
mysig6
*
inverseRcut6
-
C6ij
*
inverseRcut6Expterm
;
dEdR
+=
6.0
f
*
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
dExptermsApprox
(
r
);
energy
+=
emult
+
potentialShift
;
}
}
}
else
{
else
{
energy
=
0.0
f
;
energy
=
0.0
f
;
...
@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
...
@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
return
coeff1
*
s1
+
coeff2
*
s2
;
}
}
fvec8
CpuNonbondedForceVec8
::
exptermsApprox
(
const
fvec8
&
r
)
{
fvec8
r1
=
r
*
exptermsDXInv
;
ivec8
index
=
min
(
floor
(
r1
),
NUM_TABLE_POINTS
);
fvec8
coeff2
=
r1
-
index
;
fvec8
coeff1
=
1.0
f
-
coeff2
;
ivec4
indexLower
=
index
.
lowerVec
();
ivec4
indexUpper
=
index
.
upperVec
();
fvec4
t1
(
&
exptermsTable
[
indexLower
[
0
]]);
fvec4
t2
(
&
exptermsTable
[
indexLower
[
1
]]);
fvec4
t3
(
&
exptermsTable
[
indexLower
[
2
]]);
fvec4
t4
(
&
exptermsTable
[
indexLower
[
3
]]);
fvec4
t5
(
&
exptermsTable
[
indexUpper
[
0
]]);
fvec4
t6
(
&
exptermsTable
[
indexUpper
[
1
]]);
fvec4
t7
(
&
exptermsTable
[
indexUpper
[
2
]]);
fvec4
t8
(
&
exptermsTable
[
indexUpper
[
3
]]);
fvec8
s1
,
s2
,
s3
,
s4
;
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
}
fvec8
CpuNonbondedForceVec8
::
dExptermsApprox
(
const
fvec8
&
r
)
{
fvec8
r1
=
r
*
exptermsDXInv
;
ivec8
index
=
min
(
floor
(
r1
),
NUM_TABLE_POINTS
);
fvec8
coeff2
=
r1
-
index
;
fvec8
coeff1
=
1.0
f
-
coeff2
;
ivec4
indexLower
=
index
.
lowerVec
();
ivec4
indexUpper
=
index
.
upperVec
();
fvec4
t1
(
&
dExptermsTable
[
indexLower
[
0
]]);
fvec4
t2
(
&
dExptermsTable
[
indexLower
[
1
]]);
fvec4
t3
(
&
dExptermsTable
[
indexLower
[
2
]]);
fvec4
t4
(
&
dExptermsTable
[
indexLower
[
3
]]);
fvec4
t5
(
&
dExptermsTable
[
indexUpper
[
0
]]);
fvec4
t6
(
&
dExptermsTable
[
indexUpper
[
1
]]);
fvec4
t7
(
&
dExptermsTable
[
indexUpper
[
2
]]);
fvec4
t8
(
&
dExptermsTable
[
indexUpper
[
3
]]);
fvec8
s1
,
s2
,
s3
,
s4
;
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
}
#endif
#endif
platforms/cpu/src/CpuSETTLE.cpp
View file @
cd874b2b
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2013-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2013-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -35,52 +35,6 @@
...
@@ -35,52 +35,6 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
class
CpuSETTLE
::
ApplyToPositionsTask
:
public
ThreadPool
::
Task
{
public:
ApplyToPositionsTask
(
vector
<
OpenMM
::
Vec3
>&
atomCoordinates
,
vector
<
OpenMM
::
Vec3
>&
atomCoordinatesP
,
vector
<
double
>&
inverseMasses
,
double
tolerance
,
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
)
:
atomCoordinates
(
atomCoordinates
),
atomCoordinatesP
(
atomCoordinatesP
),
inverseMasses
(
inverseMasses
),
tolerance
(
tolerance
),
threadSettle
(
threadSettle
)
{
gmx_atomic_set
(
&
atomicCounter
,
0
);
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
while
(
true
)
{
int
index
=
gmx_atomic_fetch_add
(
&
atomicCounter
,
1
);
if
(
index
>=
threadSettle
.
size
())
break
;
threadSettle
[
index
]
->
apply
(
atomCoordinates
,
atomCoordinatesP
,
inverseMasses
,
tolerance
);
}
}
vector
<
OpenMM
::
Vec3
>&
atomCoordinates
;
vector
<
OpenMM
::
Vec3
>&
atomCoordinatesP
;
vector
<
double
>&
inverseMasses
;
double
tolerance
;
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
;
gmx_atomic_t
atomicCounter
;
};
class
CpuSETTLE
::
ApplyToVelocitiesTask
:
public
ThreadPool
::
Task
{
public:
ApplyToVelocitiesTask
(
vector
<
OpenMM
::
Vec3
>&
atomCoordinates
,
vector
<
OpenMM
::
Vec3
>&
velocities
,
vector
<
double
>&
inverseMasses
,
double
tolerance
,
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
)
:
atomCoordinates
(
atomCoordinates
),
velocities
(
velocities
),
inverseMasses
(
inverseMasses
),
tolerance
(
tolerance
),
threadSettle
(
threadSettle
)
{
gmx_atomic_set
(
&
atomicCounter
,
0
);
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
while
(
true
)
{
int
index
=
gmx_atomic_fetch_add
(
&
atomicCounter
,
1
);
if
(
index
>=
threadSettle
.
size
())
break
;
threadSettle
[
index
]
->
applyToVelocities
(
atomCoordinates
,
velocities
,
inverseMasses
,
tolerance
);
}
}
vector
<
OpenMM
::
Vec3
>&
atomCoordinates
;
vector
<
OpenMM
::
Vec3
>&
velocities
;
vector
<
double
>&
inverseMasses
;
double
tolerance
;
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
;
gmx_atomic_t
atomicCounter
;
};
CpuSETTLE
::
CpuSETTLE
(
const
System
&
system
,
const
ReferenceSETTLEAlgorithm
&
settle
,
ThreadPool
&
threads
)
:
threads
(
threads
)
{
CpuSETTLE
::
CpuSETTLE
(
const
System
&
system
,
const
ReferenceSETTLEAlgorithm
&
settle
,
ThreadPool
&
threads
)
:
threads
(
threads
)
{
int
numBlocks
=
10
*
threads
.
getNumThreads
();
int
numBlocks
=
10
*
threads
.
getNumThreads
();
int
numClusters
=
settle
.
getNumClusters
();
int
numClusters
=
settle
.
getNumClusters
();
...
@@ -107,13 +61,29 @@ CpuSETTLE::~CpuSETTLE() {
...
@@ -107,13 +61,29 @@ CpuSETTLE::~CpuSETTLE() {
}
}
void
CpuSETTLE
::
apply
(
vector
<
OpenMM
::
Vec3
>&
atomCoordinates
,
vector
<
OpenMM
::
Vec3
>&
atomCoordinatesP
,
vector
<
double
>&
inverseMasses
,
double
tolerance
)
{
void
CpuSETTLE
::
apply
(
vector
<
OpenMM
::
Vec3
>&
atomCoordinates
,
vector
<
OpenMM
::
Vec3
>&
atomCoordinatesP
,
vector
<
double
>&
inverseMasses
,
double
tolerance
)
{
ApplyToPositionsTask
task
(
atomCoordinates
,
atomCoordinatesP
,
inverseMasses
,
tolerance
,
threadSettle
);
gmx_atomic_t
atomicCounter
;
threads
.
execute
(
task
);
gmx_atomic_set
(
&
atomicCounter
,
0
);
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
while
(
true
)
{
int
index
=
gmx_atomic_fetch_add
(
&
atomicCounter
,
1
);
if
(
index
>=
threadSettle
.
size
())
break
;
threadSettle
[
index
]
->
apply
(
atomCoordinates
,
atomCoordinatesP
,
inverseMasses
,
tolerance
);
}
});
threads
.
waitForThreads
();
threads
.
waitForThreads
();
}
}
void
CpuSETTLE
::
applyToVelocities
(
vector
<
OpenMM
::
Vec3
>&
atomCoordinates
,
vector
<
OpenMM
::
Vec3
>&
velocities
,
vector
<
double
>&
inverseMasses
,
double
tolerance
)
{
void
CpuSETTLE
::
applyToVelocities
(
vector
<
OpenMM
::
Vec3
>&
atomCoordinates
,
vector
<
OpenMM
::
Vec3
>&
velocities
,
vector
<
double
>&
inverseMasses
,
double
tolerance
)
{
ApplyToVelocitiesTask
task
(
atomCoordinates
,
velocities
,
inverseMasses
,
tolerance
,
threadSettle
);
gmx_atomic_t
atomicCounter
;
threads
.
execute
(
task
);
gmx_atomic_set
(
&
atomicCounter
,
0
);
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
while
(
true
)
{
int
index
=
gmx_atomic_fetch_add
(
&
atomicCounter
,
1
);
if
(
index
>=
threadSettle
.
size
())
break
;
threadSettle
[
index
]
->
applyToVelocities
(
atomCoordinates
,
velocities
,
inverseMasses
,
tolerance
);
}
});
threads
.
waitForThreads
();
threads
.
waitForThreads
();
}
}
platforms/cpu/staticTarget/CMakeLists.txt
View file @
cd874b2b
...
@@ -16,7 +16,6 @@ ENDFOREACH(file)
...
@@ -16,7 +16,6 @@ ENDFOREACH(file)
ADD_LIBRARY
(
${
STATIC_TARGET
}
STATIC
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
ADD_LIBRARY
(
${
STATIC_TARGET
}
STATIC
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
OPENMM_LIBRARY_NAME
}
_static
${
PTHREADS_LIB_STATIC
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
OPENMM_LIBRARY_NAME
}
_static
${
PTHREADS_LIB_STATIC
}
)
#-DPTW32_STATIC_LIB only works for the windows pthreads.
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_CPU_BUILDING_STATIC_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_CPU_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
STATIC_TARGET
}
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
STATIC_TARGET
}
)
platforms/cpu/tests/TestCpuDispersionPME.cpp
0 → 100644
View file @
cd874b2b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CpuTests.h"
#include "TestDispersionPME.h"
void
runPlatformTests
()
{
}
platforms/cuda/include/CudaContext.h
View file @
cd874b2b
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -494,6 +494,10 @@ public:
...
@@ -494,6 +494,10 @@ public:
CudaNonbondedUtilities
&
getNonbondedUtilities
()
{
CudaNonbondedUtilities
&
getNonbondedUtilities
()
{
return
*
nonbonded
;
return
*
nonbonded
;
}
}
/**
* Set the particle charges. These are packed into the fourth element of the posq array.
*/
void
setCharges
(
const
std
::
vector
<
double
>&
charges
);
/**
/**
* Get the thread used by this context for executing parallel computations.
* Get the thread used by this context for executing parallel computations.
*/
*/
...
@@ -577,6 +581,12 @@ public:
...
@@ -577,6 +581,12 @@ public:
* and order to be revalidated.
* and order to be revalidated.
*/
*/
void
invalidateMolecules
();
void
invalidateMolecules
();
/**
* Mark that the current molecule definitions from one particular force (and hence the atom order)
* may be invalid. This should be called whenever force field parameters change. It will cause the
* definitions and order to be revalidated.
*/
bool
invalidateMolecules
(
CudaForceInfo
*
force
);
private:
private:
/**
/**
* Compute a sorted list of device indices in decreasing order of desirability
* Compute a sorted list of device indices in decreasing order of desirability
...
@@ -626,6 +636,7 @@ private:
...
@@ -626,6 +636,7 @@ private:
CUfunction
clearFourBuffersKernel
;
CUfunction
clearFourBuffersKernel
;
CUfunction
clearFiveBuffersKernel
;
CUfunction
clearFiveBuffersKernel
;
CUfunction
clearSixBuffersKernel
;
CUfunction
clearSixBuffersKernel
;
CUfunction
setChargesKernel
;
std
::
vector
<
CudaForceInfo
*>
forces
;
std
::
vector
<
CudaForceInfo
*>
forces
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
...
@@ -638,6 +649,7 @@ private:
...
@@ -638,6 +649,7 @@ private:
CudaArray
*
energyBuffer
;
CudaArray
*
energyBuffer
;
CudaArray
*
energyParamDerivBuffer
;
CudaArray
*
energyParamDerivBuffer
;
CudaArray
*
atomIndexDevice
;
CudaArray
*
atomIndexDevice
;
CudaArray
*
chargeBuffer
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
vector
<
int
>
atomIndex
;
std
::
vector
<
int
>
atomIndex
;
...
...
platforms/cuda/include/CudaKernels.h
View file @
cd874b2b
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -198,7 +198,6 @@ public:
...
@@ -198,7 +198,6 @@ public:
*/
*/
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
private:
private:
class
GetPositionsTask
;
CudaContext
&
cu
;
CudaContext
&
cu
;
};
};
...
@@ -292,9 +291,11 @@ public:
...
@@ -292,9 +291,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
HarmonicBondForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
HarmonicBondForce
&
force
);
private:
private:
class
ForceInfo
;
int
numBonds
;
int
numBonds
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaArray
*
params
;
CudaArray
*
params
;
};
};
...
@@ -332,9 +333,11 @@ public:
...
@@ -332,9 +333,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomBondForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomBondForce
&
force
);
private:
private:
class
ForceInfo
;
int
numBonds
;
int
numBonds
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
globals
;
...
@@ -375,9 +378,11 @@ public:
...
@@ -375,9 +378,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
HarmonicAngleForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
HarmonicAngleForce
&
force
);
private:
private:
class
ForceInfo
;
int
numAngles
;
int
numAngles
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaArray
*
params
;
CudaArray
*
params
;
};
};
...
@@ -415,9 +420,11 @@ public:
...
@@ -415,9 +420,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomAngleForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomAngleForce
&
force
);
private:
private:
class
ForceInfo
;
int
numAngles
;
int
numAngles
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
globals
;
...
@@ -458,9 +465,11 @@ public:
...
@@ -458,9 +465,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
PeriodicTorsionForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
PeriodicTorsionForce
&
force
);
private:
private:
class
ForceInfo
;
int
numTorsions
;
int
numTorsions
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaArray
*
params
;
CudaArray
*
params
;
};
};
...
@@ -498,9 +507,11 @@ public:
...
@@ -498,9 +507,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
RBTorsionForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
RBTorsionForce
&
force
);
private:
private:
class
ForceInfo
;
int
numTorsions
;
int
numTorsions
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaArray
*
params1
;
CudaArray
*
params1
;
CudaArray
*
params2
;
CudaArray
*
params2
;
...
@@ -539,9 +550,11 @@ public:
...
@@ -539,9 +550,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CMAPTorsionForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CMAPTorsionForce
&
force
);
private:
private:
class
ForceInfo
;
int
numTorsions
;
int
numTorsions
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
std
::
vector
<
int2
>
mapPositionsVec
;
std
::
vector
<
int2
>
mapPositionsVec
;
CudaArray
*
coefficients
;
CudaArray
*
coefficients
;
...
@@ -582,9 +595,11 @@ public:
...
@@ -582,9 +595,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomTorsionForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomTorsionForce
&
force
);
private:
private:
class
ForceInfo
;
int
numTorsions
;
int
numTorsions
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
globals
;
...
@@ -599,7 +614,8 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
...
@@ -599,7 +614,8 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeDispersionBsplineModuliX
(
NULL
),
pmeDispersionBsplineModuliY
(
NULL
),
pmeDispersionBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
}
}
~
CudaCalcNonbondedForceKernel
();
~
CudaCalcNonbondedForceKernel
();
/**
/**
...
@@ -636,6 +652,15 @@ public:
...
@@ -636,6 +652,15 @@ public:
* @param nz the number of grid points along the Z axis
* @param nz the number of grid points along the Z axis
*/
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
private:
class
SortTrait
:
public
CudaSort
::
SortTrait
{
class
SortTrait
:
public
CudaSort
::
SortTrait
{
int
getDataSize
()
const
{
return
8
;}
int
getDataSize
()
const
{
return
8
;}
...
@@ -647,12 +672,14 @@ private:
...
@@ -647,12 +672,14 @@ private:
const
char
*
getMaxValue
()
const
{
return
"make_int2(2147483647, 2147483647)"
;}
const
char
*
getMaxValue
()
const
{
return
"make_int2(2147483647, 2147483647)"
;}
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
};
};
class
ForceInfo
;
class
PmeIO
;
class
PmeIO
;
class
PmePreComputation
;
class
PmePreComputation
;
class
PmePostComputation
;
class
PmePostComputation
;
class
SyncStreamPreComputation
;
class
SyncStreamPreComputation
;
class
SyncStreamPostComputation
;
class
SyncStreamPostComputation
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
bool
hasInitializedFFT
;
bool
hasInitializedFFT
;
CudaArray
*
sigmaEpsilon
;
CudaArray
*
sigmaEpsilon
;
CudaArray
*
exceptionParams
;
CudaArray
*
exceptionParams
;
...
@@ -662,6 +689,9 @@ private:
...
@@ -662,6 +689,9 @@ private:
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeDispersionBsplineModuliX
;
CudaArray
*
pmeDispersionBsplineModuliY
;
CudaArray
*
pmeDispersionBsplineModuliZ
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeEnergyBuffer
;
CudaArray
*
pmeEnergyBuffer
;
...
@@ -673,20 +703,29 @@ private:
...
@@ -673,20 +703,29 @@ private:
CudaFFT3D
*
fft
;
CudaFFT3D
*
fft
;
cufftHandle
fftForward
;
cufftHandle
fftForward
;
cufftHandle
fftBackward
;
cufftHandle
fftBackward
;
CudaFFT3D
*
dispersionFft
;
cufftHandle
dispersionFftForward
;
cufftHandle
dispersionFftBackward
;
CUfunction
ewaldSumsKernel
;
CUfunction
ewaldSumsKernel
;
CUfunction
ewaldForcesKernel
;
CUfunction
ewaldForcesKernel
;
CUfunction
pmeGridIndexKernel
;
CUfunction
pmeGridIndexKernel
;
CUfunction
pmeDispersionGridIndexKernel
;
CUfunction
pmeSpreadChargeKernel
;
CUfunction
pmeSpreadChargeKernel
;
CUfunction
pmeDispersionSpreadChargeKernel
;
CUfunction
pmeFinishSpreadChargeKernel
;
CUfunction
pmeFinishSpreadChargeKernel
;
CUfunction
pmeDispersionFinishSpreadChargeKernel
;
CUfunction
pmeEvalEnergyKernel
;
CUfunction
pmeEvalEnergyKernel
;
CUfunction
pmeEvalDispersionEnergyKernel
;
CUfunction
pmeConvolutionKernel
;
CUfunction
pmeConvolutionKernel
;
CUfunction
pmeDispersionConvolutionKernel
;
CUfunction
pmeInterpolateForceKernel
;
CUfunction
pmeInterpolateForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefi
ne
s
;
CUfunction
pmeInterpolateDispersionForceKer
ne
l
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
interpolateForceThreads
;
int
interpolateForceThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
,
doLJPME
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
};
};
...
@@ -724,8 +763,10 @@ public:
...
@@ -724,8 +763,10 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
private:
private:
class
ForceInfo
;
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
,
const
std
::
vector
<
std
::
string
>&
tableTypes
);
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
,
const
std
::
vector
<
std
::
string
>&
tableTypes
);
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
globals
;
CudaArray
*
interactionGroupData
;
CudaArray
*
interactionGroupData
;
...
@@ -775,10 +816,12 @@ public:
...
@@ -775,10 +816,12 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GBSAOBCForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GBSAOBCForce
&
force
);
private:
private:
class
ForceInfo
;
double
prefactor
,
surfaceAreaFactor
,
cutoff
;
double
prefactor
,
surfaceAreaFactor
,
cutoff
;
bool
hasCreatedKernels
;
bool
hasCreatedKernels
;
int
maxTiles
;
int
maxTiles
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
CudaArray
*
params
;
CudaArray
*
params
;
CudaArray
*
bornSum
;
CudaArray
*
bornSum
;
CudaArray
*
bornRadii
;
CudaArray
*
bornRadii
;
...
@@ -825,10 +868,12 @@ public:
...
@@ -825,10 +868,12 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
private:
private:
class
ForceInfo
;
double
cutoff
;
double
cutoff
;
bool
hasInitializedKernels
,
needParameterGradient
,
needEnergyParamDerivs
;
bool
hasInitializedKernels
,
needParameterGradient
,
needEnergyParamDerivs
;
int
maxTiles
,
numComputedValues
;
int
maxTiles
,
numComputedValues
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaParameterSet
*
computedValues
;
CudaParameterSet
*
computedValues
;
CudaParameterSet
*
energyDerivs
;
CudaParameterSet
*
energyDerivs
;
...
@@ -882,9 +927,11 @@ public:
...
@@ -882,9 +927,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomExternalForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomExternalForce
&
force
);
private:
private:
class
ForceInfo
;
int
numParticles
;
int
numParticles
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
globals
;
...
@@ -926,9 +973,11 @@ public:
...
@@ -926,9 +973,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomHbondForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomHbondForce
&
force
);
private:
private:
class
ForceInfo
;
int
numDonors
,
numAcceptors
;
int
numDonors
,
numAcceptors
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
CudaParameterSet
*
donorParams
;
CudaParameterSet
*
donorParams
;
CudaParameterSet
*
acceptorParams
;
CudaParameterSet
*
acceptorParams
;
CudaArray
*
globals
;
CudaArray
*
globals
;
...
@@ -978,9 +1027,11 @@ public:
...
@@ -978,9 +1027,11 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomCentroidBondForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomCentroidBondForce
&
force
);
private:
private:
class
ForceInfo
;
int
numGroups
,
numBonds
;
int
numGroups
,
numBonds
;
bool
needEnergyParamDerivs
;
bool
needEnergyParamDerivs
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
globals
;
CudaArray
*
groupParticles
;
CudaArray
*
groupParticles
;
...
@@ -1031,8 +1082,10 @@ public:
...
@@ -1031,8 +1082,10 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomCompoundBondForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomCompoundBondForce
&
force
);
private:
private:
class
ForceInfo
;
int
numBonds
;
int
numBonds
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
...
@@ -1077,7 +1130,9 @@ public:
...
@@ -1077,7 +1130,9 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomManyParticleForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomManyParticleForce
&
force
);
private:
private:
class
ForceInfo
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
int
maxNeighborPairs
,
forceWorkgroupSize
,
findNeighborsWorkgroupSize
;
int
maxNeighborPairs
,
forceWorkgroupSize
,
findNeighborsWorkgroupSize
;
...
@@ -1139,9 +1194,11 @@ public:
...
@@ -1139,9 +1194,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GayBerneForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GayBerneForce
&
force
);
private:
private:
class
ForceInfo
;
class
ReorderListener
;
class
ReorderListener
;
void
sortAtoms
();
void
sortAtoms
();
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
bool
hasInitializedKernels
;
bool
hasInitializedKernels
;
int
numRealParticles
,
numExceptions
,
maxNeighborBlocks
;
int
numRealParticles
,
numExceptions
,
maxNeighborBlocks
;
GayBerneForce
::
NonbondedMethod
nonbondedMethod
;
GayBerneForce
::
NonbondedMethod
nonbondedMethod
;
...
...
platforms/cuda/include/CudaParallelKernels.h
View file @
cd874b2b
...
@@ -439,6 +439,15 @@ public:
...
@@ -439,6 +439,15 @@ public:
* @param nz the number of grid points along the Z axis
* @param nz the number of grid points along the Z axis
*/
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
private:
class
Task
;
class
Task
;
CudaPlatform
::
PlatformData
&
data
;
CudaPlatform
::
PlatformData
&
data
;
...
...
platforms/cuda/src/CudaContext.cpp
View file @
cd874b2b
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -108,7 +108,8 @@ static int executeInWindows(const string &command) {
...
@@ -108,7 +108,8 @@ static int executeInWindows(const string &command) {
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
currentStream
(
0
),
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
currentStream
(
0
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
chargeBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
// Determine what compiler to use.
// Determine what compiler to use.
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
...
@@ -291,6 +292,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -291,6 +292,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
clearFourBuffersKernel
=
getKernel
(
utilities
,
"clearFourBuffers"
);
clearFourBuffersKernel
=
getKernel
(
utilities
,
"clearFourBuffers"
);
clearFiveBuffersKernel
=
getKernel
(
utilities
,
"clearFiveBuffers"
);
clearFiveBuffersKernel
=
getKernel
(
utilities
,
"clearFiveBuffers"
);
clearSixBuffersKernel
=
getKernel
(
utilities
,
"clearSixBuffers"
);
clearSixBuffersKernel
=
getKernel
(
utilities
,
"clearSixBuffers"
);
setChargesKernel
=
getKernel
(
utilities
,
"setCharges"
);
// Set defines based on the requested precision.
// Set defines based on the requested precision.
...
@@ -407,6 +409,8 @@ CudaContext::~CudaContext() {
...
@@ -407,6 +409,8 @@ CudaContext::~CudaContext() {
delete
energyParamDerivBuffer
;
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
if
(
atomIndexDevice
!=
NULL
)
delete
atomIndexDevice
;
delete
atomIndexDevice
;
if
(
chargeBuffer
!=
NULL
)
delete
chargeBuffer
;
if
(
integration
!=
NULL
)
if
(
integration
!=
NULL
)
delete
integration
;
delete
integration
;
if
(
expression
!=
NULL
)
if
(
expression
!=
NULL
)
...
@@ -860,6 +864,25 @@ void CudaContext::clearAutoclearBuffers() {
...
@@ -860,6 +864,25 @@ void CudaContext::clearAutoclearBuffers() {
}
}
}
}
void
CudaContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
if
(
chargeBuffer
==
NULL
)
chargeBuffer
=
new
CudaArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
if
(
getUseDoublePrecision
())
{
double
*
c
=
(
double
*
)
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
c
[
i
]
=
charges
[
i
];
chargeBuffer
->
upload
(
c
);
}
else
{
float
*
c
=
(
float
*
)
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
c
[
i
]
=
(
float
)
charges
[
i
];
chargeBuffer
->
upload
(
c
);
}
void
*
args
[]
=
{
&
chargeBuffer
->
getDevicePointer
(),
&
posq
->
getDevicePointer
(),
&
atomIndexDevice
->
getDevicePointer
(),
&
numAtoms
};
executeKernel
(
setChargesKernel
,
args
,
numAtoms
);
}
/**
/**
* This class ensures that atom reordering doesn't break virtual sites.
* This class ensures that atom reordering doesn't break virtual sites.
*/
*/
...
@@ -1058,9 +1081,19 @@ void CudaContext::findMoleculeGroups() {
...
@@ -1058,9 +1081,19 @@ void CudaContext::findMoleculeGroups() {
}
}
void
CudaContext
::
invalidateMolecules
()
{
void
CudaContext
::
invalidateMolecules
()
{
if
(
numAtoms
==
0
||
nonbonded
==
NULL
||
!
nonbonded
->
getUseCutoff
())
for
(
int
i
=
0
;
i
<
forces
.
size
();
i
++
)
if
(
invalidateMolecules
(
forces
[
i
]))
return
;
return
;
}
bool
CudaContext
::
invalidateMolecules
(
CudaForceInfo
*
force
)
{
if
(
numAtoms
==
0
||
nonbonded
==
NULL
||
!
nonbonded
->
getUseCutoff
())
return
false
;
bool
valid
=
true
;
bool
valid
=
true
;
int
forceIndex
=
-
1
;
for
(
int
i
=
0
;
i
<
forces
.
size
();
i
++
)
if
(
forces
[
i
]
==
force
)
forceIndex
=
i
;
for
(
int
group
=
0
;
valid
&&
group
<
(
int
)
moleculeGroups
.
size
();
group
++
)
{
for
(
int
group
=
0
;
valid
&&
group
<
(
int
)
moleculeGroups
.
size
();
group
++
)
{
MoleculeGroup
&
mol
=
moleculeGroups
[
group
];
MoleculeGroup
&
mol
=
moleculeGroups
[
group
];
vector
<
int
>&
instances
=
mol
.
instances
;
vector
<
int
>&
instances
=
mol
.
instances
;
...
@@ -1075,22 +1108,21 @@ void CudaContext::invalidateMolecules() {
...
@@ -1075,22 +1108,21 @@ void CudaContext::invalidateMolecules() {
Molecule
&
m2
=
molecules
[
instances
[
j
]];
Molecule
&
m2
=
molecules
[
instances
[
j
]];
int
offset2
=
offsets
[
j
];
int
offset2
=
offsets
[
j
];
for
(
int
i
=
0
;
i
<
(
int
)
atoms
.
size
()
&&
valid
;
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atoms
.
size
()
&&
valid
;
i
++
)
{
for
(
int
k
=
0
;
k
<
(
int
)
forces
.
size
();
k
++
)
if
(
!
force
->
areParticlesIdentical
(
atoms
[
i
]
+
offset1
,
atoms
[
i
]
+
offset2
))
if
(
!
forces
[
k
]
->
areParticlesIdentical
(
atoms
[
i
]
+
offset1
,
atoms
[
i
]
+
offset2
))
valid
=
false
;
valid
=
false
;
}
}
// See if the force groups are identical.
// See if the force groups are identical.
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
()
&&
valid
;
i
++
)
{
if
(
valid
&&
forceIndex
>
-
1
)
{
for
(
int
k
=
0
;
k
<
(
int
)
m1
.
groups
[
i
].
size
()
&&
valid
;
k
++
)
for
(
int
k
=
0
;
k
<
(
int
)
m1
.
groups
[
forceIndex
].
size
()
&&
valid
;
k
++
)
if
(
!
force
s
[
i
]
->
areGroupsIdentical
(
m1
.
groups
[
i
][
k
],
m2
.
groups
[
i
][
k
]))
if
(
!
force
->
areGroupsIdentical
(
m1
.
groups
[
forceIndex
][
k
],
m2
.
groups
[
forceIndex
][
k
]))
valid
=
false
;
valid
=
false
;
}
}
}
}
}
}
if
(
valid
)
if
(
valid
)
return
;
return
false
;
// The list of which molecules are identical is no longer valid. We need to restore the
// The list of which molecules are identical is no longer valid. We need to restore the
// atoms to their original order, rebuild the list of identical molecules, and sort them
// atoms to their original order, rebuild the list of identical molecules, and sort them
...
@@ -1158,6 +1190,7 @@ void CudaContext::invalidateMolecules() {
...
@@ -1158,6 +1190,7 @@ void CudaContext::invalidateMolecules() {
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
reorderListeners
[
i
]
->
execute
();
reorderListeners
[
i
]
->
execute
();
reorderAtoms
();
reorderAtoms
();
return
true
;
}
}
void
CudaContext
::
reorderAtoms
()
{
void
CudaContext
::
reorderAtoms
()
{
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
cd874b2b
This diff is collapsed.
Click to expand it.
platforms/cuda/src/CudaParallelKernels.cpp
View file @
cd874b2b
...
@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int&
...
@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int&
dynamic_cast
<
const
CudaCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
dynamic_cast
<
const
CudaCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
}
void
CudaParallelCalcNonbondedForceKernel
::
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
dynamic_cast
<
const
CudaCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getLJPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
class
CudaParallelCalcCustomNonbondedForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcCustomNonbondedForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
CudaCalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
CudaCalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
cd874b2b
...
@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
...
@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
CHECK_RESULT
(
cuDeviceGetName
(
name
,
1000
,
contexts
[
i
]
->
getDevice
()),
"Error querying device name"
);
CHECK_RESULT
(
cuDeviceGetName
(
name
,
1000
,
contexts
[
i
]
->
getDevice
()),
"Error querying device name"
);
deviceName
<<
name
;
deviceName
<<
name
;
}
}
size_t
printfsize
;
cuCtxGetLimit
(
&
printfsize
,
CU_LIMIT_PRINTF_FIFO_SIZE
);
cuCtxSetLimit
(
CU_LIMIT_PRINTF_FIFO_SIZE
,
10
*
printfsize
);
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
deterministicForces
=
(
deterministicForcesProperty
==
"true"
);
deterministicForces
=
(
deterministicForcesProperty
==
"true"
);
...
...
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
cd874b2b
...
@@ -17,6 +17,26 @@
...
@@ -17,6 +17,26 @@
const
real
erfcAlphaR
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
expAlphaRSqr
;
const
real
erfcAlphaR
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
expAlphaRSqr
;
#endif
#endif
real
tempForce
=
0.0
f
;
real
tempForce
=
0.0
f
;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA
*
r
;
const
real
dar2
=
dispersionAlphaR
*
dispersionAlphaR
;
const
real
dar4
=
dar2
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
float2
sigExpProd
=
sigmaEpsilon1
*
sigmaEpsilon2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
#endif
#endif
if
(
needCorrection
)
{
if
(
needCorrection
)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
...
@@ -29,6 +49,13 @@
...
@@ -29,6 +49,13 @@
includeInteraction
=
false
;
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI
*
EWALD_ALPHA
*
138.935456
f
*
posq1
.
w
*
posq2
.
w
;
tempEnergy
-=
TWO_OVER_SQRT_PI
*
EWALD_ALPHA
*
138.935456
f
*
posq1
.
w
*
posq2
.
w
;
}
}
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
#endif
#endif
}
}
else
{
else
{
#if HAS_LENNARD_JONES
#if HAS_LENNARD_JONES
...
@@ -36,7 +63,8 @@
...
@@ -36,7 +63,8 @@
real
sig2
=
invR
*
sig
;
real
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
real
sig6
=
sig2
*
sig2
*
sig2
;
real
sig6
=
sig2
*
sig2
*
sig2
;
real
epssig6
=
sig6
*
(
sigmaEpsilon1
.
y
*
sigmaEpsilon2
.
y
);
real
eps
=
sigmaEpsilon1
.
y
*
sigmaEpsilon2
.
y
;
real
epssig6
=
sig6
*
eps
;
tempForce
=
epssig6
*
(
12.0
f
*
sig6
-
6.0
f
);
tempForce
=
epssig6
*
(
12.0
f
*
sig6
-
6.0
f
);
real
ljEnergy
=
epssig6
*
(
sig6
-
1.0
f
);
real
ljEnergy
=
epssig6
*
(
sig6
-
1.0
f
);
#if USE_LJ_SWITCH
#if USE_LJ_SWITCH
...
@@ -48,6 +76,22 @@
...
@@ -48,6 +76,22 @@
ljEnergy
*=
switchValue
;
ljEnergy
*=
switchValue
;
}
}
#endif
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2
=
sig
*
sig
;
sig6
=
sig2
*
sig2
*
sig2
*
INVCUT6
;
epssig6
=
eps
*
sig6
;
// The additive part of the potential shift
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
// The multiplicative part of the potential shift
ljEnergy
+=
MULTSHIFT6
*
c6
;
#endif
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
#else
#else
...
...
platforms/cuda/src/kernels/pme.cu
View file @
cd874b2b
...
@@ -21,7 +21,11 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
...
@@ -21,7 +21,11 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
#ifdef USE_LJPME
,
const
float2
*
__restrict__
sigmaEpsilon
#endif
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -63,6 +67,12 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
...
@@ -63,6 +67,12 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
// Spread the charge from this atom onto each grid point.
// Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const
float2
sigEps
=
sigmaEpsilon
[
atom
];
const
real
charge
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
#else
const
real
charge
=
pos
.
w
;
#endif
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndex
.
x
+
ix
;
int
xbase
=
gridIndex
.
x
+
ix
;
xbase
-=
(
xbase
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
);
xbase
-=
(
xbase
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
);
...
@@ -80,7 +90,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
...
@@ -80,7 +90,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
);
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
);
int
index
=
ybase
+
zindex
;
int
index
=
ybase
+
zindex
;
real
add
=
pos
.
w
*
dx
*
dy
*
data
[
iz
].
z
;
real
add
=
charge
*
dx
*
dy
*
data
[
iz
].
z
;
#ifdef USE_DOUBLE_PRECISION
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
...
@@ -121,7 +131,15 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
...
@@ -121,7 +131,15 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
#ifdef USE_LJPME
const
real
recipScaleFactor
=
-
2
*
M_PI
*
SQRT
(
M_PI
)
*
RECIP
(
6
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
real
bfac
=
M_PI
/
EWALD_ALPHA
;
real
fac1
=
2
*
M_PI
*
M_PI
*
M_PI
*
SQRT
(
M_PI
);
real
fac2
=
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
fac3
=
-
2
*
EWALD_ALPHA
*
M_PI
*
M_PI
;
#else
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
#endif
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// real indices
// real indices
...
@@ -140,12 +158,23 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
...
@@ -140,12 +158,23 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
real
bz
=
pmeBsplineModuliZ
[
kz
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
real2
grid
=
halfcomplex_pmeGrid
[
index
];
real2
grid
=
halfcomplex_pmeGrid
[
index
];
real
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
real
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
#ifdef USE_LJPME
real
denom
=
recipScaleFactor
/
(
bx
*
by
*
bz
);
real
m
=
SQRT
(
m2
);
real
m3
=
m
*
m2
;
real
b
=
bfac
*
m
;
real
expfac
=
-
b
*
b
;
real
expterm
=
EXP
(
expfac
);
real
erfcterm
=
ERFC
(
b
);
real
eterm
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
halfcomplex_pmeGrid
[
index
]
=
make_real2
(
grid
.
x
*
eterm
,
grid
.
y
*
eterm
);
#else
real
denom
=
m2
*
bx
*
by
*
bz
;
real
denom
=
m2
*
bx
*
by
*
bz
;
real
eterm
=
recipScaleFactor
*
EXP
(
-
RECIP_EXP_FACTOR
*
m2
)
/
denom
;
real
eterm
=
recipScaleFactor
*
EXP
(
-
RECIP_EXP_FACTOR
*
m2
)
/
denom
;
if
(
kx
!=
0
||
ky
!=
0
||
kz
!=
0
)
{
if
(
kx
!=
0
||
ky
!=
0
||
kz
!=
0
)
{
halfcomplex_pmeGrid
[
index
]
=
make_real2
(
grid
.
x
*
eterm
,
grid
.
y
*
eterm
);
halfcomplex_pmeGrid
[
index
]
=
make_real2
(
grid
.
x
*
eterm
,
grid
.
y
*
eterm
);
}
}
#endif
}
}
}
}
...
@@ -156,7 +185,15 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
...
@@ -156,7 +185,15 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
#ifdef USE_LJPME
const
real
recipScaleFactor
=
-
2
*
M_PI
*
SQRT
(
M_PI
)
*
RECIP
(
6
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
real
bfac
=
M_PI
/
EWALD_ALPHA
;
real
fac1
=
2
*
M_PI
*
M_PI
*
M_PI
*
SQRT
(
M_PI
);
real
fac2
=
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
fac3
=
-
2
*
EWALD_ALPHA
*
M_PI
*
M_PI
;
#else
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
#endif
mixed
energy
=
0
;
mixed
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
...
@@ -175,8 +212,19 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
...
@@ -175,8 +212,19 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
real
bx
=
pmeBsplineModuliX
[
kx
];
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
#ifdef USE_LJPME
real
denom
=
recipScaleFactor
/
(
bx
*
by
*
bz
);
real
m
=
SQRT
(
m2
);
real
m3
=
m
*
m2
;
real
b
=
bfac
*
m
;
real
expfac
=
-
b
*
b
;
real
expterm
=
EXP
(
expfac
);
real
erfcterm
=
ERFC
(
b
);
real
eterm
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
#else
real
denom
=
m2
*
bx
*
by
*
bz
;
real
denom
=
m2
*
bx
*
by
*
bz
;
real
eterm
=
recipScaleFactor
*
EXP
(
-
RECIP_EXP_FACTOR
*
m2
)
/
denom
;
real
eterm
=
recipScaleFactor
*
EXP
(
-
RECIP_EXP_FACTOR
*
m2
)
/
denom
;
#endif
if
(
kz
>=
(
GRID_SIZE_Z
/
2
+
1
))
{
if
(
kz
>=
(
GRID_SIZE_Z
/
2
+
1
))
{
kx
=
((
kx
==
0
)
?
kx
:
GRID_SIZE_X
-
kx
);
kx
=
((
kx
==
0
)
?
kx
:
GRID_SIZE_X
-
kx
);
...
@@ -185,11 +233,12 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
...
@@ -185,11 +233,12 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
}
}
int
indexInHalfComplexGrid
=
kz
+
ky
*
(
GRID_SIZE_Z
/
2
+
1
)
+
kx
*
(
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
));
int
indexInHalfComplexGrid
=
kz
+
ky
*
(
GRID_SIZE_Z
/
2
+
1
)
+
kx
*
(
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
));
real2
grid
=
halfcomplex_pmeGrid
[
indexInHalfComplexGrid
];
real2
grid
=
halfcomplex_pmeGrid
[
indexInHalfComplexGrid
];
if
(
kx
!=
0
||
ky
!=
0
||
kz
!=
0
)
{
#ifndef USE_LJPME
if
(
kx
!=
0
||
ky
!=
0
||
kz
!=
0
)
#endif
energy
+=
eterm
*
(
grid
.
x
*
grid
.
x
+
grid
.
y
*
grid
.
y
);
energy
+=
eterm
*
(
grid
.
x
*
grid
.
x
+
grid
.
y
*
grid
.
y
);
}
}
}
#if defined(USE_PME_STREAM) && !defined(USE_LJPME)
#ifdef USE_PME_STREAM
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
0.5
f
*
energy
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
0.5
f
*
energy
;
#else
#else
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
0.5
f
*
energy
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
0.5
f
*
energy
;
...
@@ -199,7 +248,11 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
...
@@ -199,7 +248,11 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
extern
"C"
__global__
extern
"C"
__global__
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
#ifdef USE_LJPME
,
const
float2
*
__restrict__
sigmaEpsilon
#endif
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -271,7 +324,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
...
@@ -271,7 +324,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
}
}
}
}
#ifdef USE_LJPME
const
float2
sigEps
=
sigmaEpsilon
[
atom
];
real
q
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
#else
real
q
=
pos
.
w
*
EPSILON_FACTOR
;
real
q
=
pos
.
w
*
EPSILON_FACTOR
;
#endif
real
forceX
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceX
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
...
...
platforms/cuda/src/kernels/utilities.cu
View file @
cd874b2b
...
@@ -73,4 +73,11 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
...
@@ -73,4 +73,11 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
clearSingleBuffer
(
buffer6
,
size6
);
clearSingleBuffer
(
buffer6
,
size6
);
}
}
/**
* Record the atomic charges into the posq array.
*/
__global__
void
setCharges
(
real
*
__restrict__
charges
,
real4
*
__restrict__
posq
,
int
*
__restrict__
atomOrder
,
int
numAtoms
)
{
for
(
int
i
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
i
<
numAtoms
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
posq
[
i
].
w
=
charges
[
atomOrder
[
i
]];
}
}
}
\ No newline at end of file
platforms/cuda/staticTarget/CMakeLists.txt
View file @
cd874b2b
...
@@ -14,8 +14,7 @@ SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GEN
...
@@ -14,8 +14,7 @@ SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GEN
ADD_LIBRARY
(
${
STATIC_TARGET
}
STATIC
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
ADD_LIBRARY
(
${
STATIC_TARGET
}
STATIC
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
CUDA_CUDA_LIBRARY
}
${
CUDA_cufft_LIBRARY
}
${
PTHREADS_LIB_STATIC
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
CUDA_CUDA_LIBRARY
}
${
CUDA_cufft_LIBRARY
}
${
PTHREADS_LIB_STATIC
}
)
#-DPTW32_STATIC_LIB only works for the windows pthreads.
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_CUDA_BUILDING_STATIC_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_CUDA_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB"
)
IF
(
APPLE
)
IF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
)
ELSE
(
APPLE
)
ELSE
(
APPLE
)
...
...
platforms/cuda/tests/TestCudaDispersionPME.cpp
0 → 100644
View file @
cd874b2b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestDispersionPME.h"
void
runPlatformTests
()
{
}
platforms/opencl/include/OpenCLContext.h
View file @
cd874b2b
...
@@ -609,6 +609,10 @@ public:
...
@@ -609,6 +609,10 @@ public:
OpenCLNonbondedUtilities
&
getNonbondedUtilities
()
{
OpenCLNonbondedUtilities
&
getNonbondedUtilities
()
{
return
*
nonbonded
;
return
*
nonbonded
;
}
}
/**
* Set the particle charges. These are packed into the fourth element of the posq array.
*/
void
setCharges
(
const
std
::
vector
<
double
>&
charges
);
/**
/**
* Get the thread used by this context for executing parallel computations.
* Get the thread used by this context for executing parallel computations.
*/
*/
...
@@ -692,6 +696,12 @@ public:
...
@@ -692,6 +696,12 @@ public:
* and order to be revalidated.
* and order to be revalidated.
*/
*/
void
invalidateMolecules
();
void
invalidateMolecules
();
/**
* Mark that the current molecule definitions from one particular force (and hence the atom order)
* may be invalid. This should be called whenever force field parameters change. It will cause the
* definitions and order to be revalidated.
*/
bool
invalidateMolecules
(
OpenCLForceInfo
*
force
);
private:
private:
struct
Molecule
;
struct
Molecule
;
struct
MoleculeGroup
;
struct
MoleculeGroup
;
...
@@ -739,6 +749,7 @@ private:
...
@@ -739,6 +749,7 @@ private:
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
reduceReal4Kernel
;
cl
::
Kernel
reduceReal4Kernel
;
cl
::
Kernel
reduceForcesKernel
;
cl
::
Kernel
reduceForcesKernel
;
cl
::
Kernel
setChargesKernel
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
...
@@ -754,6 +765,7 @@ private:
...
@@ -754,6 +765,7 @@ private:
OpenCLArray
*
energyBuffer
;
OpenCLArray
*
energyBuffer
;
OpenCLArray
*
energyParamDerivBuffer
;
OpenCLArray
*
energyParamDerivBuffer
;
OpenCLArray
*
atomIndexDevice
;
OpenCLArray
*
atomIndexDevice
;
OpenCLArray
*
chargeBuffer
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
vector
<
int
>
atomIndex
;
std
::
vector
<
int
>
atomIndex
;
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
cd874b2b
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -176,7 +176,6 @@ public:
...
@@ -176,7 +176,6 @@ public:
*/
*/
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
private:
private:
class
GetPositionsTask
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
};
};
...
@@ -270,9 +269,11 @@ public:
...
@@ -270,9 +269,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
HarmonicBondForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
HarmonicBondForce
&
force
);
private:
private:
class
ForceInfo
;
int
numBonds
;
int
numBonds
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
*
params
;
};
};
...
@@ -310,9 +311,11 @@ public:
...
@@ -310,9 +311,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomBondForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomBondForce
&
force
);
private:
private:
class
ForceInfo
;
int
numBonds
;
int
numBonds
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
...
@@ -353,9 +356,11 @@ public:
...
@@ -353,9 +356,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
HarmonicAngleForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
HarmonicAngleForce
&
force
);
private:
private:
class
ForceInfo
;
int
numAngles
;
int
numAngles
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
*
params
;
};
};
...
@@ -393,9 +398,11 @@ public:
...
@@ -393,9 +398,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomAngleForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomAngleForce
&
force
);
private:
private:
class
ForceInfo
;
int
numAngles
;
int
numAngles
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
...
@@ -436,9 +443,11 @@ public:
...
@@ -436,9 +443,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
PeriodicTorsionForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
PeriodicTorsionForce
&
force
);
private:
private:
class
ForceInfo
;
int
numTorsions
;
int
numTorsions
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
*
params
;
};
};
...
@@ -476,9 +485,11 @@ public:
...
@@ -476,9 +485,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
RBTorsionForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
RBTorsionForce
&
force
);
private:
private:
class
ForceInfo
;
int
numTorsions
;
int
numTorsions
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
*
params
;
};
};
...
@@ -516,9 +527,11 @@ public:
...
@@ -516,9 +527,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CMAPTorsionForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CMAPTorsionForce
&
force
);
private:
private:
class
ForceInfo
;
int
numTorsions
;
int
numTorsions
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
std
::
vector
<
mm_int2
>
mapPositionsVec
;
std
::
vector
<
mm_int2
>
mapPositionsVec
;
OpenCLArray
*
coefficients
;
OpenCLArray
*
coefficients
;
...
@@ -559,9 +572,11 @@ public:
...
@@ -559,9 +572,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomTorsionForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomTorsionForce
&
force
);
private:
private:
class
ForceInfo
;
int
numTorsions
;
int
numTorsions
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
...
@@ -576,8 +591,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
...
@@ -576,8 +591,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeDispersionBsplineModuliX
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeDispersionBsplineModuliY
(
NULL
),
pmeDispersionBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
}
}
~
OpenCLCalcNonbondedForceKernel
();
~
OpenCLCalcNonbondedForceKernel
();
/**
/**
...
@@ -614,6 +630,15 @@ public:
...
@@ -614,6 +630,15 @@ public:
* @param nz the number of grid points along the Z axis
* @param nz the number of grid points along the Z axis
*/
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
private:
class
SortTrait
:
public
OpenCLSort
::
SortTrait
{
class
SortTrait
:
public
OpenCLSort
::
SortTrait
{
int
getDataSize
()
const
{
return
8
;}
int
getDataSize
()
const
{
return
8
;}
...
@@ -625,12 +650,14 @@ private:
...
@@ -625,12 +650,14 @@ private:
const
char
*
getMaxValue
()
const
{
return
"(int2) (INT_MAX, INT_MAX)"
;}
const
char
*
getMaxValue
()
const
{
return
"(int2) (INT_MAX, INT_MAX)"
;}
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
};
};
class
ForceInfo
;
class
PmeIO
;
class
PmeIO
;
class
PmePreComputation
;
class
PmePreComputation
;
class
PmePostComputation
;
class
PmePostComputation
;
class
SyncQueuePreComputation
;
class
SyncQueuePreComputation
;
class
SyncQueuePostComputation
;
class
SyncQueuePostComputation
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLArray
*
sigmaEpsilon
;
OpenCLArray
*
sigmaEpsilon
;
OpenCLArray
*
exceptionParams
;
OpenCLArray
*
exceptionParams
;
...
@@ -640,6 +667,9 @@ private:
...
@@ -640,6 +667,9 @@ private:
OpenCLArray
*
pmeBsplineModuliX
;
OpenCLArray
*
pmeBsplineModuliX
;
OpenCLArray
*
pmeBsplineModuliY
;
OpenCLArray
*
pmeBsplineModuliY
;
OpenCLArray
*
pmeBsplineModuliZ
;
OpenCLArray
*
pmeBsplineModuliZ
;
OpenCLArray
*
pmeDispersionBsplineModuliX
;
OpenCLArray
*
pmeDispersionBsplineModuliY
;
OpenCLArray
*
pmeDispersionBsplineModuliZ
;
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
*
pmeAtomGridIndex
;
OpenCLArray
*
pmeAtomGridIndex
;
...
@@ -648,25 +678,34 @@ private:
...
@@ -648,25 +678,34 @@ private:
cl
::
CommandQueue
pmeQueue
;
cl
::
CommandQueue
pmeQueue
;
cl
::
Event
pmeSyncEvent
;
cl
::
Event
pmeSyncEvent
;
OpenCLFFT3D
*
fft
;
OpenCLFFT3D
*
fft
;
OpenCLFFT3D
*
dispersionFft
;
Kernel
cpuPme
;
Kernel
cpuPme
;
PmeIO
*
pmeio
;
PmeIO
*
pmeio
;
SyncQueuePostComputation
*
syncQueue
;
SyncQueuePostComputation
*
syncQueue
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
cl
::
Kernel
pmeAtomRangeKernel
;
cl
::
Kernel
pmeAtomRangeKernel
;
cl
::
Kernel
pmeDispersionAtomRangeKernel
;
cl
::
Kernel
pmeZIndexKernel
;
cl
::
Kernel
pmeZIndexKernel
;
cl
::
Kernel
pmeDispersionZIndexKernel
;
cl
::
Kernel
pmeUpdateBsplinesKernel
;
cl
::
Kernel
pmeUpdateBsplinesKernel
;
cl
::
Kernel
pmeDispersionUpdateBsplinesKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeDispersionSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeDispersionFinishSpreadChargeKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeDispersionConvolutionKernel
;
cl
::
Kernel
pmeEvalEnergyKernel
;
cl
::
Kernel
pmeEvalEnergyKernel
;
cl
::
Kernel
pmeDispersionEvalEnergyKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
cl
::
Kernel
pmeDispersionInterpolateForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
,
doLJPME
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
};
};
...
@@ -704,8 +743,10 @@ public:
...
@@ -704,8 +743,10 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
private:
private:
class
ForceInfo
;
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
,
const
std
::
vector
<
std
::
string
>&
tableTypes
);
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
,
const
std
::
vector
<
std
::
string
>&
tableTypes
);
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
OpenCLArray
*
interactionGroupData
;
OpenCLArray
*
interactionGroupData
;
...
@@ -756,10 +797,12 @@ public:
...
@@ -756,10 +797,12 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GBSAOBCForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GBSAOBCForce
&
force
);
private:
private:
class
ForceInfo
;
double
prefactor
,
surfaceAreaFactor
,
cutoff
;
double
prefactor
,
surfaceAreaFactor
,
cutoff
;
bool
hasCreatedKernels
;
bool
hasCreatedKernels
;
int
maxTiles
;
int
maxTiles
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
OpenCLArray
*
params
;
OpenCLArray
*
params
;
OpenCLArray
*
bornSum
;
OpenCLArray
*
bornSum
;
OpenCLArray
*
longBornSum
;
OpenCLArray
*
longBornSum
;
...
@@ -807,10 +850,12 @@ public:
...
@@ -807,10 +850,12 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
private:
private:
class
ForceInfo
;
double
cutoff
;
double
cutoff
;
bool
hasInitializedKernels
,
needParameterGradient
,
needEnergyParamDerivs
;
bool
hasInitializedKernels
,
needParameterGradient
,
needEnergyParamDerivs
;
int
maxTiles
,
numComputedValues
;
int
maxTiles
,
numComputedValues
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
computedValues
;
OpenCLParameterSet
*
computedValues
;
OpenCLParameterSet
*
energyDerivs
;
OpenCLParameterSet
*
energyDerivs
;
...
@@ -864,9 +909,11 @@ public:
...
@@ -864,9 +909,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomExternalForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomExternalForce
&
force
);
private:
private:
class
ForceInfo
;
int
numParticles
;
int
numParticles
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
...
@@ -908,9 +955,11 @@ public:
...
@@ -908,9 +955,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomHbondForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomHbondForce
&
force
);
private:
private:
class
ForceInfo
;
int
numDonors
,
numAcceptors
;
int
numDonors
,
numAcceptors
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
OpenCLParameterSet
*
donorParams
;
OpenCLParameterSet
*
donorParams
;
OpenCLParameterSet
*
acceptorParams
;
OpenCLParameterSet
*
acceptorParams
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
...
@@ -961,9 +1010,11 @@ public:
...
@@ -961,9 +1010,11 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomCentroidBondForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomCentroidBondForce
&
force
);
private:
private:
class
ForceInfo
;
int
numGroups
,
numBonds
;
int
numGroups
,
numBonds
;
bool
needEnergyParamDerivs
;
bool
needEnergyParamDerivs
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
OpenCLArray
*
groupParticles
;
OpenCLArray
*
groupParticles
;
...
@@ -1013,8 +1064,10 @@ public:
...
@@ -1013,8 +1064,10 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomCompoundBondForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomCompoundBondForce
&
force
);
private:
private:
class
ForceInfo
;
int
numBonds
;
int
numBonds
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
...
@@ -1059,7 +1112,9 @@ public:
...
@@ -1059,7 +1112,9 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomManyParticleForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomManyParticleForce
&
force
);
private:
private:
class
ForceInfo
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
int
maxNeighborPairs
,
forceWorkgroupSize
,
findNeighborsWorkgroupSize
;
int
maxNeighborPairs
,
forceWorkgroupSize
,
findNeighborsWorkgroupSize
;
...
@@ -1119,9 +1174,11 @@ public:
...
@@ -1119,9 +1174,11 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GayBerneForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GayBerneForce
&
force
);
private:
private:
class
ForceInfo
;
class
ReorderListener
;
class
ReorderListener
;
void
sortAtoms
();
void
sortAtoms
();
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
bool
hasInitializedKernels
;
bool
hasInitializedKernels
;
int
numRealParticles
,
maxNeighborBlocks
;
int
numRealParticles
,
maxNeighborBlocks
;
GayBerneForce
::
NonbondedMethod
nonbondedMethod
;
GayBerneForce
::
NonbondedMethod
nonbondedMethod
;
...
...
platforms/opencl/include/OpenCLParallelKernels.h
View file @
cd874b2b
...
@@ -438,6 +438,15 @@ public:
...
@@ -438,6 +438,15 @@ public:
* @param nz the number of grid points along the Z axis
* @param nz the number of grid points along the Z axis
*/
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
private:
class
Task
;
class
Task
;
OpenCLPlatform
::
PlatformData
&
data
;
OpenCLPlatform
::
PlatformData
&
data
;
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment