Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
047934e2
Commit
047934e2
authored
Mar 01, 2017
by
Rafal P. Wiewiora
Browse files
Merge remote-tracking branch 'upstream/master'
parents
ce3a5dc0
d12c9bd1
Changes
351
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
386 additions
and
295 deletions
+386
-295
platforms/reference/tests/TestReferenceNeighborList.cpp
platforms/reference/tests/TestReferenceNeighborList.cpp
+23
-23
plugins/amoeba/openmmapi/src/AmoebaAngleForceImpl.cpp
plugins/amoeba/openmmapi/src/AmoebaAngleForceImpl.cpp
+1
-0
plugins/amoeba/openmmapi/src/AmoebaBondForceImpl.cpp
plugins/amoeba/openmmapi/src/AmoebaBondForceImpl.cpp
+1
-0
plugins/amoeba/openmmapi/src/AmoebaGeneralizedKirkwoodForceImpl.cpp
...oeba/openmmapi/src/AmoebaGeneralizedKirkwoodForceImpl.cpp
+1
-0
plugins/amoeba/openmmapi/src/AmoebaInPlaneAngleForceImpl.cpp
plugins/amoeba/openmmapi/src/AmoebaInPlaneAngleForceImpl.cpp
+1
-0
plugins/amoeba/openmmapi/src/AmoebaMultipoleForceImpl.cpp
plugins/amoeba/openmmapi/src/AmoebaMultipoleForceImpl.cpp
+21
-2
plugins/amoeba/openmmapi/src/AmoebaOutOfPlaneBendForceImpl.cpp
...ns/amoeba/openmmapi/src/AmoebaOutOfPlaneBendForceImpl.cpp
+1
-0
plugins/amoeba/openmmapi/src/AmoebaPiTorsionForceImpl.cpp
plugins/amoeba/openmmapi/src/AmoebaPiTorsionForceImpl.cpp
+1
-0
plugins/amoeba/openmmapi/src/AmoebaStretchBendForceImpl.cpp
plugins/amoeba/openmmapi/src/AmoebaStretchBendForceImpl.cpp
+1
-0
plugins/amoeba/platforms/cuda/CMakeLists.txt
plugins/amoeba/platforms/cuda/CMakeLists.txt
+36
-10
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
...ins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
+10
-2
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+31
-75
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+4
-4
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
...eba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
+6
-6
plugins/amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
.../amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
...moeba/platforms/cuda/src/kernels/multipoleInducedField.cu
+121
-6
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
+108
-157
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
+10
-6
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
.../platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
+1
-1
plugins/amoeba/platforms/cuda/tests/TestCudaAmoebaExtrapolatedPolarization.cpp
...rms/cuda/tests/TestCudaAmoebaExtrapolatedPolarization.cpp
+7
-2
No files found.
platforms/reference/tests/TestReferenceNeighborList.cpp
View file @
047934e2
...
@@ -41,14 +41,14 @@ using namespace OpenMM;
...
@@ -41,14 +41,14 @@ using namespace OpenMM;
void
testNeighborList
()
void
testNeighborList
()
{
{
vector
<
Real
Vec
>
particleList
(
2
);
vector
<
Vec
3
>
particleList
(
2
);
particleList
[
0
]
=
Real
Vec
(
13.6
,
0
,
0
);
particleList
[
0
]
=
Vec
3
(
13.6
,
0
,
0
);
particleList
[
1
]
=
Real
Vec
(
0
,
0
,
0
);
particleList
[
1
]
=
Vec
3
(
0
,
0
,
0
);
vector
<
set
<
int
>
>
exclusions
(
2
);
vector
<
set
<
int
>
>
exclusions
(
2
);
NeighborList
neighborList
;
NeighborList
neighborList
;
Real
Vec
boxVectors
[
3
];
Vec
3
boxVectors
[
3
];
computeNeighborListNaive
(
neighborList
,
2
,
particleList
,
exclusions
,
boxVectors
,
false
,
13.7
,
0.01
);
computeNeighborListNaive
(
neighborList
,
2
,
particleList
,
exclusions
,
boxVectors
,
false
,
13.7
,
0.01
);
assert
(
neighborList
.
size
()
==
1
);
assert
(
neighborList
.
size
()
==
1
);
...
@@ -62,15 +62,15 @@ void testNeighborList()
...
@@ -62,15 +62,15 @@ void testNeighborList()
assert
(
neighborList
.
size
()
==
0
);
assert
(
neighborList
.
size
()
==
0
);
}
}
double
distance2
(
Real
Vec
&
pos1
,
Real
Vec
&
pos2
,
const
Real
Vec
*
periodicBoxVectors
)
{
double
distance2
(
Vec
3
&
pos1
,
Vec
3
&
pos2
,
const
Vec
3
*
periodicBoxVectors
)
{
Real
Vec
diff
=
pos1
-
pos2
;
Vec
3
diff
=
pos1
-
pos2
;
diff
-=
periodicBoxVectors
[
2
]
*
floor
(
diff
[
2
]
/
periodicBoxVectors
[
2
][
2
]
+
0.5
);
diff
-=
periodicBoxVectors
[
2
]
*
floor
(
diff
[
2
]
/
periodicBoxVectors
[
2
][
2
]
+
0.5
);
diff
-=
periodicBoxVectors
[
1
]
*
floor
(
diff
[
1
]
/
periodicBoxVectors
[
1
][
1
]
+
0.5
);
diff
-=
periodicBoxVectors
[
1
]
*
floor
(
diff
[
1
]
/
periodicBoxVectors
[
1
][
1
]
+
0.5
);
diff
-=
periodicBoxVectors
[
0
]
*
floor
(
diff
[
0
]
/
periodicBoxVectors
[
0
][
0
]
+
0.5
);
diff
-=
periodicBoxVectors
[
0
]
*
floor
(
diff
[
0
]
/
periodicBoxVectors
[
0
][
0
]
+
0.5
);
return
diff
.
dot
(
diff
);
return
diff
.
dot
(
diff
);
}
}
void
verifyNeighborList
(
NeighborList
&
list
,
int
numParticles
,
vector
<
Real
Vec
>&
positions
,
const
Real
Vec
*
periodicBoxVectors
,
double
cutoff
)
{
void
verifyNeighborList
(
NeighborList
&
list
,
int
numParticles
,
vector
<
Vec
3
>&
positions
,
const
Vec
3
*
periodicBoxVectors
,
double
cutoff
)
{
for
(
int
i
=
0
;
i
<
(
int
)
list
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
list
.
size
();
i
++
)
{
int
particle1
=
list
[
i
].
first
;
int
particle1
=
list
[
i
].
first
;
int
particle2
=
list
[
i
].
second
;
int
particle2
=
list
[
i
].
second
;
...
@@ -87,18 +87,18 @@ void verifyNeighborList(NeighborList& list, int numParticles, vector<RealVec>& p
...
@@ -87,18 +87,18 @@ void verifyNeighborList(NeighborList& list, int numParticles, vector<RealVec>& p
void
testPeriodic
()
{
void
testPeriodic
()
{
const
int
numParticles
=
100
;
const
int
numParticles
=
100
;
const
double
cutoff
=
3.0
;
const
double
cutoff
=
3.0
;
Real
Vec
periodicBoxVectors
[
3
];
Vec
3
periodicBoxVectors
[
3
];
periodicBoxVectors
[
0
]
=
Real
Vec
(
20
,
0
,
0
);
periodicBoxVectors
[
0
]
=
Vec
3
(
20
,
0
,
0
);
periodicBoxVectors
[
1
]
=
Real
Vec
(
0
,
15
,
0
);
periodicBoxVectors
[
1
]
=
Vec
3
(
0
,
15
,
0
);
periodicBoxVectors
[
2
]
=
Real
Vec
(
0
,
0
,
22
);
periodicBoxVectors
[
2
]
=
Vec
3
(
0
,
0
,
22
);
vector
<
Real
Vec
>
particleList
(
numParticles
);
vector
<
Vec
3
>
particleList
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
particleList
[
i
][
0
]
=
(
RealOpenMM
)
(
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
0
][
0
]
*
3
)
;
particleList
[
i
][
0
]
=
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
0
][
0
]
*
3
;
particleList
[
i
][
1
]
=
(
RealOpenMM
)
(
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
1
][
1
]
*
3
)
;
particleList
[
i
][
1
]
=
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
1
][
1
]
*
3
;
particleList
[
i
][
2
]
=
(
RealOpenMM
)
(
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
2
][
2
]
*
3
)
;
particleList
[
i
][
2
]
=
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
2
][
2
]
*
3
;
}
}
vector
<
set
<
int
>
>
exclusions
(
numParticles
);
vector
<
set
<
int
>
>
exclusions
(
numParticles
);
NeighborList
neighborList
;
NeighborList
neighborList
;
...
@@ -111,18 +111,18 @@ void testPeriodic() {
...
@@ -111,18 +111,18 @@ void testPeriodic() {
void
testTriclinic
()
{
void
testTriclinic
()
{
const
int
numParticles
=
1000
;
const
int
numParticles
=
1000
;
const
double
cutoff
=
3.0
;
const
double
cutoff
=
3.0
;
Real
Vec
periodicBoxVectors
[
3
];
Vec
3
periodicBoxVectors
[
3
];
periodicBoxVectors
[
0
]
=
Real
Vec
(
20
,
0
,
0
);
periodicBoxVectors
[
0
]
=
Vec
3
(
20
,
0
,
0
);
periodicBoxVectors
[
1
]
=
Real
Vec
(
5
,
15
,
0
);
periodicBoxVectors
[
1
]
=
Vec
3
(
5
,
15
,
0
);
periodicBoxVectors
[
2
]
=
Real
Vec
(
-
3
,
-
7
,
22
);
periodicBoxVectors
[
2
]
=
Vec
3
(
-
3
,
-
7
,
22
);
vector
<
Real
Vec
>
particleList
(
numParticles
);
vector
<
Vec
3
>
particleList
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
particleList
[
i
][
0
]
=
(
RealOpenMM
)
(
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
0
][
0
]
*
3
)
;
particleList
[
i
][
0
]
=
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
0
][
0
]
*
3
;
particleList
[
i
][
1
]
=
(
RealOpenMM
)
(
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
1
][
1
]
*
3
)
;
particleList
[
i
][
1
]
=
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
1
][
1
]
*
3
;
particleList
[
i
][
2
]
=
(
RealOpenMM
)
(
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
2
][
2
]
*
3
)
;
particleList
[
i
][
2
]
=
genrand_real2
(
sfmt
)
*
periodicBoxVectors
[
2
][
2
]
*
3
;
}
}
vector
<
set
<
int
>
>
exclusions
(
numParticles
);
vector
<
set
<
int
>
>
exclusions
(
numParticles
);
NeighborList
neighborList
;
NeighborList
neighborList
;
...
...
plugins/amoeba/openmmapi/src/AmoebaAngleForceImpl.cpp
View file @
047934e2
...
@@ -64,4 +64,5 @@ std::vector<std::string> AmoebaAngleForceImpl::getKernelNames() {
...
@@ -64,4 +64,5 @@ std::vector<std::string> AmoebaAngleForceImpl::getKernelNames() {
void
AmoebaAngleForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
void
AmoebaAngleForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
kernel
.
getAs
<
CalcAmoebaAngleForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
kernel
.
getAs
<
CalcAmoebaAngleForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
context
.
systemChanged
();
}
}
plugins/amoeba/openmmapi/src/AmoebaBondForceImpl.cpp
View file @
047934e2
...
@@ -75,4 +75,5 @@ vector<pair<int, int> > AmoebaBondForceImpl::getBondedParticles() const {
...
@@ -75,4 +75,5 @@ vector<pair<int, int> > AmoebaBondForceImpl::getBondedParticles() const {
void
AmoebaBondForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
void
AmoebaBondForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
kernel
.
getAs
<
CalcAmoebaBondForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
kernel
.
getAs
<
CalcAmoebaBondForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
context
.
systemChanged
();
}
}
plugins/amoeba/openmmapi/src/AmoebaGeneralizedKirkwoodForceImpl.cpp
View file @
047934e2
...
@@ -66,4 +66,5 @@ std::vector<std::string> AmoebaGeneralizedKirkwoodForceImpl::getKernelNames() {
...
@@ -66,4 +66,5 @@ std::vector<std::string> AmoebaGeneralizedKirkwoodForceImpl::getKernelNames() {
void
AmoebaGeneralizedKirkwoodForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
void
AmoebaGeneralizedKirkwoodForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
kernel
.
getAs
<
CalcAmoebaGeneralizedKirkwoodForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
kernel
.
getAs
<
CalcAmoebaGeneralizedKirkwoodForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
context
.
systemChanged
();
}
}
plugins/amoeba/openmmapi/src/AmoebaInPlaneAngleForceImpl.cpp
View file @
047934e2
...
@@ -64,4 +64,5 @@ std::vector<std::string> AmoebaInPlaneAngleForceImpl::getKernelNames() {
...
@@ -64,4 +64,5 @@ std::vector<std::string> AmoebaInPlaneAngleForceImpl::getKernelNames() {
void
AmoebaInPlaneAngleForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
void
AmoebaInPlaneAngleForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
kernel
.
getAs
<
CalcAmoebaInPlaneAngleForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
kernel
.
getAs
<
CalcAmoebaInPlaneAngleForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
context
.
systemChanged
();
}
}
plugins/amoeba/openmmapi/src/AmoebaMultipoleForceImpl.cpp
View file @
047934e2
...
@@ -50,7 +50,8 @@ AmoebaMultipoleForceImpl::~AmoebaMultipoleForceImpl() {
...
@@ -50,7 +50,8 @@ AmoebaMultipoleForceImpl::~AmoebaMultipoleForceImpl() {
void
AmoebaMultipoleForceImpl
::
initialize
(
ContextImpl
&
context
)
{
void
AmoebaMultipoleForceImpl
::
initialize
(
ContextImpl
&
context
)
{
const
System
&
system
=
context
.
getSystem
();
const
System
&
system
=
context
.
getSystem
();
if
(
owner
.
getNumMultipoles
()
!=
system
.
getNumParticles
())
int
numParticles
=
system
.
getNumParticles
();
if
(
owner
.
getNumMultipoles
()
!=
numParticles
)
throw
OpenMMException
(
"AmoebaMultipoleForce must have exactly as many particles as the System it belongs to."
);
throw
OpenMMException
(
"AmoebaMultipoleForce must have exactly as many particles as the System it belongs to."
);
// check cutoff < 0.5*boxSize
// check cutoff < 0.5*boxSize
...
@@ -64,7 +65,7 @@ void AmoebaMultipoleForceImpl::initialize(ContextImpl& context) {
...
@@ -64,7 +65,7 @@ void AmoebaMultipoleForceImpl::initialize(ContextImpl& context) {
}
}
double
quadrupoleValidationTolerance
=
1.0e-05
;
double
quadrupoleValidationTolerance
=
1.0e-05
;
for
(
int
ii
=
0
;
ii
<
system
.
getN
umParticles
()
;
ii
++
)
{
for
(
int
ii
=
0
;
ii
<
n
umParticles
;
ii
++
)
{
int
axisType
,
multipoleAtomZ
,
multipoleAtomX
,
multipoleAtomY
;
int
axisType
,
multipoleAtomZ
,
multipoleAtomX
,
multipoleAtomY
;
double
charge
,
thole
,
dampingFactor
,
polarity
;
double
charge
,
thole
,
dampingFactor
,
polarity
;
...
@@ -121,6 +122,23 @@ void AmoebaMultipoleForceImpl::initialize(ContextImpl& context) {
...
@@ -121,6 +122,23 @@ void AmoebaMultipoleForceImpl::initialize(ContextImpl& context) {
buffer
<<
"] (ZThenX, Bisector, Z-Bisect, ThreeFold, NoAxisType) currently handled ."
;
buffer
<<
"] (ZThenX, Bisector, Z-Bisect, ThreeFold, NoAxisType) currently handled ."
;
throw
OpenMMException
(
buffer
.
str
());
throw
OpenMMException
(
buffer
.
str
());
}
}
if
(
axisType
!=
AmoebaMultipoleForce
::
NoAxisType
&&
(
multipoleAtomZ
<
0
||
multipoleAtomZ
>=
numParticles
))
{
std
::
stringstream
buffer
;
buffer
<<
"AmoebaMultipoleForce: invalid z axis particle: "
<<
multipoleAtomZ
;
throw
OpenMMException
(
buffer
.
str
());
}
if
(
axisType
!=
AmoebaMultipoleForce
::
NoAxisType
&&
axisType
!=
AmoebaMultipoleForce
::
ZOnly
&&
(
multipoleAtomX
<
0
||
multipoleAtomX
>=
numParticles
))
{
std
::
stringstream
buffer
;
buffer
<<
"AmoebaMultipoleForce: invalid x axis particle: "
<<
multipoleAtomX
;
throw
OpenMMException
(
buffer
.
str
());
}
if
((
axisType
==
AmoebaMultipoleForce
::
ZBisect
||
axisType
==
AmoebaMultipoleForce
::
ThreeFold
)
&&
(
multipoleAtomY
<
0
||
multipoleAtomY
>=
numParticles
))
{
std
::
stringstream
buffer
;
buffer
<<
"AmoebaMultipoleForce: invalid y axis particle: "
<<
multipoleAtomY
;
throw
OpenMMException
(
buffer
.
str
());
}
}
}
kernel
=
context
.
getPlatform
().
createKernel
(
CalcAmoebaMultipoleForceKernel
::
Name
(),
context
);
kernel
=
context
.
getPlatform
().
createKernel
(
CalcAmoebaMultipoleForceKernel
::
Name
(),
context
);
kernel
.
getAs
<
CalcAmoebaMultipoleForceKernel
>
().
initialize
(
context
.
getSystem
(),
owner
);
kernel
.
getAs
<
CalcAmoebaMultipoleForceKernel
>
().
initialize
(
context
.
getSystem
(),
owner
);
...
@@ -206,6 +224,7 @@ void AmoebaMultipoleForceImpl::getSystemMultipoleMoments(ContextImpl& context, s
...
@@ -206,6 +224,7 @@ void AmoebaMultipoleForceImpl::getSystemMultipoleMoments(ContextImpl& context, s
void
AmoebaMultipoleForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
void
AmoebaMultipoleForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
kernel
.
getAs
<
CalcAmoebaMultipoleForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
kernel
.
getAs
<
CalcAmoebaMultipoleForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
context
.
systemChanged
();
}
}
void
AmoebaMultipoleForceImpl
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
void
AmoebaMultipoleForceImpl
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
...
...
plugins/amoeba/openmmapi/src/AmoebaOutOfPlaneBendForceImpl.cpp
View file @
047934e2
...
@@ -64,4 +64,5 @@ std::vector<std::string> AmoebaOutOfPlaneBendForceImpl::getKernelNames() {
...
@@ -64,4 +64,5 @@ std::vector<std::string> AmoebaOutOfPlaneBendForceImpl::getKernelNames() {
void
AmoebaOutOfPlaneBendForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
void
AmoebaOutOfPlaneBendForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
kernel
.
getAs
<
CalcAmoebaOutOfPlaneBendForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
kernel
.
getAs
<
CalcAmoebaOutOfPlaneBendForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
context
.
systemChanged
();
}
}
plugins/amoeba/openmmapi/src/AmoebaPiTorsionForceImpl.cpp
View file @
047934e2
...
@@ -64,4 +64,5 @@ std::vector<std::string> AmoebaPiTorsionForceImpl::getKernelNames() {
...
@@ -64,4 +64,5 @@ std::vector<std::string> AmoebaPiTorsionForceImpl::getKernelNames() {
void
AmoebaPiTorsionForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
void
AmoebaPiTorsionForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
kernel
.
getAs
<
CalcAmoebaPiTorsionForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
kernel
.
getAs
<
CalcAmoebaPiTorsionForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
context
.
systemChanged
();
}
}
plugins/amoeba/openmmapi/src/AmoebaStretchBendForceImpl.cpp
View file @
047934e2
...
@@ -64,4 +64,5 @@ std::vector<std::string> AmoebaStretchBendForceImpl::getKernelNames() {
...
@@ -64,4 +64,5 @@ std::vector<std::string> AmoebaStretchBendForceImpl::getKernelNames() {
void
AmoebaStretchBendForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
void
AmoebaStretchBendForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
kernel
.
getAs
<
CalcAmoebaStretchBendForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
kernel
.
getAs
<
CalcAmoebaStretchBendForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
context
.
systemChanged
();
}
}
plugins/amoeba/platforms/cuda/CMakeLists.txt
View file @
047934e2
...
@@ -21,6 +21,7 @@ SET(OPENMM_SOURCE_SUBDIRS .)
...
@@ -21,6 +21,7 @@ SET(OPENMM_SOURCE_SUBDIRS .)
SET
(
OPENMMAMOEBACUDA_LIBRARY_NAME OpenMMAmoebaCUDA
)
SET
(
OPENMMAMOEBACUDA_LIBRARY_NAME OpenMMAmoebaCUDA
)
SET
(
SHARED_TARGET
${
OPENMMAMOEBACUDA_LIBRARY_NAME
}
)
SET
(
SHARED_TARGET
${
OPENMMAMOEBACUDA_LIBRARY_NAME
}
)
SET
(
STATIC_TARGET
${
OPENMMAMOEBACUDA_LIBRARY_NAME
}
_static
)
# These are all the places to search for header files which are
# These are all the places to search for header files which are
...
@@ -85,17 +86,42 @@ ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
...
@@ -85,17 +86,42 @@ ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
DEPENDS
${
CUDA_KERNELS
}
DEPENDS
${
CUDA_KERNELS
}
)
)
SET_SOURCE_FILES_PROPERTIES
(
${
CUDA_KERNELS_CPP
}
${
CUDA_KERNELS_H
}
PROPERTIES GENERATED TRUE
)
SET_SOURCE_FILES_PROPERTIES
(
${
CUDA_KERNELS_CPP
}
${
CUDA_KERNELS_H
}
PROPERTIES GENERATED TRUE
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
PTHREADS_LIB
}
)
# Build the shared plugin library.
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
OPENMM_LIBRARY_NAME
}
CUDA
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
SHARED_AMOEBA_TARGET
}
)
IF
(
OPENMM_BUILD_SHARED_LIB
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_BUILDING_SHARED_LIBRARY"
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
IF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
PTHREADS_LIB
}
)
ELSE
(
APPLE
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
OPENMM_LIBRARY_NAME
}
CUDA
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
"
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
SHARED_AMOEBA_TARGET
}
)
ENDIF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_BUILDING_SHARED_LIBRARY"
)
IF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
)
ELSE
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
"
)
ENDIF
(
APPLE
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
ENDIF
(
OPENMM_BUILD_SHARED_LIB
)
# Build the static plugin library.
IF
(
OPENMM_BUILD_STATIC_LIB
)
ADD_LIBRARY
(
${
STATIC_TARGET
}
STATIC
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
PTHREADS_LIB
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
OPENMM_LIBRARY_NAME
}
CUDA
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
STATIC_AMOEBA_TARGET
}
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_BUILDING_STATIC_LIBRARY"
)
IF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
)
ELSE
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
"
)
ENDIF
(
APPLE
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
STATIC_TARGET
}
)
ENDIF
(
OPENMM_BUILD_STATIC_LIB
)
INSTALL
(
TARGETS
${
SHARED_TARGET
}
DESTINATION
${
CMAKE_INSTALL_PREFIX
}
/lib/plugins
)
INSTALL
(
TARGETS
${
SHARED_TARGET
}
DESTINATION
${
CMAKE_INSTALL_PREFIX
}
/lib/plugins
)
# Ensure that links to the main CUDA library will be resolved.
# Ensure that links to the main CUDA library will be resolved.
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
View file @
047934e2
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Authors: Mark Friedrichs, Peter Eastman *
* Authors: Mark Friedrichs, Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -33,10 +33,18 @@
...
@@ -33,10 +33,18 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
#ifdef OPENMM_BUILDING_STATIC_LIBRARY
static
void
registerPlatforms
()
{
#else
extern
"C"
OPENMM_EXPORT
void
registerPlatforms
()
{
extern
"C"
OPENMM_EXPORT
void
registerPlatforms
()
{
#endif
}
}
#ifdef OPENMM_BUILDING_STATIC_LIBRARY
static
void
registerKernelFactories
()
{
#else
extern
"C"
OPENMM_EXPORT
void
registerKernelFactories
()
{
extern
"C"
OPENMM_EXPORT
void
registerKernelFactories
()
{
#endif
try
{
try
{
Platform
&
platform
=
Platform
::
getPlatformByName
(
"CUDA"
);
Platform
&
platform
=
Platform
::
getPlatformByName
(
"CUDA"
);
AmoebaCudaKernelFactory
*
factory
=
new
AmoebaCudaKernelFactory
();
AmoebaCudaKernelFactory
*
factory
=
new
AmoebaCudaKernelFactory
();
...
@@ -105,4 +113,4 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
...
@@ -105,4 +113,4 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
return
new
CudaCalcAmoebaWcaDispersionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcAmoebaWcaDispersionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
}
}
\ No newline at end of file
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
047934e2
...
@@ -41,7 +41,7 @@
...
@@ -41,7 +41,7 @@
#include "CudaForceInfo.h"
#include "CudaForceInfo.h"
#include "CudaKernelSources.h"
#include "CudaKernelSources.h"
#include "CudaNonbondedUtilities.h"
#include "CudaNonbondedUtilities.h"
#include "jama_
svd
.h"
#include "jama_
lu
.h"
#include <algorithm>
#include <algorithm>
#include <cmath>
#include <cmath>
...
@@ -52,10 +52,10 @@
...
@@ -52,10 +52,10 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
#define CHECK_RESULT(result) \
#define CHECK_RESULT(result
, prefix
) \
if (result != CUDA_SUCCESS) { \
if (result != CUDA_SUCCESS) { \
std::stringstream m; \
std::stringstream m; \
m<<
errorMessage
<<": "<<cu.getErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
m<<
prefix
<<": "<<cu.getErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
throw OpenMMException(m.str());\
throw OpenMMException(m.str());\
}
}
...
@@ -813,7 +813,7 @@ private:
...
@@ -813,7 +813,7 @@ private:
};
};
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
hasInitializedFFT
(
false
),
multipolesAreValid
(
false
),
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
hasInitializedFFT
(
false
),
multipolesAreValid
(
false
),
hasCreatedEvent
(
false
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
sphericalDipoles
(
NULL
),
sphericalQuadrupoles
(
NULL
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
sphericalDipoles
(
NULL
),
sphericalQuadrupoles
(
NULL
),
fracDipoles
(
NULL
),
fracQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
fracDipoles
(
NULL
),
fracQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
...
@@ -822,7 +822,7 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
...
@@ -822,7 +822,7 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
inducedDipoleFieldGradientGk
(
NULL
),
inducedDipoleFieldGradientGkPolar
(
NULL
),
extrapolatedDipoleFieldGradient
(
NULL
),
extrapolatedDipoleFieldGradientPolar
(
NULL
),
inducedDipoleFieldGradientGk
(
NULL
),
inducedDipoleFieldGradientGkPolar
(
NULL
),
extrapolatedDipoleFieldGradient
(
NULL
),
extrapolatedDipoleFieldGradientPolar
(
NULL
),
extrapolatedDipoleFieldGradientGk
(
NULL
),
extrapolatedDipoleFieldGradientGkPolar
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
extrapolatedDipoleFieldGradientGk
(
NULL
),
extrapolatedDipoleFieldGradientGkPolar
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
pmePhid
(
NULL
),
pmePhip
(
NULL
),
pmePhidp
(
NULL
),
pmeCphi
(
NULL
),
pmeAtomGridIndex
(
NULL
),
lastPositions
(
NULL
),
sort
(
NULL
),
gkKernel
(
NULL
)
{
pmePhid
(
NULL
),
pmePhip
(
NULL
),
pmePhidp
(
NULL
),
pmeCphi
(
NULL
),
lastPositions
(
NULL
),
sort
(
NULL
),
gkKernel
(
NULL
)
{
}
}
CudaCalcAmoebaMultipoleForceKernel
::~
CudaCalcAmoebaMultipoleForceKernel
()
{
CudaCalcAmoebaMultipoleForceKernel
::~
CudaCalcAmoebaMultipoleForceKernel
()
{
...
@@ -927,14 +927,14 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...
@@ -927,14 +927,14 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
pmePhidp
;
delete
pmePhidp
;
if
(
pmeCphi
!=
NULL
)
if
(
pmeCphi
!=
NULL
)
delete
pmeCphi
;
delete
pmeCphi
;
if
(
pmeAtomGridIndex
!=
NULL
)
delete
pmeAtomGridIndex
;
if
(
lastPositions
!=
NULL
)
if
(
lastPositions
!=
NULL
)
delete
lastPositions
;
delete
lastPositions
;
if
(
sort
!=
NULL
)
if
(
sort
!=
NULL
)
delete
sort
;
delete
sort
;
if
(
hasInitializedFFT
)
if
(
hasInitializedFFT
)
cufftDestroy
(
fft
);
cufftDestroy
(
fft
);
if
(
hasCreatedEvent
)
cuEventDestroy
(
syncEvent
);
}
}
void
CudaCalcAmoebaMultipoleForceKernel
::
initialize
(
const
System
&
system
,
const
AmoebaMultipoleForce
&
force
)
{
void
CudaCalcAmoebaMultipoleForceKernel
::
initialize
(
const
System
&
system
,
const
AmoebaMultipoleForce
&
force
)
{
...
@@ -1021,6 +1021,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1021,6 +1021,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
prevErrors
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevErrors"
);
prevErrors
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevErrors"
);
diisMatrix
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"diisMatrix"
);
diisMatrix
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"diisMatrix"
);
diisCoefficients
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
+
1
,
sizeof
(
float
),
"diisMatrix"
);
diisCoefficients
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
+
1
,
sizeof
(
float
),
"diisMatrix"
);
CHECK_RESULT
(
cuEventCreate
(
&
syncEvent
,
CU_EVENT_DISABLE_TIMING
),
"Error creating event for AmoebaMultipoleForce"
);
hasCreatedEvent
=
true
;
}
}
else
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
{
else
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
{
int
numOrders
=
force
.
getExtrapolationCoefficients
().
size
();
int
numOrders
=
force
.
getExtrapolationCoefficients
().
size
();
...
@@ -1153,7 +1155,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1153,7 +1155,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
NonbondedForce
nb
;
NonbondedForce
nb
;
nb
.
setEwaldErrorTolerance
(
force
.
getEwaldErrorTolerance
());
nb
.
setEwaldErrorTolerance
(
force
.
getEwaldErrorTolerance
());
nb
.
setCutoffDistance
(
force
.
getCutoffDistance
());
nb
.
setCutoffDistance
(
force
.
getCutoffDistance
());
NonbondedForceImpl
::
calcPMEParameters
(
system
,
nb
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
NonbondedForceImpl
::
calcPMEParameters
(
system
,
nb
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
false
);
gridSizeX
=
CudaFFT3D
::
findLegalDimension
(
gridSizeX
);
gridSizeX
=
CudaFFT3D
::
findLegalDimension
(
gridSizeX
);
gridSizeY
=
CudaFFT3D
::
findLegalDimension
(
gridSizeY
);
gridSizeY
=
CudaFFT3D
::
findLegalDimension
(
gridSizeY
);
gridSizeZ
=
CudaFFT3D
::
findLegalDimension
(
gridSizeZ
);
gridSizeZ
=
CudaFFT3D
::
findLegalDimension
(
gridSizeZ
);
...
@@ -1212,6 +1214,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1212,6 +1214,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldByDIIS"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldByDIIS"
);
recordDIISDipolesKernel
=
cu
.
getKernel
(
module
,
"recordInducedDipolesForDIIS"
);
recordDIISDipolesKernel
=
cu
.
getKernel
(
module
,
"recordInducedDipolesForDIIS"
);
buildMatrixKernel
=
cu
.
getKernel
(
module
,
"computeDIISMatrix"
);
buildMatrixKernel
=
cu
.
getKernel
(
module
,
"computeDIISMatrix"
);
solveMatrixKernel
=
cu
.
getKernel
(
module
,
"solveDIISMatrix"
);
initExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"initExtrapolatedDipoles"
);
initExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"initExtrapolatedDipoles"
);
iterateExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"iterateExtrapolatedDipoles"
);
iterateExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"iterateExtrapolatedDipoles"
);
computeExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"computeExtrapolatedDipoles"
);
computeExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"computeExtrapolatedDipoles"
);
...
@@ -1253,7 +1256,6 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1253,7 +1256,6 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
else
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
else
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
pmeDefines
[
"EXTRAPOLATED_POLARIZATION"
]
=
""
;
pmeDefines
[
"EXTRAPOLATED_POLARIZATION"
]
=
""
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipolePme
,
pmeDefines
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipolePme
,
pmeDefines
);
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
pmeTransformMultipolesKernel
=
cu
.
getKernel
(
module
,
"transformMultipolesToFractionalCoordinates"
);
pmeTransformMultipolesKernel
=
cu
.
getKernel
(
module
,
"transformMultipolesToFractionalCoordinates"
);
pmeTransformPotentialKernel
=
cu
.
getKernel
(
module
,
"transformPotentialToCartesianCoordinates"
);
pmeTransformPotentialKernel
=
cu
.
getKernel
(
module
,
"transformPotentialToCartesianCoordinates"
);
pmeSpreadFixedMultipolesKernel
=
cu
.
getKernel
(
module
,
"gridSpreadFixedMultipoles"
);
pmeSpreadFixedMultipolesKernel
=
cu
.
getKernel
(
module
,
"gridSpreadFixedMultipoles"
);
...
@@ -1285,7 +1287,6 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1285,7 +1287,6 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmePhidp
=
new
CudaArray
(
cu
,
20
*
numMultipoles
,
elementSize
,
"pmePhidp"
);
pmePhidp
=
new
CudaArray
(
cu
,
20
*
numMultipoles
,
elementSize
,
"pmePhidp"
);
pmeCphi
=
new
CudaArray
(
cu
,
10
*
numMultipoles
,
elementSize
,
"pmeCphi"
);
pmeCphi
=
new
CudaArray
(
cu
,
10
*
numMultipoles
,
elementSize
,
"pmeCphi"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numMultipoles
,
"pmeAtomGridIndex"
);
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
cufftResult
result
=
cufftPlan3d
(
&
fft
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2Z
:
CUFFT_C2C
);
cufftResult
result
=
cufftPlan3d
(
&
fft
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2Z
:
CUFFT_C2C
);
if
(
result
!=
CUFFT_SUCCESS
)
if
(
result
!=
CUFFT_SUCCESS
)
...
@@ -1569,16 +1570,11 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1569,16 +1570,11 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Reciprocal space calculation.
// Reciprocal space calculation.
unsigned
int
maxTiles
=
nb
.
getInteractingTiles
().
getSize
();
unsigned
int
maxTiles
=
nb
.
getInteractingTiles
().
getSize
();
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
(),
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
PmeOrder
*
PmeOrder
*
elementSize
);
sort
->
sort
(
*
pmeAtomGridIndex
);
void
*
pmeTransformMultipolesArgs
[]
=
{
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
void
*
pmeTransformMultipolesArgs
[]
=
{
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeTransformMultipolesKernel
,
pmeTransformMultipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeTransformMultipolesKernel
,
pmeTransformMultipolesArgs
,
cu
.
getNumAtoms
());
void
*
pmeSpreadFixedMultipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
void
*
pmeSpreadFixedMultipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
&
pmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeSpreadFixedMultipolesKernel
,
pmeSpreadFixedMultipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeSpreadFixedMultipolesKernel
,
pmeSpreadFixedMultipolesArgs
,
cu
.
getNumAtoms
());
void
*
finishSpreadArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
()};
void
*
finishSpreadArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
()};
...
@@ -1590,7 +1586,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1590,7 +1586,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
void
*
pmeConvolutionArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmeBsplineModuliX
->
getDevicePointer
(),
&
pmeBsplineModuliY
->
getDevicePointer
(),
void
*
pmeConvolutionArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmeBsplineModuliX
->
getDevicePointer
(),
&
pmeBsplineModuliY
->
getDevicePointer
(),
&
pmeBsplineModuliZ
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
&
pmeBsplineModuliZ
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
cu
.
getNumAtoms
()
);
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
256
);
if
(
cu
.
getUseDoublePrecision
())
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
else
else
...
@@ -1598,7 +1594,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1598,7 +1594,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
pmeFixedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
&
field
->
getDevicePointer
(),
void
*
pmeFixedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]
,
&
pmeAtomGridIndex
->
getDevicePointer
()
};
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeFixedPotentialKernel
,
pmeFixedPotentialArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeFixedPotentialKernel
,
pmeFixedPotentialArgs
,
cu
.
getNumAtoms
());
void
*
pmeTransformFixedPotentialArgs
[]
=
{
&
pmePhi
->
getDevicePointer
(),
&
pmeCphi
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
void
*
pmeTransformFixedPotentialArgs
[]
=
{
&
pmePhi
->
getDevicePointer
(),
&
pmeCphi
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeTransformPotentialKernel
,
pmeTransformFixedPotentialArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeTransformPotentialKernel
,
pmeTransformFixedPotentialArgs
,
cu
.
getNumAtoms
());
...
@@ -1625,7 +1621,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1625,7 +1621,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cu
.
clearBuffer
(
*
pmeGrid
);
cu
.
clearBuffer
(
*
pmeGrid
);
void
*
pmeSpreadInducedDipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
void
*
pmeSpreadInducedDipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
&
pmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
if
(
cu
.
getUseDoublePrecision
())
...
@@ -1634,15 +1630,14 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1634,15 +1630,14 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
else
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
cu
.
getNumAtoms
()
);
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
256
);
if
(
cu
.
getUseDoublePrecision
())
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
else
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
void
*
pmeInducedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
void
*
pmeInducedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
pmePhidp
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
&
pmePhidp
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
// Iterate until the dipoles converge.
// Iterate until the dipoles converge.
...
@@ -1771,7 +1766,7 @@ void CudaCalcAmoebaMultipoleForceKernel::computeInducedField(void** recipBoxVect
...
@@ -1771,7 +1766,7 @@ void CudaCalcAmoebaMultipoleForceKernel::computeInducedField(void** recipBoxVect
cu
.
executeKernel
(
computeInducedFieldKernel
,
&
computeInducedFieldArgs
[
0
],
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
cu
.
executeKernel
(
computeInducedFieldKernel
,
&
computeInducedFieldArgs
[
0
],
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
cu
.
clearBuffer
(
*
pmeGrid
);
cu
.
clearBuffer
(
*
pmeGrid
);
void
*
pmeSpreadInducedDipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
void
*
pmeSpreadInducedDipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
&
pmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
{
if
(
cu
.
getUseDoublePrecision
())
{
...
@@ -1784,15 +1779,14 @@ void CudaCalcAmoebaMultipoleForceKernel::computeInducedField(void** recipBoxVect
...
@@ -1784,15 +1779,14 @@ void CudaCalcAmoebaMultipoleForceKernel::computeInducedField(void** recipBoxVect
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
void
*
pmeConvolutionArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmeBsplineModuliX
->
getDevicePointer
(),
&
pmeBsplineModuliY
->
getDevicePointer
(),
void
*
pmeConvolutionArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmeBsplineModuliX
->
getDevicePointer
(),
&
pmeBsplineModuliY
->
getDevicePointer
(),
&
pmeBsplineModuliZ
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
&
pmeBsplineModuliZ
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
cu
.
getNumAtoms
()
);
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
256
);
if
(
cu
.
getUseDoublePrecision
())
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
else
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
void
*
pmeInducedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
void
*
pmeInducedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
pmePhidp
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
&
pmePhidp
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
{
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
{
void
*
pmeRecordInducedFieldDipolesArgs
[]
=
{
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
void
*
pmeRecordInducedFieldDipolesArgs
[]
=
{
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
...
@@ -1831,22 +1825,24 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
...
@@ -1831,22 +1825,24 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
cu
.
executeKernel
(
recordDIISDipolesKernel
,
recordDIISDipolesArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
cu
.
executeKernel
(
recordDIISDipolesKernel
,
recordDIISDipolesArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
float2
*
errors
=
(
float2
*
)
cu
.
getPinnedBuffer
();
float2
*
errors
=
(
float2
*
)
cu
.
getPinnedBuffer
();
inducedDipoleErrors
->
download
(
errors
,
false
);
inducedDipoleErrors
->
download
(
errors
,
false
);
cuEventRecord
(
syncEvent
,
cu
.
getCurrentStream
());
// Build the DIIS matrix.
// Build the DIIS matrix.
int
numPrev
=
(
iteration
+
1
<
MaxPrevDIISDipoles
?
iteration
+
1
:
MaxPrevDIISDipoles
);
int
numPrev
=
(
iteration
+
1
<
MaxPrevDIISDipoles
?
iteration
+
1
:
MaxPrevDIISDipoles
);
void
*
buildMatrixArgs
[]
=
{
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
diisMatrix
->
getDevicePointer
()};
void
*
buildMatrixArgs
[]
=
{
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
diisMatrix
->
getDevicePointer
()};
int
threadBlocks
=
min
(
numPrev
,
cu
.
getNumThreadBlocks
());
int
threadBlocks
=
min
(
numPrev
,
cu
.
getNumThreadBlocks
());
cu
.
executeKernel
(
buildMatrixKernel
,
buildMatrixArgs
,
threadBlocks
*
128
,
128
,
128
*
elementSize
)
;
int
blockSize
=
512
;
vector
<
float
>
matrixf
;
cu
.
executeKernel
(
buildMatrixKernel
,
buildMatrixArgs
,
threadBlocks
*
blockSize
,
blockSize
,
blockSize
*
elementSize
)
;
vector
<
double
>
matrix
;
if
(
cu
.
getUseDoublePrecision
())
// Solve the matrix.
diisMatrix
->
download
(
matrix
);
else
void
*
solveMatrixArgs
[]
=
{
&
iteration
,
&
diisMatrix
->
getDevicePointer
(),
&
diisCoefficients
->
getDevicePointer
()};
diisMatrix
->
download
(
matrixf
);
cu
.
executeKernel
(
solveMatrixKernel
,
solveMatrixArgs
,
32
,
32
);
// Determine whether the iteration has converged.
// Determine whether the iteration has converged.
cuEventSynchronize
(
syncEvent
);
double
total1
=
0.0
,
total2
=
0.0
;
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
inducedDipoleErrors
->
getSize
();
j
++
)
{
for
(
int
j
=
0
;
j
<
inducedDipoleErrors
->
getSize
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total1
+=
errors
[
j
].
x
;
...
@@ -1854,56 +1850,16 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
...
@@ -1854,56 +1850,16 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
}
}
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
return
true
;
return
true
;
// Compute the coefficients for selecting the new dipoles.
float
*
coefficients
=
(
float
*
)
cu
.
getPinnedBuffer
();
if
(
iteration
==
0
)
coefficients
[
0
]
=
1
;
else
{
int
rank
=
numPrev
+
1
;
Array2D
<
double
>
b
(
rank
,
rank
);
b
[
0
][
0
]
=
0
;
for
(
int
i
=
1
;
i
<
rank
;
i
++
)
b
[
i
][
0
]
=
b
[
0
][
i
]
=
-
1
;
if
(
cu
.
getUseDoublePrecision
())
{
for
(
int
i
=
0
;
i
<
numPrev
;
i
++
)
for
(
int
j
=
0
;
j
<
numPrev
;
j
++
)
b
[
i
+
1
][
j
+
1
]
=
matrix
[
i
*
MaxPrevDIISDipoles
+
j
];
}
else
{
for
(
int
i
=
0
;
i
<
numPrev
;
i
++
)
for
(
int
j
=
0
;
j
<
numPrev
;
j
++
)
b
[
i
+
1
][
j
+
1
]
=
matrixf
[
i
*
MaxPrevDIISDipoles
+
j
];
}
// Solve using SVD. Since the right hand side is (-1, 0, 0, 0, ...), this is simpler than the general case.
JAMA
::
SVD
<
double
>
svd
(
b
);
Array2D
<
double
>
u
,
v
;
svd
.
getU
(
u
);
svd
.
getV
(
v
);
Array1D
<
double
>
s
;
svd
.
getSingularValues
(
s
);
int
effectiveRank
=
svd
.
rank
();
for
(
int
i
=
1
;
i
<
rank
;
i
++
)
{
double
d
=
0
;
for
(
int
j
=
0
;
j
<
effectiveRank
;
j
++
)
d
-=
u
[
0
][
j
]
*
v
[
i
][
j
]
/
s
[
j
];
coefficients
[
i
-
1
]
=
d
;
}
}
diisCoefficients
->
upload
(
coefficients
,
false
);
// Compute the dipoles.
// Compute the dipoles.
void
*
updateInducedFieldArgs
[]
=
{
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
void
*
updateInducedFieldArgs
[]
=
{
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
prevDipoles
->
getDevicePointer
(),
&
prevDipolesPolar
->
getDevicePointer
(),
&
diisCoefficients
->
getDevicePointer
(),
&
numPrev
};
&
prevDipoles
->
getDevicePointer
(),
&
prevDipolesPolar
->
getDevicePointer
(),
&
diisCoefficients
->
getDevicePointer
(),
&
numPrev
};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
cu
.
getNum
ThreadBlocks
()
*
cu
.
ThreadBlockSize
);
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
3
*
cu
.
getNum
Atoms
(),
256
);
if
(
gkKernel
!=
NULL
)
{
if
(
gkKernel
!=
NULL
)
{
void
*
updateInducedFieldGkArgs
[]
=
{
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
void
*
updateInducedFieldGkArgs
[]
=
{
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
prevDipolesGk
->
getDevicePointer
(),
&
prevDipolesGkPolar
->
getDevicePointer
(),
&
diisCoefficients
->
getDevicePointer
(),
&
numPrev
};
&
prevDipolesGk
->
getDevicePointer
(),
&
prevDipolesGkPolar
->
getDevicePointer
(),
&
diisCoefficients
->
getDevicePointer
(),
&
numPrev
};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldGkArgs
,
cu
.
getNum
ThreadBlocks
()
*
cu
.
ThreadBlockSize
);
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldGkArgs
,
3
*
cu
.
getNum
Atoms
(),
256
);
}
}
return
false
;
return
false
;
}
}
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
View file @
047934e2
...
@@ -408,7 +408,7 @@ private:
...
@@ -408,7 +408,7 @@ private:
int
fixedFieldThreads
,
inducedFieldThreads
,
electrostaticsThreads
;
int
fixedFieldThreads
,
inducedFieldThreads
,
electrostaticsThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
double
alpha
,
inducedEpsilon
;
double
alpha
,
inducedEpsilon
;
bool
usePME
,
hasQuadrupoles
,
hasInitializedScaleFactors
,
hasInitializedFFT
,
multipolesAreValid
;
bool
usePME
,
hasQuadrupoles
,
hasInitializedScaleFactors
,
hasInitializedFFT
,
multipolesAreValid
,
hasCreatedEvent
;
AmoebaMultipoleForce
::
PolarizationType
polarizationType
;
AmoebaMultipoleForce
::
PolarizationType
polarizationType
;
CudaContext
&
cu
;
CudaContext
&
cu
;
const
System
&
system
;
const
System
&
system
;
...
@@ -465,16 +465,16 @@ private:
...
@@ -465,16 +465,16 @@ private:
CudaArray
*
pmePhidp
;
CudaArray
*
pmePhidp
;
CudaArray
*
pmeCphi
;
CudaArray
*
pmeCphi
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
lastPositions
;
CudaArray
*
lastPositions
;
CudaSort
*
sort
;
CudaSort
*
sort
;
cufftHandle
fft
;
cufftHandle
fft
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
pmeGridIndexKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
,
solveMatrixKernel
;
CUfunction
initExtrapolatedKernel
,
iterateExtrapolatedKernel
,
computeExtrapolatedKernel
,
addExtrapolatedGradientKernel
;
CUfunction
initExtrapolatedKernel
,
iterateExtrapolatedKernel
,
computeExtrapolatedKernel
,
addExtrapolatedGradientKernel
;
CUfunction
pmeTransformMultipolesKernel
,
pmeTransformPotentialKernel
;
CUfunction
pmeTransformMultipolesKernel
,
pmeTransformPotentialKernel
;
CUevent
syncEvent
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
static
const
int
MaxPrevDIISDipoles
=
20
;
static
const
int
MaxPrevDIISDipoles
=
20
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
View file @
047934e2
...
@@ -107,9 +107,9 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
...
@@ -107,9 +107,9 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
real
dmp
=
atom1
.
damp
*
atom2
.
damp
;
real
dmp
=
atom1
.
damp
*
atom2
.
damp
;
real
a
=
min
(
atom1
.
thole
,
atom2
.
thole
);
real
a
=
min
(
atom1
.
thole
,
atom2
.
thole
);
real
u
=
fabs
(
dmp
)
>
1.0e-5
f
?
r
/
dmp
:
1e10
f
;
real
u
=
r
/
dmp
;
real
au3
=
a
*
u
*
u
*
u
;
real
au3
=
fabs
(
dmp
)
>
1.0e-5
f
?
a
*
u
*
u
*
u
:
0
;
real
expau3
=
au3
<
50
?
EXP
(
-
au3
)
:
0
;
real
expau3
=
fabs
(
dmp
)
>
1.0e-5
f
?
EXP
(
-
au3
)
:
0
;
real
a2u6
=
au3
*
au3
;
real
a2u6
=
au3
*
au3
;
real
a3u9
=
a2u6
*
au3
;
real
a3u9
=
a2u6
*
au3
;
// Thole damping factors for energies
// Thole damping factors for energies
...
@@ -336,7 +336,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
...
@@ -336,7 +336,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
iEIY
-=
eCoef
*
(
qiUinpI
.
y
*
qiUindJ
.
x
+
qiUindI
.
y
*
qiUinpJ
.
x
);
iEIY
-=
eCoef
*
(
qiUinpI
.
y
*
qiUindJ
.
x
+
qiUindI
.
y
*
qiUinpJ
.
x
);
iEJY
-=
eCoef
*
(
qiUinpJ
.
y
*
qiUindI
.
x
+
qiUindJ
.
y
*
qiUinpI
.
x
);
iEJY
-=
eCoef
*
(
qiUinpJ
.
y
*
qiUindI
.
x
+
qiUindJ
.
y
*
qiUinpI
.
x
);
fIZ
+=
dCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
x
+
qiUindI
.
x
*
qiUinpJ
.
x
);
fIZ
+=
dCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
x
+
qiUindI
.
x
*
qiUinpJ
.
x
);
f
I
Z
+=
dCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
x
+
qiUindJ
.
x
*
qiUinpI
.
x
);
f
J
Z
+=
dCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
x
+
qiUindJ
.
x
*
qiUinpI
.
x
);
// Uind-Uind terms (m=1)
// Uind-Uind terms (m=1)
eCoef
=
2
*
rInvVec
[
3
]
*
thole_d1
;
eCoef
=
2
*
rInvVec
[
3
]
*
thole_d1
;
dCoef
=
-
3
*
rInvVec
[
4
]
*
dthole_d1
;
dCoef
=
-
3
*
rInvVec
[
4
]
*
dthole_d1
;
...
@@ -345,7 +345,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
...
@@ -345,7 +345,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
iEIY
+=
eCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
y
+
qiUindI
.
x
*
qiUinpJ
.
y
);
iEIY
+=
eCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
y
+
qiUindI
.
x
*
qiUinpJ
.
y
);
iEJY
+=
eCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
y
+
qiUindJ
.
x
*
qiUinpI
.
y
);
iEJY
+=
eCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
y
+
qiUindJ
.
x
*
qiUinpI
.
y
);
fIZ
+=
dCoef
*
(
qiUinpI
.
y
*
qiUindJ
.
y
+
qiUindI
.
y
*
qiUinpJ
.
y
+
qiUinpI
.
z
*
qiUindJ
.
z
+
qiUindI
.
z
*
qiUinpJ
.
z
);
fIZ
+=
dCoef
*
(
qiUinpI
.
y
*
qiUindJ
.
y
+
qiUindI
.
y
*
qiUinpJ
.
y
+
qiUinpI
.
z
*
qiUindJ
.
z
+
qiUindI
.
z
*
qiUinpJ
.
z
);
f
I
Z
+=
dCoef
*
(
qiUinpJ
.
y
*
qiUindI
.
y
+
qiUindJ
.
y
*
qiUinpI
.
y
+
qiUinpJ
.
z
*
qiUindI
.
z
+
qiUindJ
.
z
*
qiUinpI
.
z
);
f
J
Z
+=
dCoef
*
(
qiUinpJ
.
y
*
qiUindI
.
y
+
qiUindJ
.
y
*
qiUinpI
.
y
+
qiUinpJ
.
z
*
qiUindI
.
z
+
qiUindJ
.
z
*
qiUinpI
.
z
);
#endif
#endif
// The quasi-internal frame forces and torques. Note that the induced torque intermediates are
// The quasi-internal frame forces and torques. Note that the induced torque intermediates are
...
@@ -545,7 +545,7 @@ extern "C" __global__ void computeElectrostatics(
...
@@ -545,7 +545,7 @@ extern "C" __global__ void computeElectrostatics(
data
.
force
=
make_real3
(
0
);
data
.
force
=
make_real3
(
0
);
data
.
torque
=
make_real3
(
0
);
data
.
torque
=
make_real3
(
0
);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
j
=
(
numTiles
<=
maxTiles
?
interactingAtoms
[
pos
*
TILE_SIZE
+
tgx
]
:
y
*
TILE_SIZE
+
tgx
)
;
unsigned
int
j
=
interactingAtoms
[
pos
*
TILE_SIZE
+
tgx
];
#else
#else
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
View file @
047934e2
...
@@ -655,7 +655,7 @@ extern "C" __global__ void computeFixedField(
...
@@ -655,7 +655,7 @@ extern "C" __global__ void computeFixedField(
data
.
bornRadius
=
bornRadii
[
atom1
];
data
.
bornRadius
=
bornRadii
[
atom1
];
#endif
#endif
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
j
=
(
numTiles
<=
maxTiles
?
interactingAtoms
[
pos
*
TILE_SIZE
+
tgx
]
:
y
*
TILE_SIZE
+
tgx
)
;
unsigned
int
j
=
interactingAtoms
[
pos
*
TILE_SIZE
+
tgx
];
#else
#else
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
View file @
047934e2
...
@@ -514,7 +514,7 @@ extern "C" __global__ void computeInducedField(
...
@@ -514,7 +514,7 @@ extern "C" __global__ void computeInducedField(
loadAtomData
(
data
,
atom1
,
posq
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
data
,
atom1
,
posq
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
#endif
#endif
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
j
=
(
numTiles
<=
maxTiles
?
interactingAtoms
[
pos
*
TILE_SIZE
+
tgx
]
:
y
*
TILE_SIZE
+
tgx
)
;
unsigned
int
j
=
interactingAtoms
[
pos
*
TILE_SIZE
+
tgx
];
#else
#else
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
...
@@ -607,11 +607,6 @@ extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restri
...
@@ -607,11 +607,6 @@ extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restri
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float
*
__restrict__
polarizability
,
float2
*
__restrict__
errors
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float
*
__restrict__
polarizability
,
float2
*
__restrict__
errors
,
real
*
__restrict__
prevDipoles
,
real
*
__restrict__
prevDipolesPolar
,
real
*
__restrict__
prevErrors
,
int
iteration
,
bool
recordPrevErrors
,
real
*
__restrict__
matrix
)
{
real
*
__restrict__
prevDipoles
,
real
*
__restrict__
prevDipolesPolar
,
real
*
__restrict__
prevErrors
,
int
iteration
,
bool
recordPrevErrors
,
real
*
__restrict__
matrix
)
{
extern
__shared__
real2
buffer
[];
extern
__shared__
real2
buffer
[];
#ifdef USE_EWALD
const
real
ewaldScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
#else
const
real
ewaldScale
=
0
;
#endif
const
real
fieldScale
=
1
/
(
real
)
0x100000000
;
const
real
fieldScale
=
1
/
(
real
)
0x100000000
;
real
sumErrors
=
0
;
real
sumErrors
=
0
;
real
sumPolarErrors
=
0
;
real
sumPolarErrors
=
0
;
...
@@ -699,6 +694,126 @@ extern "C" __global__ void computeDIISMatrix(real* __restrict__ prevErrors, int
...
@@ -699,6 +694,126 @@ extern "C" __global__ void computeDIISMatrix(real* __restrict__ prevErrors, int
}
}
}
}
extern
"C"
__global__
void
solveDIISMatrix
(
int
iteration
,
const
real
*
__restrict__
matrix
,
float
*
__restrict__
coefficients
)
{
__shared__
real
b
[
MAX_PREV_DIIS_DIPOLES
+
1
][
MAX_PREV_DIIS_DIPOLES
+
1
];
__shared__
real
piv
[
MAX_PREV_DIIS_DIPOLES
+
1
];
__shared__
real
x
[
MAX_PREV_DIIS_DIPOLES
+
1
];
// On the first iteration we don't need to do any calculation.
if
(
iteration
==
0
)
{
if
(
threadIdx
.
x
==
0
)
coefficients
[
0
]
=
1
;
return
;
}
// Load the matrix.
int
numPrev
=
min
(
iteration
+
1
,
MAX_PREV_DIIS_DIPOLES
);
int
rank
=
numPrev
+
1
;
for
(
int
index
=
threadIdx
.
x
;
index
<
numPrev
*
numPrev
;
index
+=
blockDim
.
x
)
{
int
i
=
index
/
numPrev
;
int
j
=
index
-
i
*
numPrev
;
b
[
i
+
1
][
j
+
1
]
=
matrix
[
i
*
MAX_PREV_DIIS_DIPOLES
+
j
];
}
for
(
int
i
=
threadIdx
.
x
;
i
<
rank
;
i
+=
blockDim
.
x
)
{
b
[
i
][
0
]
=
-
1
;
piv
[
i
]
=
i
;
}
__syncthreads
();
// Compute the mean absolute value of the values we just loaded. We use that for preconditioning it,
// which is essential for doing the computation in single precision.
if
(
threadIdx
.
x
==
0
)
{
real
mean
=
0
;
for
(
int
i
=
0
;
i
<
numPrev
;
i
++
)
for
(
int
j
=
0
;
j
<
numPrev
;
j
++
)
mean
+=
fabs
(
b
[
i
+
1
][
j
+
1
]);
mean
/=
numPrev
*
numPrev
;
b
[
0
][
0
]
=
0
;
for
(
int
i
=
1
;
i
<
rank
;
i
++
)
b
[
0
][
i
]
=
-
mean
;
// Compute the LU decomposition of the matrix. This code is adapted from JAMA.
int
pivsign
=
1
;
for
(
int
j
=
0
;
j
<
rank
;
j
++
)
{
// Apply previous transformations.
for
(
int
i
=
0
;
i
<
rank
;
i
++
)
{
// Most of the time is spent in the following dot product.
int
kmax
=
min
(
i
,
j
);
real
s
=
0
;
for
(
int
k
=
0
;
k
<
kmax
;
k
++
)
s
+=
b
[
i
][
k
]
*
b
[
k
][
j
];
b
[
i
][
j
]
-=
s
;
}
// Find pivot and exchange if necessary.
int
p
=
j
;
for
(
int
i
=
j
+
1
;
i
<
rank
;
i
++
)
if
(
abs
(
b
[
i
][
j
])
>
abs
(
b
[
p
][
j
]))
p
=
i
;
if
(
p
!=
j
)
{
int
k
=
0
;
for
(
k
=
0
;
k
<
rank
;
k
++
)
{
real
t
=
b
[
p
][
k
];
b
[
p
][
k
]
=
b
[
j
][
k
];
b
[
j
][
k
]
=
t
;
}
k
=
piv
[
p
];
piv
[
p
]
=
piv
[
j
];
piv
[
j
]
=
k
;
pivsign
=
-
pivsign
;
}
// Compute multipliers.
if
((
j
<
rank
)
&&
(
b
[
j
][
j
]
!=
0
))
for
(
int
i
=
j
+
1
;
i
<
rank
;
i
++
)
b
[
i
][
j
]
/=
b
[
j
][
j
];
}
for
(
int
i
=
0
;
i
<
rank
;
i
++
)
if
(
b
[
i
][
i
]
==
0
)
{
// The matrix is singular.
for
(
int
j
=
0
;
j
<
rank
-
1
;
j
++
)
coefficients
[
j
]
=
0
;
coefficients
[
rank
-
1
]
=
1
;
return
;
}
// Solve b*Y = X(piv)
for
(
int
i
=
0
;
i
<
rank
;
i
++
)
x
[
i
]
=
(
piv
[
i
]
==
0
?
-
1
:
0
);
for
(
int
k
=
0
;
k
<
rank
;
k
++
)
for
(
int
i
=
k
+
1
;
i
<
rank
;
i
++
)
x
[
i
]
-=
x
[
k
]
*
b
[
i
][
k
];
// Solve U*X = Y;
for
(
int
k
=
rank
-
1
;
k
>=
0
;
k
--
)
{
x
[
k
]
/=
b
[
k
][
k
];
for
(
int
i
=
0
;
i
<
k
;
i
++
)
x
[
i
]
-=
x
[
k
]
*
b
[
i
][
k
];
}
// Record the coefficients.
real
lastCoeff
=
1
;
for
(
int
i
=
0
;
i
<
rank
-
1
;
i
++
)
{
real
c
=
x
[
i
+
1
]
*
mean
;
coefficients
[
i
]
=
c
;
lastCoeff
-=
c
;
}
coefficients
[
rank
-
1
]
=
lastCoeff
;
}
}
extern
"C"
__global__
void
updateInducedFieldByDIIS
(
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
extern
"C"
__global__
void
updateInducedFieldByDIIS
(
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
const
real
*
__restrict__
prevDipoles
,
const
real
*
__restrict__
prevDipolesPolar
,
const
float
*
__restrict__
coefficients
,
int
numPrev
)
{
const
real
*
__restrict__
prevDipoles
,
const
real
*
__restrict__
prevDipolesPolar
,
const
float
*
__restrict__
coefficients
,
int
numPrev
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
3
*
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
3
*
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
View file @
047934e2
...
@@ -69,47 +69,6 @@ __device__ void computeBSplinePoint(real4* thetai, real w, real* array) {
...
@@ -69,47 +69,6 @@ __device__ void computeBSplinePoint(real4* thetai, real w, real* array) {
thetai
[
i
-
1
]
=
make_real4
(
ARRAY
(
PME_ORDER
,
i
),
ARRAY
(
PME_ORDER
-
1
,
i
),
ARRAY
(
PME_ORDER
-
2
,
i
),
ARRAY
(
PME_ORDER
-
3
,
i
));
thetai
[
i
-
1
]
=
make_real4
(
ARRAY
(
PME_ORDER
,
i
),
ARRAY
(
PME_ORDER
-
1
,
i
),
ARRAY
(
PME_ORDER
-
2
,
i
),
ARRAY
(
PME_ORDER
-
3
,
i
));
}
}
/**
* Compute the index of the grid point each atom is associated with.
*/
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
i
];
pos
-=
periodicBoxVecZ
*
floor
(
pos
.
z
*
recipBoxVecZ
.
z
+
0.5
f
);
pos
-=
periodicBoxVecY
*
floor
(
pos
.
y
*
recipBoxVecY
.
z
+
0.5
f
);
pos
-=
periodicBoxVecX
*
floor
(
pos
.
x
*
recipBoxVecX
.
z
+
0.5
f
);
// First axis.
real
w
=
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
;
real
fr
=
GRID_SIZE_X
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
int
ifr
=
(
int
)
fr
;
int
igrid1
=
ifr
-
PME_ORDER
+
1
;
// Second axis.
w
=
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
;
fr
=
GRID_SIZE_Y
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
int
igrid2
=
ifr
-
PME_ORDER
+
1
;
// Third axis.
w
=
pos
.
z
*
recipBoxVecZ
.
z
;
fr
=
GRID_SIZE_Z
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
int
igrid3
=
ifr
-
PME_ORDER
+
1
;
// Record the grid point.
igrid1
+=
(
igrid1
<
0
?
GRID_SIZE_X
:
0
);
igrid2
+=
(
igrid2
<
0
?
GRID_SIZE_Y
:
0
);
igrid3
+=
(
igrid3
<
0
?
GRID_SIZE_Z
:
0
);
pmeAtomGridIndex
[
i
]
=
make_int2
(
i
,
igrid1
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
igrid2
*
GRID_SIZE_Z
+
igrid3
);
}
}
/**
/**
* Convert the fixed multipoles from Cartesian to fractional coordinates.
* Convert the fixed multipoles from Cartesian to fractional coordinates.
*/
*/
...
@@ -209,7 +168,7 @@ extern "C" __global__ void transformPotentialToCartesianCoordinates(const real*
...
@@ -209,7 +168,7 @@ extern "C" __global__ void transformPotentialToCartesianCoordinates(const real*
}
}
extern
"C"
__global__
void
gridSpreadFixedMultipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
fracDipole
,
extern
"C"
__global__
void
gridSpreadFixedMultipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
fracQuadrupole
,
real2
*
__restrict__
pmeGrid
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
#if __CUDA_ARCH__ < 500
#if __CUDA_ARCH__ < 500
real
array
[
PME_ORDER
*
PME_ORDER
];
real
array
[
PME_ORDER
*
PME_ORDER
];
...
@@ -300,7 +259,7 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
...
@@ -300,7 +259,7 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
}
}
extern
"C"
__global__
void
gridSpreadInducedDipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
extern
"C"
__global__
void
gridSpreadInducedDipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
inducedDipolePolar
,
real2
*
__restrict__
pmeGrid
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
#if __CUDA_ARCH__ < 500
#if __CUDA_ARCH__ < 500
real
array
[
PME_ORDER
*
PME_ORDER
];
real
array
[
PME_ORDER
*
PME_ORDER
];
...
@@ -451,7 +410,7 @@ extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, co
...
@@ -451,7 +410,7 @@ extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, co
extern
"C"
__global__
void
computeFixedPotentialFromGrid
(
const
real2
*
__restrict__
pmeGrid
,
real
*
__restrict__
phi
,
extern
"C"
__global__
void
computeFixedPotentialFromGrid
(
const
real2
*
__restrict__
pmeGrid
,
real
*
__restrict__
phi
,
long
long
*
__restrict__
fieldBuffers
,
long
long
*
__restrict__
fieldPolarBuffers
,
const
real4
*
__restrict__
posq
,
long
long
*
__restrict__
fieldBuffers
,
long
long
*
__restrict__
fieldPolarBuffers
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
const
real
*
__restrict__
labFrameDipole
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
#if __CUDA_ARCH__ < 500
#if __CUDA_ARCH__ < 500
real
array
[
PME_ORDER
*
PME_ORDER
];
real
array
[
PME_ORDER
*
PME_ORDER
];
#else
#else
...
@@ -476,11 +435,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
...
@@ -476,11 +435,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
}
}
__syncthreads
();
__syncthreads
();
// Process the atoms in spatially sorted order. This improves cache performance when loading
for
(
int
m
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
m
<
NUM_ATOMS
;
m
+=
blockDim
.
x
*
gridDim
.
x
)
{
// the grid values.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
m
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
m
];
real4
pos
=
posq
[
m
];
pos
-=
periodicBoxVecZ
*
floor
(
pos
.
z
*
recipBoxVecZ
.
z
+
0.5
f
);
pos
-=
periodicBoxVecZ
*
floor
(
pos
.
z
*
recipBoxVecZ
.
z
+
0.5
f
);
pos
-=
periodicBoxVecY
*
floor
(
pos
.
y
*
recipBoxVecY
.
z
+
0.5
f
);
pos
-=
periodicBoxVecY
*
floor
(
pos
.
y
*
recipBoxVecY
.
z
+
0.5
f
);
...
@@ -533,9 +488,9 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
...
@@ -533,9 +488,9 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
real
tuv102
=
0
;
real
tuv102
=
0
;
real
tuv012
=
0
;
real
tuv012
=
0
;
real
tuv111
=
0
;
real
tuv111
=
0
;
for
(
int
i
z
=
0
;
i
z
<
PME_ORDER
;
i
z
++
)
{
for
(
int
i
x
=
0
;
i
x
<
PME_ORDER
;
i
x
++
)
{
int
k
=
igrid
3
+
i
z
-
(
igrid
3
+
i
z
>=
GRID_SIZE_
Z
?
GRID_SIZE_
Z
:
0
);
int
i
=
igrid
1
+
i
x
-
(
igrid
1
+
i
x
>=
GRID_SIZE_
X
?
GRID_SIZE_
X
:
0
);
real4
v
=
theta
3
[
i
z
];
real4
v
=
theta
1
[
i
x
];
real
tu00
=
0
;
real
tu00
=
0
;
real
tu10
=
0
;
real
tu10
=
0
;
real
tu01
=
0
;
real
tu01
=
0
;
...
@@ -550,47 +505,47 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
...
@@ -550,47 +505,47 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
int
j
=
igrid2
+
iy
-
(
igrid2
+
iy
>=
GRID_SIZE_Y
?
GRID_SIZE_Y
:
0
);
int
j
=
igrid2
+
iy
-
(
igrid2
+
iy
>=
GRID_SIZE_Y
?
GRID_SIZE_Y
:
0
);
real4
u
=
theta2
[
iy
];
real4
u
=
theta2
[
iy
];
real4
t
=
make_real4
(
0
,
0
,
0
,
0
);
real4
t
=
make_real4
(
0
,
0
,
0
,
0
);
for
(
int
i
x
=
0
;
i
x
<
PME_ORDER
;
i
x
++
)
{
for
(
int
i
z
=
0
;
i
z
<
PME_ORDER
;
i
z
++
)
{
int
i
=
igrid
1
+
i
x
-
(
igrid
1
+
i
x
>=
GRID_SIZE_
X
?
GRID_SIZE_
X
:
0
);
int
k
=
igrid
3
+
i
z
-
(
igrid
3
+
i
z
>=
GRID_SIZE_
Z
?
GRID_SIZE_
Z
:
0
);
int
gridIndex
=
i
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
j
*
GRID_SIZE_Z
+
k
;
int
gridIndex
=
i
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
j
*
GRID_SIZE_Z
+
k
;
real
tq
=
pmeGrid
[
gridIndex
].
x
;
real
tq
=
pmeGrid
[
gridIndex
].
x
;
real4
tadd
=
theta
1
[
i
x
];
real4
tadd
=
theta
3
[
i
z
];
t
.
x
+=
tq
*
tadd
.
x
;
t
.
x
+=
tq
*
tadd
.
x
;
t
.
y
+=
tq
*
tadd
.
y
;
t
.
y
+=
tq
*
tadd
.
y
;
t
.
z
+=
tq
*
tadd
.
z
;
t
.
z
+=
tq
*
tadd
.
z
;
t
.
w
+=
tq
*
tadd
.
w
;
t
.
w
+=
tq
*
tadd
.
w
;
}
}
tu00
+=
t
.
x
*
u
.
x
;
tu00
+=
u
.
x
*
t
.
x
;
tu10
+=
t
.
y
*
u
.
x
;
tu10
+=
u
.
y
*
t
.
x
;
tu01
+=
t
.
x
*
u
.
y
;
tu01
+=
u
.
x
*
t
.
y
;
tu20
+=
t
.
z
*
u
.
x
;
tu20
+=
u
.
z
*
t
.
x
;
tu11
+=
t
.
y
*
u
.
y
;
tu11
+=
u
.
y
*
t
.
y
;
tu02
+=
t
.
x
*
u
.
z
;
tu02
+=
u
.
x
*
t
.
z
;
tu30
+=
t
.
w
*
u
.
x
;
tu30
+=
u
.
w
*
t
.
x
;
tu21
+=
t
.
z
*
u
.
y
;
tu21
+=
u
.
z
*
t
.
y
;
tu12
+=
t
.
y
*
u
.
z
;
tu12
+=
u
.
y
*
t
.
z
;
tu03
+=
t
.
x
*
u
.
w
;
tu03
+=
u
.
x
*
t
.
w
;
}
}
tuv000
+=
tu00
*
v
.
x
;
tuv000
+=
v
.
x
*
tu00
;
tuv100
+=
tu10
*
v
.
x
;
tuv100
+=
v
.
y
*
tu00
;
tuv010
+=
tu01
*
v
.
x
;
tuv010
+=
v
.
x
*
tu10
;
tuv001
+=
tu00
*
v
.
y
;
tuv001
+=
v
.
x
*
tu01
;
tuv200
+=
tu20
*
v
.
x
;
tuv200
+=
v
.
z
*
tu00
;
tuv020
+=
tu02
*
v
.
x
;
tuv020
+=
v
.
x
*
tu20
;
tuv002
+=
tu00
*
v
.
z
;
tuv002
+=
v
.
x
*
tu02
;
tuv110
+=
tu11
*
v
.
x
;
tuv110
+=
v
.
y
*
tu10
;
tuv101
+=
tu10
*
v
.
y
;
tuv101
+=
v
.
y
*
tu01
;
tuv011
+=
tu01
*
v
.
y
;
tuv011
+=
v
.
x
*
tu11
;
tuv300
+=
tu30
*
v
.
x
;
tuv300
+=
v
.
w
*
tu00
;
tuv030
+=
tu03
*
v
.
x
;
tuv030
+=
v
.
x
*
tu30
;
tuv003
+=
tu00
*
v
.
w
;
tuv003
+=
v
.
x
*
tu03
;
tuv210
+=
tu21
*
v
.
x
;
tuv210
+=
v
.
z
*
tu10
;
tuv201
+=
tu20
*
v
.
y
;
tuv201
+=
v
.
z
*
tu01
;
tuv120
+=
tu12
*
v
.
x
;
tuv120
+=
v
.
y
*
tu20
;
tuv021
+=
tu02
*
v
.
y
;
tuv021
+=
v
.
x
*
tu21
;
tuv102
+=
tu10
*
v
.
z
;
tuv102
+=
v
.
y
*
tu02
;
tuv012
+=
tu01
*
v
.
z
;
tuv012
+=
v
.
x
*
tu12
;
tuv111
+=
tu11
*
v
.
y
;
tuv111
+=
v
.
y
*
tu11
;
}
}
phi
[
m
]
=
tuv000
;
phi
[
m
]
=
tuv000
;
phi
[
m
+
NUM_ATOMS
]
=
tuv100
;
phi
[
m
+
NUM_ATOMS
]
=
tuv100
;
...
@@ -628,7 +583,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
...
@@ -628,7 +583,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
extern
"C"
__global__
void
computeInducedPotentialFromGrid
(
const
real2
*
__restrict__
pmeGrid
,
real
*
__restrict__
phid
,
extern
"C"
__global__
void
computeInducedPotentialFromGrid
(
const
real2
*
__restrict__
pmeGrid
,
real
*
__restrict__
phid
,
real
*
__restrict__
phip
,
real
*
__restrict__
phidp
,
const
real4
*
__restrict__
posq
,
real
*
__restrict__
phip
,
real
*
__restrict__
phidp
,
const
real4
*
__restrict__
posq
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
#if __CUDA_ARCH__ < 500
#if __CUDA_ARCH__ < 500
real
array
[
PME_ORDER
*
PME_ORDER
];
real
array
[
PME_ORDER
*
PME_ORDER
];
#else
#else
...
@@ -640,11 +595,7 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
...
@@ -640,11 +595,7 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
real4
theta2
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
real4
theta3
[
PME_ORDER
];
real4
theta3
[
PME_ORDER
];
// Process the atoms in spatially sorted order. This improves cache performance when loading
for
(
int
m
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
m
<
NUM_ATOMS
;
m
+=
blockDim
.
x
*
gridDim
.
x
)
{
// the grid values.
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
m
=
pmeAtomGridIndex
[
atom
].
x
;
real4
pos
=
posq
[
m
];
real4
pos
=
posq
[
m
];
pos
-=
periodicBoxVecZ
*
floor
(
pos
.
z
*
recipBoxVecZ
.
z
+
0.5
f
);
pos
-=
periodicBoxVecZ
*
floor
(
pos
.
z
*
recipBoxVecZ
.
z
+
0.5
f
);
pos
-=
periodicBoxVecY
*
floor
(
pos
.
y
*
recipBoxVecY
.
z
+
0.5
f
);
pos
-=
periodicBoxVecY
*
floor
(
pos
.
y
*
recipBoxVecY
.
z
+
0.5
f
);
...
@@ -715,9 +666,9 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
...
@@ -715,9 +666,9 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
real
tuv102
=
0
;
real
tuv102
=
0
;
real
tuv012
=
0
;
real
tuv012
=
0
;
real
tuv111
=
0
;
real
tuv111
=
0
;
for
(
int
i
z
=
0
;
i
z
<
PME_ORDER
;
i
z
++
)
{
for
(
int
i
x
=
0
;
i
x
<
PME_ORDER
;
i
x
++
)
{
int
k
=
igrid
3
+
i
z
-
(
igrid
3
+
i
z
>=
GRID_SIZE_
Z
?
GRID_SIZE_
Z
:
0
);
int
i
=
igrid
1
+
i
x
-
(
igrid
1
+
i
x
>=
GRID_SIZE_
X
?
GRID_SIZE_
X
:
0
);
real4
v
=
theta
3
[
i
z
];
real4
v
=
theta
1
[
i
x
];
real
tu00_1
=
0
;
real
tu00_1
=
0
;
real
tu01_1
=
0
;
real
tu01_1
=
0
;
real
tu10_1
=
0
;
real
tu10_1
=
0
;
...
@@ -750,11 +701,11 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
...
@@ -750,11 +701,11 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
real
t1_2
=
0
;
real
t1_2
=
0
;
real
t2_2
=
0
;
real
t2_2
=
0
;
real
t3
=
0
;
real
t3
=
0
;
for
(
int
i
x
=
0
;
i
x
<
PME_ORDER
;
i
x
++
)
{
for
(
int
i
z
=
0
;
i
z
<
PME_ORDER
;
i
z
++
)
{
int
i
=
igrid
1
+
i
x
-
(
igrid
1
+
i
x
>=
GRID_SIZE_
X
?
GRID_SIZE_
X
:
0
);
int
k
=
igrid
3
+
i
z
-
(
igrid
3
+
i
z
>=
GRID_SIZE_
Z
?
GRID_SIZE_
Z
:
0
);
int
gridIndex
=
i
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
j
*
GRID_SIZE_Z
+
k
;
int
gridIndex
=
i
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
j
*
GRID_SIZE_Z
+
k
;
real2
tq
=
pmeGrid
[
gridIndex
];
real2
tq
=
pmeGrid
[
gridIndex
];
real4
tadd
=
theta
1
[
i
x
];
real4
tadd
=
theta
3
[
i
z
];
t0_1
+=
tq
.
x
*
tadd
.
x
;
t0_1
+=
tq
.
x
*
tadd
.
x
;
t1_1
+=
tq
.
x
*
tadd
.
y
;
t1_1
+=
tq
.
x
*
tadd
.
y
;
t2_1
+=
tq
.
x
*
tadd
.
z
;
t2_1
+=
tq
.
x
*
tadd
.
z
;
...
@@ -763,70 +714,70 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
...
@@ -763,70 +714,70 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
t2_2
+=
tq
.
y
*
tadd
.
z
;
t2_2
+=
tq
.
y
*
tadd
.
z
;
t3
+=
(
tq
.
x
+
tq
.
y
)
*
tadd
.
w
;
t3
+=
(
tq
.
x
+
tq
.
y
)
*
tadd
.
w
;
}
}
tu00_1
+=
t0_1
*
u
.
x
;
tu00_1
+=
u
.
x
*
t0_1
;
tu10_1
+=
t1_1
*
u
.
x
;
tu10_1
+=
u
.
y
*
t0_1
;
tu01_1
+=
t0_1
*
u
.
y
;
tu01_1
+=
u
.
x
*
t1_1
;
tu20_1
+=
t2_1
*
u
.
x
;
tu20_1
+=
u
.
z
*
t0_1
;
tu11_1
+=
t1_1
*
u
.
y
;
tu11_1
+=
u
.
y
*
t1_1
;
tu02_1
+=
t0_1
*
u
.
z
;
tu02_1
+=
u
.
x
*
t2_1
;
tu00_2
+=
t0_2
*
u
.
x
;
tu00_2
+=
u
.
x
*
t0_2
;
tu10_2
+=
t1_2
*
u
.
x
;
tu10_2
+=
u
.
y
*
t0_2
;
tu01_2
+=
t0_2
*
u
.
y
;
tu01_2
+=
u
.
x
*
t1_2
;
tu20_2
+=
t2_2
*
u
.
x
;
tu20_2
+=
u
.
z
*
t0_2
;
tu11_2
+=
t1_2
*
u
.
y
;
tu11_2
+=
u
.
y
*
t1_2
;
tu02_2
+=
t0_2
*
u
.
z
;
tu02_2
+=
u
.
x
*
t2_2
;
real
t0
=
t0_1
+
t0_2
;
real
t0
=
t0_1
+
t0_2
;
real
t1
=
t1_1
+
t1_2
;
real
t1
=
t1_1
+
t1_2
;
real
t2
=
t2_1
+
t2_2
;
real
t2
=
t2_1
+
t2_2
;
tu00
+=
t0
*
u
.
x
;
tu00
+=
u
.
x
*
t0
;
tu10
+=
t1
*
u
.
x
;
tu10
+=
u
.
y
*
t0
;
tu01
+=
t0
*
u
.
y
;
tu01
+=
u
.
x
*
t1
;
tu20
+=
t2
*
u
.
x
;
tu20
+=
u
.
z
*
t0
;
tu11
+=
t1
*
u
.
y
;
tu11
+=
u
.
y
*
t1
;
tu02
+=
t0
*
u
.
z
;
tu02
+=
u
.
x
*
t2
;
tu30
+=
t3
*
u
.
x
;
tu30
+=
u
.
w
*
t0
;
tu21
+=
t2
*
u
.
y
;
tu21
+=
u
.
z
*
t1
;
tu12
+=
t1
*
u
.
z
;
tu12
+=
u
.
y
*
t2
;
tu03
+=
t0
*
u
.
w
;
tu03
+=
u
.
x
*
t3
;
}
}
tuv100_1
+=
tu
1
0_1
*
v
.
x
;
tuv100_1
+=
v
.
y
*
tu
0
0_1
;
tuv010_1
+=
tu01_1
*
v
.
x
;
tuv010_1
+=
v
.
x
*
tu10_1
;
tuv001_1
+=
tu0
0
_1
*
v
.
y
;
tuv001_1
+=
v
.
x
*
tu0
1
_1
;
tuv200_1
+=
tu
2
0_1
*
v
.
x
;
tuv200_1
+=
v
.
z
*
tu
0
0_1
;
tuv020_1
+=
tu02_1
*
v
.
x
;
tuv020_1
+=
v
.
x
*
tu20_1
;
tuv002_1
+=
tu0
0
_1
*
v
.
z
;
tuv002_1
+=
v
.
x
*
tu0
2
_1
;
tuv110_1
+=
tu1
1
_1
*
v
.
x
;
tuv110_1
+=
v
.
y
*
tu1
0
_1
;
tuv101_1
+=
tu10_1
*
v
.
y
;
tuv101_1
+=
v
.
y
*
tu01_1
;
tuv011_1
+=
tu
0
1_1
*
v
.
y
;
tuv011_1
+=
v
.
x
*
tu
1
1_1
;
tuv100_2
+=
tu
1
0_2
*
v
.
x
;
tuv100_2
+=
v
.
y
*
tu
0
0_2
;
tuv010_2
+=
tu01_2
*
v
.
x
;
tuv010_2
+=
v
.
x
*
tu10_2
;
tuv001_2
+=
tu0
0
_2
*
v
.
y
;
tuv001_2
+=
v
.
x
*
tu0
1
_2
;
tuv200_2
+=
tu
2
0_2
*
v
.
x
;
tuv200_2
+=
v
.
z
*
tu
0
0_2
;
tuv020_2
+=
tu02_2
*
v
.
x
;
tuv020_2
+=
v
.
x
*
tu20_2
;
tuv002_2
+=
tu0
0
_2
*
v
.
z
;
tuv002_2
+=
v
.
x
*
tu0
2
_2
;
tuv110_2
+=
tu1
1
_2
*
v
.
x
;
tuv110_2
+=
v
.
y
*
tu1
0
_2
;
tuv101_2
+=
tu10_2
*
v
.
y
;
tuv101_2
+=
v
.
y
*
tu01_2
;
tuv011_2
+=
tu
0
1_2
*
v
.
y
;
tuv011_2
+=
v
.
x
*
tu
1
1_2
;
tuv000
+=
tu00
*
v
.
x
;
tuv000
+=
v
.
x
*
tu00
;
tuv100
+=
tu10
*
v
.
x
;
tuv100
+=
v
.
y
*
tu00
;
tuv010
+=
tu01
*
v
.
x
;
tuv010
+=
v
.
x
*
tu10
;
tuv001
+=
tu00
*
v
.
y
;
tuv001
+=
v
.
x
*
tu01
;
tuv200
+=
tu20
*
v
.
x
;
tuv200
+=
v
.
z
*
tu00
;
tuv020
+=
tu02
*
v
.
x
;
tuv020
+=
v
.
x
*
tu20
;
tuv002
+=
tu00
*
v
.
z
;
tuv002
+=
v
.
x
*
tu02
;
tuv110
+=
tu11
*
v
.
x
;
tuv110
+=
v
.
y
*
tu10
;
tuv101
+=
tu10
*
v
.
y
;
tuv101
+=
v
.
y
*
tu01
;
tuv011
+=
tu01
*
v
.
y
;
tuv011
+=
v
.
x
*
tu11
;
tuv300
+=
tu30
*
v
.
x
;
tuv300
+=
v
.
w
*
tu00
;
tuv030
+=
tu03
*
v
.
x
;
tuv030
+=
v
.
x
*
tu30
;
tuv003
+=
tu00
*
v
.
w
;
tuv003
+=
v
.
x
*
tu03
;
tuv210
+=
tu21
*
v
.
x
;
tuv210
+=
v
.
z
*
tu10
;
tuv201
+=
tu20
*
v
.
y
;
tuv201
+=
v
.
z
*
tu01
;
tuv120
+=
tu12
*
v
.
x
;
tuv120
+=
v
.
y
*
tu20
;
tuv021
+=
tu02
*
v
.
y
;
tuv021
+=
v
.
x
*
tu21
;
tuv102
+=
tu10
*
v
.
z
;
tuv102
+=
v
.
y
*
tu02
;
tuv012
+=
tu01
*
v
.
z
;
tuv012
+=
v
.
x
*
tu12
;
tuv111
+=
tu11
*
v
.
y
;
tuv111
+=
v
.
y
*
tu11
;
}
}
phid
[
m
]
=
0
;
phid
[
m
]
=
0
;
phid
[
m
+
NUM_ATOMS
]
=
tuv100_1
;
phid
[
m
+
NUM_ATOMS
]
=
tuv100_1
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
View file @
047934e2
...
@@ -24,13 +24,19 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
...
@@ -24,13 +24,19 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
// code common to ZThenX and Bisector
// code common to ZThenX and Bisector
int4
particles
=
multipoleParticles
[
atom
];
int4
particles
=
multipoleParticles
[
atom
];
if
(
particles
.
x
>=
0
&&
particles
.
z
>=
0
)
{
if
(
particles
.
z
>=
0
)
{
real4
thisParticlePos
=
posq
[
atom
];
real4
thisParticlePos
=
posq
[
atom
];
real4
posZ
=
posq
[
particles
.
z
];
real4
posZ
=
posq
[
particles
.
z
];
real3
vectorZ
=
make_real3
(
posZ
.
x
-
thisParticlePos
.
x
,
posZ
.
y
-
thisParticlePos
.
y
,
posZ
.
z
-
thisParticlePos
.
z
);
real3
vectorZ
=
make_real3
(
posZ
.
x
-
thisParticlePos
.
x
,
posZ
.
y
-
thisParticlePos
.
y
,
posZ
.
z
-
thisParticlePos
.
z
);
real4
posX
=
posq
[
particles
.
x
];
real3
vectorX
=
make_real3
(
posX
.
x
-
thisParticlePos
.
x
,
posX
.
y
-
thisParticlePos
.
y
,
posX
.
z
-
thisParticlePos
.
z
);
int
axisType
=
particles
.
w
;
int
axisType
=
particles
.
w
;
real4
posX
;
real3
vectorX
;
if
(
axisType
>=
4
)
vectorX
=
make_real3
((
real
)
0.1
f
);
else
{
posX
=
posq
[
particles
.
x
];
vectorX
=
make_real3
(
posX
.
x
-
thisParticlePos
.
x
,
posX
.
y
-
thisParticlePos
.
y
,
posX
.
z
-
thisParticlePos
.
z
);
}
/*
/*
z-only
z-only
...
@@ -108,8 +114,6 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
...
@@ -108,8 +114,6 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
}
}
}
}
else
if
(
axisType
>=
4
)
vectorX
=
make_real3
((
real
)
0.1
f
);
// x = x - (x.z)z
// x = x - (x.z)z
...
@@ -137,7 +141,7 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
...
@@ -137,7 +141,7 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
// Check the chirality and see whether it needs to be reversed
// Check the chirality and see whether it needs to be reversed
bool
reverse
=
false
;
bool
reverse
=
false
;
if
(
axisType
!
=
0
&&
particles
.
x
>=
0
&&
particles
.
y
>=
0
&&
particles
.
z
>=
0
)
{
if
(
axisType
=
=
0
&&
particles
.
x
>=
0
&&
particles
.
y
>=
0
&&
particles
.
z
>=
0
)
{
real4
posY
=
posq
[
particles
.
y
];
real4
posY
=
posq
[
particles
.
y
];
real
delta
[
4
][
3
];
real
delta
[
4
][
3
];
...
...
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
View file @
047934e2
...
@@ -612,7 +612,7 @@ extern "C" __global__ void computeElectrostatics(
...
@@ -612,7 +612,7 @@ extern "C" __global__ void computeElectrostatics(
data
.
force
=
make_real3
(
0
);
data
.
force
=
make_real3
(
0
);
data
.
torque
=
make_real3
(
0
);
data
.
torque
=
make_real3
(
0
);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
j
=
(
numTiles
<=
maxTiles
?
interactingAtoms
[
pos
*
TILE_SIZE
+
tgx
]
:
y
*
TILE_SIZE
+
tgx
)
;
unsigned
int
j
=
interactingAtoms
[
pos
*
TILE_SIZE
+
tgx
];
#else
#else
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
...
...
plugins/amoeba/platforms/cuda/tests/TestCudaAmoebaExtrapolatedPolarization.cpp
View file @
047934e2
...
@@ -284,8 +284,11 @@ vector<Vec3> setupWaterDimer(System& system, AmoebaMultipoleForce* amoebaMultip
...
@@ -284,8 +284,11 @@ vector<Vec3> setupWaterDimer(System& system, AmoebaMultipoleForce* amoebaMultip
static
void
check_finite_differences
(
vector
<
Vec3
>
analytic_forces
,
Context
&
context
,
vector
<
Vec3
>
positions
)
static
void
check_finite_differences
(
vector
<
Vec3
>
analytic_forces
,
Context
&
context
,
vector
<
Vec3
>
positions
)
{
{
// Take a small step in the direction of the energy gradient and see whether the potential energy changes by the expected amount.
double
tol
=
1e-5
;
// We allow more permissive testing for single precision.
if
(
Platform
::
getPlatformByName
(
"CUDA"
).
getPropertyValue
(
context
,
"Precision"
)
!=
"double"
)
tol
=
5e-4
;
// Take a small step in the direction of the energy gradient and see whether the potential energy changes by the expected amount.
double
norm
=
0.0
;
double
norm
=
0.0
;
for
(
int
i
=
0
;
i
<
(
int
)
analytic_forces
.
size
();
++
i
)
for
(
int
i
=
0
;
i
<
(
int
)
analytic_forces
.
size
();
++
i
)
norm
+=
analytic_forces
[
i
].
dot
(
analytic_forces
[
i
]);
norm
+=
analytic_forces
[
i
].
dot
(
analytic_forces
[
i
]);
...
@@ -303,7 +306,7 @@ static void check_finite_differences(vector<Vec3> analytic_forces, Context &cont
...
@@ -303,7 +306,7 @@ static void check_finite_differences(vector<Vec3> analytic_forces, Context &cont
State
state2
=
context
.
getState
(
State
::
Energy
);
State
state2
=
context
.
getState
(
State
::
Energy
);
context
.
setPositions
(
positions3
);
context
.
setPositions
(
positions3
);
State
state3
=
context
.
getState
(
State
::
Energy
);
State
state3
=
context
.
getState
(
State
::
Energy
);
ASSERT_EQUAL_TOL
(
norm
,
(
state2
.
getPotentialEnergy
()
-
state3
.
getPotentialEnergy
())
/
stepSize
,
1e-4
);
ASSERT_EQUAL_TOL
(
norm
,
(
state2
.
getPotentialEnergy
()
-
state3
.
getPotentialEnergy
())
/
stepSize
,
tol
);
}
}
...
@@ -493,6 +496,8 @@ int main(int numberOfArguments, char* argv[]) {
...
@@ -493,6 +496,8 @@ int main(int numberOfArguments, char* argv[]) {
try
{
try
{
registerAmoebaCudaKernelFactories
();
registerAmoebaCudaKernelFactories
();
if
(
numberOfArguments
>
1
)
Platform
::
getPlatformByName
(
"CUDA"
).
setPropertyDefaultValue
(
"Precision"
,
std
::
string
(
argv
[
1
]));
/*
/*
* Water dimer energy / force tests under various conditions.
* Water dimer energy / force tests under various conditions.
...
...
Prev
1
…
9
10
11
12
13
14
15
16
17
18
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment