Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b53c0f10
Commit
b53c0f10
authored
Apr 12, 2019
by
Peter Eastman
Browse files
Minor optimizations to HIPPO
parent
1eec1e15
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
101 additions
and
79 deletions
+101
-79
plugins/amoeba/platforms/cuda/src/kernels/hippoComputeField.cu
...ns/amoeba/platforms/cuda/src/kernels/hippoComputeField.cu
+10
-10
plugins/amoeba/platforms/cuda/src/kernels/hippoFixedField.cu
plugins/amoeba/platforms/cuda/src/kernels/hippoFixedField.cu
+12
-2
plugins/amoeba/platforms/cuda/src/kernels/hippoInteraction.cu
...ins/amoeba/platforms/cuda/src/kernels/hippoInteraction.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/hippoInteractionHeader.cu
...oeba/platforms/cuda/src/kernels/hippoInteractionHeader.cu
+48
-47
plugins/amoeba/platforms/cuda/src/kernels/hippoMutualField.cu
...ins/amoeba/platforms/cuda/src/kernels/hippoMutualField.cu
+12
-2
plugins/amoeba/platforms/cuda/tests/TestCudaHippoNonbondedForce.cpp
...oeba/platforms/cuda/tests/TestCudaHippoNonbondedForce.cpp
+3
-3
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceHippoNonbondedForce.cpp
...src/SimTKReference/AmoebaReferenceHippoNonbondedForce.cpp
+12
-11
plugins/amoeba/platforms/reference/tests/TestReferenceHippoNonbondedForce.cpp
...orms/reference/tests/TestReferenceHippoNonbondedForce.cpp
+3
-3
No files found.
plugins/amoeba/platforms/cuda/src/kernels/hippoComputeField.cu
View file @
b53c0f10
...
...
@@ -32,16 +32,16 @@ __device__ void computeMutualFieldDampingFactors(real alphaI, real alphaJ, real
real
aJ2
=
alphaJ
*
alphaJ
;
real
A
=
aJ2
/
(
aJ2
-
aI2
);
real
B
=
aI2
/
(
aI2
-
aJ2
);
real
A2
=
A
*
A
;
real
B2
=
B
*
B
;
fdamp3
=
1
-
A2
*
(
1
+
arI
+
arI2
*
(
one
/
2
))
*
expARI
-
B2
*
(
1
+
arJ
+
arJ2
*
(
one
/
2
))
*
expARJ
-
2
*
A2
*
B
*
(
1
+
arI
)
*
expARI
-
2
*
B2
*
A
*
(
1
+
arJ
)
*
expARJ
;
fdamp5
=
1
-
A2
*
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
))
*
expARI
-
B2
*
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
))
*
expARJ
-
2
*
A2
*
B
*
(
1
+
arI
+
arI2
*
(
one
/
3
))
*
expARI
-
2
*
B2
*
A
*
(
1
+
arJ
+
arJ2
*
(
one
/
3
))
*
expARJ
;
real
A2
expARI
=
A
*
A
*
expARI
;
real
B2
expARJ
=
B
*
B
*
expARJ
;
fdamp3
=
1
-
(
1
+
arI
+
arI2
*
(
one
/
2
))
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
))
*
B2
expARJ
-
(
1
+
arI
)
*
2
*
B
*
A2
expARI
-
(
1
+
arJ
)
*
2
*
A
*
B2
expARJ
;
fdamp5
=
1
-
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
))
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
))
*
B2
expARJ
-
(
1
+
arI
+
arI2
*
(
one
/
3
))
*
2
*
B
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
3
))
*
2
*
A
*
B2
expARJ
;
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/hippoFixedField.cu
View file @
b53c0f10
...
...
@@ -7,10 +7,20 @@ real invR7 = invR5*invR2;
// Calculate the error function damping terms.
real
ralpha
=
PME_ALPHA
*
r
;
real
bn0
=
erfc
(
ralpha
)
*
invR
;
real
exp2a
=
EXP
(
-
(
ralpha
*
ralpha
));
#ifdef USE_DOUBLE_PRECISION
const
real
erfcAlphaR
=
erfc
(
ralpha
);
#else
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7.
const
real
t
=
RECIP
(
1.0
f
+
0.3275911
f
*
ralpha
);
const
real
erfcAlphaR
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
exp2a
;
#endif
real
bn0
=
erfcAlphaR
*
invR
;
real
alsq2
=
2
*
PME_ALPHA
*
PME_ALPHA
;
real
alsq2n
=
1
/
(
SQRT_PI
*
PME_ALPHA
);
real
exp2a
=
EXP
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
real
bn1
=
(
bn0
+
alsq2n
*
exp2a
)
*
invR2
;
alsq2n
*=
alsq2
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/hippoInteraction.cu
View file @
b53c0f10
...
...
@@ -168,7 +168,7 @@ real bn5 = (9*bn4+alsq2n*exp2a)*rInv2;
// Compute the force and torque.
real3
elecForce
=
-
scale
*
(
de
*
make_real3
(
0
,
0
,
r
)
+
term1
*
qiDipole1
+
term2
*
qiDipole2
+
real3
elecForce
=
-
scale
*
(
make_real3
(
0
,
0
,
de
*
r
)
+
term1
*
qiDipole1
+
term2
*
qiDipole2
+
term3
*
(
diqkTemp
-
dkqiTemp
)
+
term4
*
qi
+
term5
*
qk
+
term6
*
(
qikTemp
+
qkiTemp
));
real3
tI
=
scale
*
(
-
rr3ik
*
dikCross
+
term1
*
dirCross
+
term3
*
(
dqik
+
dkqirCross
)
+
term4
*
qirCross
-
term6
*
(
qikrCross
+
qikCross
));
real3
tK
=
scale
*
(
rr3ik
*
dikCross
+
term2
*
dkrCross
-
term3
*
(
dqik
+
diqkrCross
)
+
term5
*
qkrCross
-
term6
*
(
qkirCross
-
qikCross
));
...
...
plugins/amoeba/platforms/cuda/src/kernels/hippoInteractionHeader.cu
View file @
b53c0f10
...
...
@@ -96,35 +96,35 @@ __device__ void computeOverlapDampingFactors(real alphaI, real alphaJ, real r,
real
aJ2
=
alphaJ
*
alphaJ
;
real
A
=
aJ2
/
(
aJ2
-
aI2
);
real
B
=
aI2
/
(
aI2
-
aJ2
);
real
A2
=
A
*
A
;
real
B2
=
B
*
B
;
real
A2
expARI
=
A
*
A
*
expARI
;
real
B2
expARJ
=
B
*
B
*
expARJ
;
fdampJ1
=
1
-
(
1
+
arJ
*
(
one
/
2
))
*
expARJ
;
fdampJ3
=
1
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
))
*
expARJ
;
fdampJ5
=
1
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
))
*
expARJ
;
fdampJ7
=
1
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
)
+
arJ4
*
(
one
/
30
))
*
expARJ
;
fdampJ9
=
1
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
)
+
4
*
arJ4
*
(
one
/
105
)
+
arJ5
*
(
one
/
210
))
*
expARJ
;
fdampIJ1
=
1
-
A2
*
(
1
+
2
*
B
+
arI
*
(
one
/
2
))
*
expARI
-
B2
*
(
1
+
2
*
A
+
arJ
*
(
one
/
2
))
*
expARJ
;
fdampIJ3
=
1
-
A2
*
(
1
+
arI
+
arI2
*
(
one
/
2
))
*
expARI
-
B2
*
(
1
+
arJ
+
arJ2
*
(
one
/
2
))
*
expARJ
-
2
*
A2
*
B
*
(
1
+
arI
)
*
expARI
-
2
*
B2
*
A
*
(
1
+
arJ
)
*
expARJ
;
fdampIJ5
=
1
-
A2
*
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
))
*
expARI
-
B2
*
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
))
*
expARJ
-
2
*
A2
*
B
*
(
1
+
arI
+
arI2
*
(
one
/
3
))
*
expARI
-
2
*
B2
*
A
*
(
1
+
arJ
+
arJ2
*
(
one
/
3
))
*
expARJ
;
fdampIJ7
=
1
-
A2
*
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
)
+
arI4
*
(
one
/
30
))
*
expARI
-
B2
*
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
)
+
arJ4
*
(
one
/
30
))
*
expARJ
-
2
*
A2
*
B
*
(
1
+
arI
+
arI2
*
(
two
/
5
)
+
arI3
*
(
one
/
15
))
*
expARI
-
2
*
B2
*
A
*
(
1
+
arJ
+
arJ2
*
(
two
/
5
)
+
arJ3
*
(
one
/
15
))
*
expARJ
;
fdampIJ9
=
1
-
A2
*
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
)
+
arI4
*
4
*
(
one
/
105
)
+
arI5
*
(
one
/
210
))
*
expARI
-
B2
*
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
)
+
arJ4
*
4
*
(
one
/
105
)
+
arJ5
*
(
one
/
210
))
*
expARJ
-
2
*
A2
*
B
*
(
1
+
arI
+
arI2
*
(
three
/
7
)
+
arI3
*
(
two
/
21
)
+
arI4
*
(
one
/
105
))
*
expARI
-
2
*
B2
*
A
*
(
1
+
arJ
+
arJ2
*
(
three
/
7
)
+
arJ3
*
(
two
/
21
)
+
arJ4
*
(
one
/
105
))
*
expARJ
;
fdampIJ11
=
1
-
A2
*
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
)
+
arI4
*
(
five
/
126
)
+
arI5
*
(
two
/
315
)
+
arI6
*
(
one
/
1890
))
*
expARI
-
B2
*
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
)
+
arJ4
*
(
five
/
126
)
+
arJ5
*
(
two
/
315
)
+
arJ6
*
(
one
/
1890
))
*
expARJ
-
2
*
A2
*
B
*
(
1
+
arI
+
arI2
*
(
four
/
9
)
+
arI3
*
(
one
/
9
)
+
arI4
*
(
one
/
63
)
+
arI5
*
(
one
/
945
))
*
expARI
-
2
*
B2
*
A
*
(
1
+
arJ
+
arJ2
*
(
four
/
9
)
+
arJ3
*
(
one
/
9
)
+
arJ4
*
(
one
/
63
)
+
arJ5
*
(
one
/
945
))
*
expARJ
;
fdampIJ1
=
1
-
(
1
+
2
*
B
+
arI
*
(
one
/
2
))
*
A2
expARI
-
(
1
+
2
*
A
+
arJ
*
(
one
/
2
))
*
B2
expARJ
;
fdampIJ3
=
1
-
(
1
+
arI
+
arI2
*
(
one
/
2
))
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
))
*
B2
expARJ
-
(
1
+
arI
)
*
2
*
B
*
A2
expARI
-
(
1
+
arJ
)
*
2
*
A
*
B2
expARJ
;
fdampIJ5
=
1
-
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
))
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
))
*
B2
expARJ
-
(
1
+
arI
+
arI2
*
(
one
/
3
))
*
2
*
B
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
3
))
*
2
*
A
*
B2
expARJ
;
fdampIJ7
=
1
-
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
)
+
arI4
*
(
one
/
30
))
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
)
+
arJ4
*
(
one
/
30
))
*
B2
expARJ
-
(
1
+
arI
+
arI2
*
(
two
/
5
)
+
arI3
*
(
one
/
15
))
*
2
*
B
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
two
/
5
)
+
arJ3
*
(
one
/
15
))
*
2
*
A
*
B2
expARJ
;
fdampIJ9
=
1
-
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
)
+
arI4
*
4
*
(
one
/
105
)
+
arI5
*
(
one
/
210
))
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
)
+
arJ4
*
4
*
(
one
/
105
)
+
arJ5
*
(
one
/
210
))
*
B2
expARJ
-
(
1
+
arI
+
arI2
*
(
three
/
7
)
+
arI3
*
(
two
/
21
)
+
arI4
*
(
one
/
105
))
*
2
*
B
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
three
/
7
)
+
arJ3
*
(
two
/
21
)
+
arJ4
*
(
one
/
105
))
*
2
*
A
*
B2
expARJ
;
fdampIJ11
=
1
-
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
)
+
arI4
*
(
five
/
126
)
+
arI5
*
(
two
/
315
)
+
arI6
*
(
one
/
1890
))
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
)
+
arJ4
*
(
five
/
126
)
+
arJ5
*
(
two
/
315
)
+
arJ6
*
(
one
/
1890
))
*
B2
expARJ
-
(
1
+
arI
+
arI2
*
(
four
/
9
)
+
arI3
*
(
one
/
9
)
+
arI4
*
(
one
/
63
)
+
arI5
*
(
one
/
945
))
*
2
*
B
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
four
/
9
)
+
arJ3
*
(
one
/
9
)
+
arJ4
*
(
one
/
63
)
+
arJ5
*
(
one
/
945
))
*
2
*
A
*
B2
expARJ
;
}
}
...
...
@@ -152,18 +152,18 @@ __device__ void computeDispersionDampingFactors(real alphaI, real alphaJ, real r
real
aJ2
=
alphaJ
*
alphaJ
;
real
A
=
aJ2
/
(
aJ2
-
aI2
);
real
B
=
aI2
/
(
aI2
-
aJ2
);
real
A2
=
A
*
A
;
real
B2
=
B
*
B
;
fdamp3
=
1
-
A2
*
(
1
+
arI
+
arI2
*
(
one
/
2
))
*
expARI
-
B2
*
(
1
+
arJ
+
arJ2
*
(
one
/
2
))
*
expARJ
-
2
*
A2
*
B
*
(
1
+
arI
)
*
expARI
-
2
*
B2
*
A
*
(
1
+
arJ
)
*
expARJ
;
fdamp5
=
1
-
A2
*
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
))
*
expARI
-
B2
*
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
))
*
expARJ
-
2
*
A2
*
B
*
(
1
+
arI
+
arI2
*
(
one
/
3
))
*
expARI
-
2
*
B2
*
A
*
(
1
+
arJ
+
arJ2
*
(
one
/
3
))
*
expARJ
;
ddamp
=
(
A2
*
arI2
*
alphaI
*
expARI
*
(
r
*
alphaI
+
4
*
B
-
1
)
+
(
B2
*
arJ2
*
alphaJ
*
expARJ
*
(
r
*
alphaJ
+
4
*
A
-
1
)))
*
(
one
/
4
);
real
A2
expARI
=
A
*
A
*
expARI
;
real
B2
expARJ
=
B
*
B
*
expARJ
;
fdamp3
=
1
-
(
1
+
arI
+
arI2
*
(
one
/
2
))
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
))
*
B2
expARJ
-
(
1
+
arI
)
*
2
*
B
*
A2
expARI
-
(
1
+
arJ
)
*
2
*
A
*
B2
expARJ
;
fdamp5
=
1
-
(
1
+
arI
+
arI2
*
(
one
/
2
)
+
arI3
*
(
one
/
6
))
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
2
)
+
arJ3
*
(
one
/
6
))
*
B2
expARJ
-
(
1
+
arI
+
arI2
*
(
one
/
3
))
*
2
*
B
*
A2
expARI
-
(
1
+
arJ
+
arJ2
*
(
one
/
3
))
*
2
*
A
*
B2
expARJ
;
ddamp
=
(
arI2
*
alphaI
*
A2
expARI
*
(
r
*
alphaI
+
4
*
B
-
1
)
+
(
arJ2
*
alphaJ
*
B2
expARJ
*
(
r
*
alphaJ
+
4
*
A
-
1
)))
*
(
one
/
4
);
}
fdamp
=
1.5
f
*
fdamp5
-
0.5
f
*
fdamp3
;
}
...
...
@@ -210,21 +210,22 @@ __device__ void computeRepulsionDampingFactors(real pauliAlphaI, real pauliAlpha
real
aJ2_3
=
aJ2_2
*
aJ2
;
real
aJ2_4
=
aJ2_2
*
aJ2_2
;
real
aJ2_5
=
aJ2_3
*
aJ2_2
;
real
aJ2_6
=
aJ2_3
*
aJ2_3
;
real
scale
=
1
/
(
aI2_2
-
aJ2_2
);
real
aI2aJ2expI
=
aI2
*
aJ2
*
expI
;
real
aI2aJ2expJ
=
aI2
*
aJ2
*
expJ
;
pre
=
8192
*
aI2_3
*
aJ2_3
*
(
scale
*
scale
*
scale
*
scale
);
real
tmp
=
4
*
aI2
*
aJ2
*
scale
;
fexp
=
(
arI2
-
tmp
)
*
expJ
+
(
arJ2
+
tmp
)
*
expI
;
fexp1
=
(
aI2
*
aJ2
*
r2
-
4
*
a
I2
*
aJ2_2
*
r
*
scale
-
4
*
aI2
*
aJ2
*
scale
)
*
expJ
+
(
aI2
*
aJ2
*
r2
+
4
*
aI2
_2
*
aJ2
*
r
*
scale
+
4
*
aI2
*
aJ2
*
scale
)
*
expI
;
fexp2
=
(
aI2
*
aJ2
*
r2
*
(
one
/
3
)
+
a
I2
*
aJ2_
2
*
r3
*
(
one
/
3
)
-
(
four
/
3
)
*
aI2
*
aJ2_
3
*
r2
*
scale
-
4
*
aI2
*
aJ2_2
*
r
*
scale
-
4
*
aI2
*
aJ2
*
scale
)
*
expJ
+
(
aI2
*
aJ2
*
r2
*
(
one
/
3
)
+
aI2
_2
*
aJ2
*
r3
*
(
one
/
3
)
+
(
four
/
3
)
*
aI2_
3
*
aJ2
*
r2
*
scale
+
4
*
aI2_2
*
aJ2
*
r
*
scale
+
4
*
aI2
*
aJ2
*
scale
)
*
expI
;
fexp3
=
(
aI2
*
aJ2_
3
*
r4
*
(
one
/
15
)
+
a
I2
*
aJ2_
2
*
r3
*
(
one
/
5
)
+
aI2
*
aJ2
*
r2
*
(
one
/
5
)
-
(
four
/
15
)
*
aI2
*
aJ2_
4
*
r3
*
scale
-
(
eight
/
5
)
*
aI2
*
aJ2_
3
*
r2
*
scale
-
4
*
aI2
*
aJ2_2
*
r
*
scale
-
4
*
scale
*
aI2
*
aJ2
)
*
expJ
+
(
aI2_
3
*
aJ
2
*
r4
*
(
one
/
15
)
+
aI2
_2
*
aJ2
*
r3
*
(
one
/
5
)
+
aI2
*
aJ2
*
r2
*
(
one
/
5
)
+
(
four
/
15
)
*
aI2_
4
*
aJ2
*
r3
*
scale
+
(
eight
/
5
)
*
aI2_
3
*
aJ2
*
r2
*
scale
+
4
*
aI2_2
*
aJ2
*
r
*
scale
+
4
*
scale
*
aI2
*
aJ2
)
*
expI
;
fexp4
=
(
aI2
*
aJ2_
4
*
r5
*
(
one
/
105
)
+
(
two
/
35
)
*
aI2
*
aJ2_
3
*
r4
+
a
I2
*
aJ2_
2
*
r3
*
(
one
/
7
)
+
aI2
*
aJ2
*
r2
*
(
one
/
7
)
-
(
four
/
105
)
*
aI2
*
aJ2_
5
*
r4
*
scale
-
(
eight
/
21
)
*
aI2
*
aJ2_
4
*
r3
*
scale
-
(
twelve
/
7
)
*
aI2
*
aJ2_
3
*
r2
*
scale
-
4
*
aI2
*
aJ2_2
*
r
*
scale
-
4
*
aI2
*
aJ2
*
scale
)
*
expJ
+
(
aI2_
4
*
aJ2
*
r5
*
(
one
/
105
)
+
(
two
/
35
)
*
aI2_
3
*
aJ
2
*
r4
+
aI2
_2
*
aJ2
*
r3
*
(
one
/
7
)
+
aI2
*
aJ2
*
r2
*
(
one
/
7
)
+
(
four
/
105
)
*
aI2_
5
*
aJ2
*
r4
*
scale
+
(
eight
/
21
)
*
aI2_
4
*
aJ2
*
r3
*
scale
+
(
twelve
/
7
)
*
aI2_
3
*
aJ2
*
r2
*
scale
+
4
*
aI2_2
*
aJ2
*
r
*
scale
+
4
*
aI2
*
aJ2
*
scale
)
*
expI
;
fexp5
=
(
aI2
*
aJ2_
5
*
r6
*
(
one
/
945
)
+
(
two
/
189
)
*
aI2
*
aJ2_
4
*
r5
+
aI2
*
aJ2_
3
*
r4
*
(
one
/
21
)
+
aI2
*
aJ2
_2
*
r3
*
(
one
/
9
)
+
aI2
*
aJ2
*
r2
*
(
one
/
9
)
-
(
four
/
945
)
*
aI2
*
aJ2_
6
*
r5
*
scale
-
(
four
/
63
)
*
aI2
*
aJ2_
5
*
r4
*
scale
-
(
four
/
9
)
*
aI2
*
aJ2_
4
*
r3
*
scale
-
(
sixteen
/
9
)
*
aI2
*
aJ2_
3
*
r2
*
scale
-
4
*
aI2
*
aJ2_2
*
r
*
scale
-
4
*
aI2
*
aJ2
*
scale
)
*
expJ
+
(
aI2_
5
*
aJ2
*
r6
*
(
one
/
945
)
+
(
two
/
189
)
*
aI2_
4
*
aJ2
*
r5
+
aI2_
3
*
aJ
2
*
r4
*
(
one
/
21
)
+
aI2
_2
*
aJ2
*
r3
*
(
one
/
9
)
+
aI2
*
aJ2
*
r2
*
(
one
/
9
)
+
(
four
/
945
)
*
aI2_
6
*
aJ2
*
r5
*
scale
+
(
four
/
63
)
*
aI2_
5
*
aJ2
*
r4
*
scale
+
(
four
/
9
)
*
aI2_
4
*
aJ2
*
r3
*
scale
+
(
sixteen
/
9
)
*
aI2_
3
*
aJ2
*
r2
*
scale
+
4
*
aI2_2
*
aJ2
*
r
*
scale
+
4
*
aI2
*
aJ2
*
scale
)
*
expI
;
fexp1
=
(
r2
-
(
4
*
a
J2
*
r
+
4
)
*
scale
)
*
aI2aJ2expJ
+
(
r2
+
(
4
*
aI2
*
r
+
4
)
*
scale
)
*
aI2aJ2expI
;
fexp2
=
(
r2
*
(
one
/
3
)
+
a
J
2
*
r3
*
(
one
/
3
)
-
(
(
four
/
3
)
*
aJ2_
2
*
r2
+
4
*
aJ2
*
r
+
4
)
*
scale
)
*
aI2aJ2expJ
+
(
r2
*
(
one
/
3
)
+
aI2
*
r3
*
(
one
/
3
)
+
(
(
four
/
3
)
*
aI2_
2
*
r2
+
4
*
aI2
*
r
+
4
)
*
scale
)
*
aI2aJ2expI
;
fexp3
=
(
aJ2_
2
*
r4
*
(
one
/
15
)
+
a
J
2
*
r3
*
(
one
/
5
)
+
r2
*
(
one
/
5
)
-
(
(
four
/
15
)
*
aJ2_
3
*
r3
+
(
eight
/
5
)
*
aJ2_
2
*
r2
+
4
*
aJ2
*
r
+
4
)
*
scale
)
*
aI2aJ2expJ
+
(
aI2_2
*
r4
*
(
one
/
15
)
+
aI2
*
r3
*
(
one
/
5
)
+
r2
*
(
one
/
5
)
+
(
(
four
/
15
)
*
aI2_
3
*
r3
+
(
eight
/
5
)
*
aI2_
2
*
r2
+
4
*
aI2
*
r
+
4
)
*
scale
)
*
aI2aJ2expI
;
fexp4
=
(
aJ2_
3
*
r5
*
(
one
/
105
)
+
(
two
/
35
)
*
aJ2_
2
*
r4
+
a
J
2
*
r3
*
(
one
/
7
)
+
r2
*
(
one
/
7
)
-
(
(
four
/
105
)
*
aJ2_
4
*
r4
+
(
eight
/
21
)
*
aJ2_
3
*
r3
+
(
twelve
/
7
)
*
aJ2_
2
*
r2
+
4
*
aJ2
*
r
+
4
)
*
scale
)
*
aI2aJ2expJ
+
(
aI2_
3
*
r5
*
(
one
/
105
)
+
(
two
/
35
)
*
aI2_2
*
r4
+
aI2
*
r3
*
(
one
/
7
)
+
r2
*
(
one
/
7
)
+
(
(
four
/
105
)
*
aI2_
4
*
r4
+
(
eight
/
21
)
*
aI2_
3
*
r3
+
(
twelve
/
7
)
*
aI2_
2
*
r2
+
4
*
aI2
*
r
+
4
)
*
scale
)
*
aI2aJ2expI
;
fexp5
=
(
aJ2_
4
*
r6
*
(
one
/
945
)
+
(
two
/
189
)
*
aJ2_
3
*
r5
+
aJ2_
2
*
r4
*
(
one
/
21
)
+
aJ2
*
r3
*
(
one
/
9
)
+
r2
*
(
one
/
9
)
-
(
(
four
/
945
)
*
aJ2_
5
*
r5
+
(
four
/
63
)
*
aJ2_
4
*
r4
+
(
four
/
9
)
*
aJ2_
3
*
r3
+
(
sixteen
/
9
)
*
aJ2_
2
*
r2
+
4
*
aJ2
*
r
+
4
)
*
scale
)
*
aI2aJ2expJ
+
(
aI2_
4
*
r6
*
(
one
/
945
)
+
(
two
/
189
)
*
aI2_
3
*
r5
+
aI2_2
*
r4
*
(
one
/
21
)
+
aI2
*
r3
*
(
one
/
9
)
+
r2
*
(
one
/
9
)
+
(
(
four
/
945
)
*
aI2_
5
*
r5
+
(
four
/
63
)
*
aI2_
4
*
r4
+
(
four
/
9
)
*
aI2_
3
*
r3
+
(
sixteen
/
9
)
*
aI2_
2
*
r2
+
4
*
aI2
*
r
+
4
)
*
scale
)
*
aI2aJ2expI
;
}
fexp
=
fexp
/
r
;
fexp1
=
fexp1
/
r3
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/hippoMutualField.cu
View file @
b53c0f10
...
...
@@ -8,10 +8,20 @@ real invR2 = invR*invR;
real
invR3
=
invR
*
invR2
;
#if USE_EWALD
real
ralpha
=
PME_ALPHA
*
r
;
real
bn0
=
erfc
(
ralpha
)
*
invR
;
real
exp2a
=
EXP
(
-
(
ralpha
*
ralpha
));
#ifdef USE_DOUBLE_PRECISION
const
real
erfcAlphaR
=
erfc
(
ralpha
);
#else
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7.
const
real
t
=
RECIP
(
1.0
f
+
0.3275911
f
*
ralpha
);
const
real
erfcAlphaR
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
exp2a
;
#endif
real
bn0
=
erfcAlphaR
*
invR
;
real
alsq2
=
2
*
PME_ALPHA
*
PME_ALPHA
;
real
alsq2n
=
1
/
(
SQRT_PI
*
PME_ALPHA
);
real
exp2a
=
EXP
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
real
bn1
=
(
bn0
+
alsq2n
*
exp2a
)
*
invR2
;
alsq2n
*=
alsq2
;
...
...
plugins/amoeba/platforms/cuda/tests/TestCudaHippoNonbondedForce.cpp
View file @
b53c0f10
...
...
@@ -95,8 +95,8 @@ void checkForceEnergyConsistency(Context& context) {
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
Vec3
p
=
state
.
getPositions
()[
i
];
Vec3
f
=
state
.
getForces
()[
i
];
positions2
[
i
]
=
Vec3
(
p
[
0
]
-
f
[
0
]
*
step
,
p
[
1
]
-
f
[
1
]
*
step
,
p
[
2
]
-
f
[
2
]
*
step
)
;
positions3
[
i
]
=
Vec3
(
p
[
0
]
+
f
[
0
]
*
step
,
p
[
1
]
+
f
[
1
]
*
step
,
p
[
2
]
+
f
[
2
]
*
step
)
;
positions2
[
i
]
=
p
-
f
*
step
;
positions3
[
i
]
=
p
+
f
*
step
;
}
context
.
setPositions
(
positions2
);
State
state2
=
context
.
getState
(
State
::
Energy
);
...
...
@@ -136,7 +136,7 @@ void testWaterDimer() {
for
(
int
i
=
0
;
i
<
system
.
getNumParticles
();
i
++
)
ASSERT_EQUAL_VEC
(
-
expectedForces
[
i
],
state
.
getForces
()[
i
],
forceTol
);
// Compare the
induced
dipoles to reference values computed with Tinker.
// Compare the
permanent
dipoles to reference values computed with Tinker.
vector
<
Vec3
>
expectedLabDipoles
=
{
Vec3
(
-
1.3999971343167967e-3
,
0.0
,
2.5377493339976591e-3
),
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceHippoNonbondedForce.cpp
View file @
b53c0f10
...
...
@@ -522,21 +522,22 @@ void AmoebaReferenceHippoNonbondedForce::computeRepulsionDampingFactors(const Mu
double
aJ2_3
=
aJ2_2
*
aJ2
;
double
aJ2_4
=
aJ2_2
*
aJ2_2
;
double
aJ2_5
=
aJ2_3
*
aJ2_2
;
double
aJ2_6
=
aJ2_3
*
aJ2_3
;
double
scale
=
1
/
(
aI2_2
-
aJ2_2
);
double
aI2aJ2expI
=
aI2
*
aJ2
*
expI
;
double
aI2aJ2expJ
=
aI2
*
aJ2
*
expJ
;
pre
=
8192
*
aI2_3
*
aJ2_3
*
(
scale
*
scale
*
scale
*
scale
);
double
tmp
=
4
*
aI2
*
aJ2
*
scale
;
fexp
=
(
arI2
-
tmp
)
*
expJ
+
(
arJ2
+
tmp
)
*
expI
;
fexp1
=
(
aI2
*
aJ2
*
r2
-
4
*
a
I2
*
aJ2_2
*
r
*
scale
-
4
*
aI2
*
aJ2
*
scale
)
*
expJ
+
(
aI2
*
aJ2
*
r2
+
4
*
aI2
_2
*
aJ2
*
r
*
scale
+
4
*
aI2
*
aJ2
*
scale
)
*
expI
;
fexp2
=
(
aI2
*
aJ2
*
r2
*
(
1.0
/
3
)
+
a
I2
*
aJ2_
2
*
r3
*
(
1.0
/
3
)
-
(
4.0
/
3
)
*
aI2
*
aJ2_
3
*
r2
*
scale
-
4
*
aI2
*
aJ2_2
*
r
*
scale
-
4
*
aI2
*
aJ2
*
scale
)
*
expJ
+
(
aI2
*
aJ2
*
r2
*
(
1.0
/
3
)
+
aI2
_2
*
aJ2
*
r3
*
(
1.0
/
3
)
+
(
4.0
/
3
)
*
aI2_
3
*
aJ2
*
r2
*
scale
+
4
*
aI2_2
*
aJ2
*
r
*
scale
+
4
*
aI2
*
aJ2
*
scale
)
*
expI
;
fexp3
=
(
aI2
*
aJ2_
3
*
r4
*
(
1.0
/
15
)
+
a
I2
*
aJ2_
2
*
r3
*
(
1.0
/
5
)
+
aI2
*
aJ2
*
r2
*
(
1.0
/
5
)
-
(
4.0
/
15
)
*
aI2
*
aJ2_
4
*
r3
*
scale
-
(
8.0
/
5
)
*
aI2
*
aJ2_
3
*
r2
*
scale
-
4
*
aI2
*
aJ2_2
*
r
*
scale
-
4
*
scale
*
aI2
*
aJ2
)
*
expJ
+
(
aI2_
3
*
aJ
2
*
r4
*
(
1.0
/
15
)
+
aI2
_2
*
aJ2
*
r3
*
(
1.0
/
5
)
+
aI2
*
aJ2
*
r2
*
(
1.0
/
5
)
+
(
4.0
/
15
)
*
aI2_
4
*
aJ2
*
r3
*
scale
+
(
8.0
/
5
)
*
aI2_
3
*
aJ2
*
r2
*
scale
+
4
*
aI2_2
*
aJ2
*
r
*
scale
+
4
*
scale
*
aI2
*
aJ2
)
*
expI
;
fexp4
=
(
aI2
*
aJ2_
4
*
r5
*
(
1.0
/
105
)
+
(
2.0
/
35
)
*
aI2
*
aJ2_
3
*
r4
+
a
I2
*
aJ2_
2
*
r3
*
(
1.0
/
7
)
+
aI2
*
aJ2
*
r2
*
(
1.0
/
7
)
-
(
4.0
/
105
)
*
aI2
*
aJ2_
5
*
r4
*
scale
-
(
8.0
/
21
)
*
aI2
*
aJ2_
4
*
r3
*
scale
-
(
12.0
/
7
)
*
aI2
*
aJ2_
3
*
r2
*
scale
-
4
*
aI2
*
aJ2_2
*
r
*
scale
-
4
*
aI2
*
aJ2
*
scale
)
*
expJ
+
(
aI2_
4
*
aJ2
*
r5
*
(
1.0
/
105
)
+
(
2.0
/
35
)
*
aI2_
3
*
aJ
2
*
r4
+
aI2
_2
*
aJ2
*
r3
*
(
1.0
/
7
)
+
aI2
*
aJ2
*
r2
*
(
1.0
/
7
)
+
(
4.0
/
105
)
*
aI2_
5
*
aJ2
*
r4
*
scale
+
(
8.0
/
21
)
*
aI2_
4
*
aJ2
*
r3
*
scale
+
(
12.0
/
7
)
*
aI2_
3
*
aJ2
*
r2
*
scale
+
4
*
aI2_2
*
aJ2
*
r
*
scale
+
4
*
aI2
*
aJ2
*
scale
)
*
expI
;
fexp5
=
(
aI2
*
aJ2_
5
*
r6
*
(
1.0
/
945
)
+
(
2.0
/
189
)
*
aI2
*
aJ2_
4
*
r5
+
aI2
*
aJ2_
3
*
r4
*
(
1.0
/
21
)
+
aI2
*
aJ2
_2
*
r3
*
(
1.0
/
9
)
+
aI2
*
aJ2
*
r2
*
(
1.0
/
9
)
-
(
4.0
/
945
)
*
aI2
*
aJ2_
6
*
r5
*
scale
-
(
4.0
/
63
)
*
aI2
*
aJ2_
5
*
r4
*
scale
-
(
4.0
/
9
)
*
aI2
*
aJ2_
4
*
r3
*
scale
-
(
16.0
/
9
)
*
aI2
*
aJ2_
3
*
r2
*
scale
-
4
*
aI2
*
aJ2_2
*
r
*
scale
-
4
*
aI2
*
aJ2
*
scale
)
*
expJ
+
(
aI2_
5
*
aJ2
*
r6
*
(
1.0
/
945
)
+
(
2.0
/
189
)
*
aI2_
4
*
aJ2
*
r5
+
aI2_
3
*
aJ
2
*
r4
*
(
1.0
/
21
)
+
aI2
_2
*
aJ2
*
r3
*
(
1.0
/
9
)
+
aI2
*
aJ2
*
r2
*
(
1.0
/
9
)
+
(
4.0
/
945
)
*
aI2_
6
*
aJ2
*
r5
*
scale
+
(
4.0
/
63
)
*
aI2_
5
*
aJ2
*
r4
*
scale
+
(
4.0
/
9
)
*
aI2_
4
*
aJ2
*
r3
*
scale
+
(
16.0
/
9
)
*
aI2_
3
*
aJ2
*
r2
*
scale
+
4
*
aI2_2
*
aJ2
*
r
*
scale
+
4
*
aI2
*
aJ2
*
scale
)
*
expI
;
fexp1
=
(
r2
-
(
4
*
a
J2
*
r
+
4
)
*
scale
)
*
aI2aJ2expJ
+
(
r2
+
(
4
*
aI2
*
r
+
4
)
*
scale
)
*
aI2aJ2expI
;
fexp2
=
(
r2
*
(
1.0
/
3
)
+
a
J
2
*
r3
*
(
1.0
/
3
)
-
(
(
4.0
/
3
)
*
aJ2_
2
*
r2
+
4
*
aJ2
*
r
+
4
)
*
scale
)
*
aI2aJ2expJ
+
(
r2
*
(
1.0
/
3
)
+
aI2
*
r3
*
(
1.0
/
3
)
+
(
(
4.0
/
3
)
*
aI2_
2
*
r2
+
4
*
aI2
*
r
+
4
)
*
scale
)
*
aI2aJ2expI
;
fexp3
=
(
aJ2_
2
*
r4
*
(
1.0
/
15
)
+
a
J
2
*
r3
*
(
1.0
/
5
)
+
r2
*
(
1.0
/
5
)
-
(
(
4.0
/
15
)
*
aJ2_
3
*
r3
+
(
8.0
/
5
)
*
aJ2_
2
*
r2
+
4
*
aJ2
*
r
+
4
)
*
scale
)
*
aI2aJ2expJ
+
(
aI2_2
*
r4
*
(
1.0
/
15
)
+
aI2
*
r3
*
(
1.0
/
5
)
+
r2
*
(
1.0
/
5
)
+
(
(
4.0
/
15
)
*
aI2_
3
*
r3
+
(
8.0
/
5
)
*
aI2_
2
*
r2
+
4
*
aI2
*
r
+
4
)
*
scale
)
*
aI2aJ2expI
;
fexp4
=
(
aJ2_
3
*
r5
*
(
1.0
/
105
)
+
(
2.0
/
35
)
*
aJ2_
2
*
r4
+
a
J
2
*
r3
*
(
1.0
/
7
)
+
r2
*
(
1.0
/
7
)
-
(
(
4.0
/
105
)
*
aJ2_
4
*
r4
+
(
8.0
/
21
)
*
aJ2_
3
*
r3
+
(
12.0
/
7
)
*
aJ2_
2
*
r2
+
4
*
aJ2
*
r
+
4
)
*
scale
)
*
aI2aJ2expJ
+
(
aI2_
3
*
r5
*
(
1.0
/
105
)
+
(
2.0
/
35
)
*
aI2_2
*
r4
+
aI2
*
r3
*
(
1.0
/
7
)
+
r2
*
(
1.0
/
7
)
+
(
(
4.0
/
105
)
*
aI2_
4
*
r4
+
(
8.0
/
21
)
*
aI2_
3
*
r3
+
(
12.0
/
7
)
*
aI2_
2
*
r2
+
4
*
aI2
*
r
+
4
)
*
scale
)
*
aI2aJ2expI
;
fexp5
=
(
aJ2_
4
*
r6
*
(
1.0
/
945
)
+
(
2.0
/
189
)
*
aJ2_
3
*
r5
+
aJ2_
2
*
r4
*
(
1.0
/
21
)
+
aJ2
*
r3
*
(
1.0
/
9
)
+
r2
*
(
1.0
/
9
)
-
(
(
4.0
/
945
)
*
aJ2_
5
*
r5
+
(
4.0
/
63
)
*
aJ2_
4
*
r4
+
(
4.0
/
9
)
*
aJ2_
3
*
r3
+
(
16.0
/
9
)
*
aJ2_
2
*
r2
+
4
*
aJ2
*
r
+
4
)
*
scale
)
*
aI2aJ2expJ
+
(
aI2_
4
*
r6
*
(
1.0
/
945
)
+
(
2.0
/
189
)
*
aI2_
3
*
r5
+
aI2_2
*
r4
*
(
1.0
/
21
)
+
aI2
*
r3
*
(
1.0
/
9
)
+
r2
*
(
1.0
/
9
)
+
(
(
4.0
/
945
)
*
aI2_
5
*
r5
+
(
4.0
/
63
)
*
aI2_
4
*
r4
+
(
4.0
/
9
)
*
aI2_
3
*
r3
+
(
16.0
/
9
)
*
aI2_
2
*
r2
+
4
*
aI2
*
r
+
4
)
*
scale
)
*
aI2aJ2expI
;
}
fexp
=
fexp
/
r
;
fexp1
=
fexp1
/
r3
;
...
...
plugins/amoeba/platforms/reference/tests/TestReferenceHippoNonbondedForce.cpp
View file @
b53c0f10
...
...
@@ -93,8 +93,8 @@ void checkForceEnergyConsistency(Context& context) {
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
Vec3
p
=
state
.
getPositions
()[
i
];
Vec3
f
=
state
.
getForces
()[
i
];
positions2
[
i
]
=
Vec3
(
p
[
0
]
-
f
[
0
]
*
step
,
p
[
1
]
-
f
[
1
]
*
step
,
p
[
2
]
-
f
[
2
]
*
step
)
;
positions3
[
i
]
=
Vec3
(
p
[
0
]
+
f
[
0
]
*
step
,
p
[
1
]
+
f
[
1
]
*
step
,
p
[
2
]
+
f
[
2
]
*
step
)
;
positions2
[
i
]
=
p
-
f
*
step
;
positions3
[
i
]
=
p
+
f
*
step
;
}
context
.
setPositions
(
positions2
);
State
state2
=
context
.
getState
(
State
::
Energy
);
...
...
@@ -134,7 +134,7 @@ void testWaterDimer() {
for
(
int
i
=
0
;
i
<
system
.
getNumParticles
();
i
++
)
ASSERT_EQUAL_VEC
(
-
expectedForces
[
i
],
state
.
getForces
()[
i
],
1e-5
);
// Compare the
induced
dipoles to reference values computed with Tinker.
// Compare the
permanent
dipoles to reference values computed with Tinker.
vector
<
Vec3
>
expectedLabDipoles
=
{
Vec3
(
-
1.3999971343167967e-3
,
0.0
,
2.5377493339976591e-3
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment