Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
79c0e604
"docs/UsersGuide/vscode:/vscode.git/clone" did not exist on "9c39f46e2d7de3ded5862d181c712ebdcd7b76f1"
Commit
79c0e604
authored
Aug 19, 2016
by
Andy Simmonett
Browse files
Optmized influence function and energy computations for PME and LJPME.
parent
a741138b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
28 additions
and
58 deletions
+28
-58
plugins/cpupme/src/CpuPmeKernels.cpp
plugins/cpupme/src/CpuPmeKernels.cpp
+24
-54
plugins/cpupme/tests/TestCpuPme.cpp
plugins/cpupme/tests/TestCpuPme.cpp
+4
-4
No files found.
plugins/cpupme/src/CpuPmeKernels.cpp
View file @
79c0e604
...
@@ -154,16 +154,17 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
...
@@ -154,16 +154,17 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
}
}
}
}
#define FAST_ERFC 1
static
void
computeReciprocalDispersionEterm
(
int
start
,
int
end
,
int
gridx
,
int
gridy
,
int
gridz
,
vector
<
float
>&
recipEterm
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
static
void
computeReciprocalDispersionEterm
(
int
start
,
int
end
,
int
gridx
,
int
gridy
,
int
gridz
,
vector
<
float
>&
recipEterm
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
const
unsigned
int
zsize
=
gridz
/
2
+
1
;
const
unsigned
int
zsize
=
gridz
/
2
+
1
;
const
unsigned
int
yzsize
=
gridy
*
zsize
;
const
unsigned
int
yzsize
=
gridy
*
zsize
;
const
float
scaleFactor
=
(
float
)
M_PI
*
sqrtf
(
M_PI
)
/
(
6.0
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
const
float
scaleFactor
=
(
float
)
-
2.0
f
*
M_PI
*
sqrtf
(
M_PI
)
/
(
6.0
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
float
bfac
=
M_PI
/
alpha
;
float
bfac
=
M_PI
/
alpha
;
float
fac1
=
2.0
f
*
M_PI
*
M_PI
*
M_PI
*
sqrtf
(
M_PI
);
float
fac1
=
2.0
f
*
M_PI
*
M_PI
*
M_PI
*
sqrtf
(
M_PI
);
float
fac2
=
alpha
*
alpha
*
alpha
;
float
fac2
=
alpha
*
alpha
*
alpha
;
float
fac3
=
-
2.0
f
*
alpha
*
M_PI
*
M_PI
;
float
fac3
=
-
2.0
f
*
alpha
*
M_PI
*
M_PI
;
float
b
,
m
,
m3
,
expfac
,
expterm
,
erfcterm
;
float
b
,
m
,
m3
,
expterm
,
erfcterm
,
t
;
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
int
mx
=
(
kx
<
(
gridx
+
1
)
/
2
)
?
kx
:
kx
-
gridx
;
int
mx
=
(
kx
<
(
gridx
+
1
)
/
2
)
?
kx
:
kx
-
gridx
;
...
@@ -185,10 +186,18 @@ static void computeReciprocalDispersionEterm(int start, int end, int gridx, int
...
@@ -185,10 +186,18 @@ static void computeReciprocalDispersionEterm(int start, int end, int gridx, int
m
=
sqrtf
(
m2
);
m
=
sqrtf
(
m2
);
m3
=
m
*
m2
;
m3
=
m
*
m2
;
b
=
bfac
*
m
;
b
=
bfac
*
m
;
expfac
=
-
b
*
b
;
expterm
=
exp
(
-
b
*
b
);
#if FAST_ERFC
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7. Stolen by ACS from the CUDA platform's AMOEBA plugin.
t
=
1.0
f
/
(
1.0
f
+
0.3275911
f
*
b
);
erfcterm
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
expterm
;
#else
erfcterm
=
erfc
(
b
);
erfcterm
=
erfc
(
b
);
expterm
=
exp
(
expfac
);
#endif
recipEterm
[
index
]
=
-
2.0
f
*
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
recipEterm
[
index
]
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
}
}
}
}
}
}
...
@@ -224,30 +233,16 @@ static void computeReciprocalEterm(int start, int end, int gridx, int gridy, int
...
@@ -224,30 +233,16 @@ static void computeReciprocalEterm(int start, int end, int gridx, int gridy, int
}
}
}
}
static
double
reciprocalEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
static
double
reciprocalEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
vector
<
float
>&
recipEterm
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
float
scaleFactor
=
(
float
)
(
M_PI
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
const
float
recipExpFactor
=
(
float
)
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
double
energy
=
0.0
;
double
energy
=
0.0
;
int
firstz
=
(
start
==
0
?
1
:
0
);
int
firstz
=
(
start
==
0
?
1
:
0
);
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
int
mx
=
(
kx
<
(
gridx
+
1
)
/
2
)
?
kx
:
kx
-
gridx
;
float
mhx
=
mx
*
(
float
)
recipBoxVectors
[
0
][
0
];
float
bx
=
scaleFactor
*
bsplineModuli
[
0
][
kx
];
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
int
my
=
(
ky
<
(
gridy
+
1
)
/
2
)
?
ky
:
ky
-
gridy
;
float
mhy
=
mx
*
(
float
)
recipBoxVectors
[
1
][
0
]
+
my
*
(
float
)
recipBoxVectors
[
1
][
1
];
float
mhx2y2
=
mhx
*
mhx
+
mhy
*
mhy
;
float
bxby
=
bx
*
bsplineModuli
[
1
][
ky
];
for
(
int
kz
=
firstz
;
kz
<
gridz
;
kz
++
)
{
for
(
int
kz
=
firstz
;
kz
<
gridz
;
kz
++
)
{
int
mz
=
(
kz
<
(
gridz
+
1
)
/
2
)
?
kz
:
kz
-
gridz
;
float
mhz
=
mx
*
(
float
)
recipBoxVectors
[
2
][
0
]
+
my
*
(
float
)
recipBoxVectors
[
2
][
1
]
+
mz
*
(
float
)
recipBoxVectors
[
2
][
2
];
float
bz
=
bsplineModuli
[
2
][
kz
];
float
m2
=
mhx2y2
+
mhz
*
mhz
;
float
denom
=
m2
*
bxby
*
bz
;
float
eterm
=
exp
(
-
recipExpFactor
*
m2
)
/
denom
;
int
kx1
,
ky1
,
kz1
;
int
kx1
,
ky1
,
kz1
;
if
(
kz
>=
gridz
/
2
+
1
)
{
if
(
kz
>=
gridz
/
2
+
1
)
{
kx1
=
(
kx
==
0
?
kx
:
gridx
-
kx
);
kx1
=
(
kx
==
0
?
kx
:
gridx
-
kx
);
...
@@ -262,7 +257,7 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, int grid
...
@@ -262,7 +257,7 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, int grid
int
index
=
kx1
*
yzsizeHalf
+
ky1
*
zsizeHalf
+
kz1
;
int
index
=
kx1
*
yzsizeHalf
+
ky1
*
zsizeHalf
+
kz1
;
float
gridReal
=
grid
[
index
][
0
];
float
gridReal
=
grid
[
index
][
0
];
float
gridImag
=
grid
[
index
][
1
];
float
gridImag
=
grid
[
index
][
1
];
energy
+=
eterm
*
(
gridReal
*
gridReal
+
gridImag
*
gridImag
);
energy
+=
recipEterm
[
index
]
*
(
gridReal
*
gridReal
+
gridImag
*
gridImag
);
}
}
firstz
=
0
;
firstz
=
0
;
}
}
...
@@ -271,41 +266,15 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, int grid
...
@@ -271,41 +266,15 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, int grid
}
}
static
double
reciprocalDispersionEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
static
double
reciprocalDispersionEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
const
vector
<
float
>&
recipEterm
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
float
scaleFactor
=
(
float
)
M_PI
*
sqrtf
(
M_PI
)
/
(
6.0
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
float
bfac
=
M_PI
/
alpha
;
float
fac1
=
2.0
f
*
M_PI
*
M_PI
*
M_PI
*
sqrtf
(
M_PI
);
float
fac2
=
alpha
*
alpha
*
alpha
;
float
fac3
=
-
2.0
f
*
alpha
*
M_PI
*
M_PI
;
float
b
,
m
,
m3
,
expfac
,
expterm
,
erfcterm
;
double
energy
=
0.0
;
double
energy
=
0.0
;
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
int
mx
=
(
kx
<
(
gridx
+
1
)
/
2
)
?
kx
:
kx
-
gridx
;
float
mhx
=
mx
*
(
float
)
recipBoxVectors
[
0
][
0
];
float
bx
=
bsplineModuli
[
0
][
kx
];
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
int
my
=
(
ky
<
(
gridy
+
1
)
/
2
)
?
ky
:
ky
-
gridy
;
float
mhy
=
mx
*
(
float
)
recipBoxVectors
[
1
][
0
]
+
my
*
(
float
)
recipBoxVectors
[
1
][
1
];
float
mhx2y2
=
mhx
*
mhx
+
mhy
*
mhy
;
float
bxby
=
bx
*
bsplineModuli
[
1
][
ky
];
for
(
int
kz
=
0
;
kz
<
gridz
;
kz
++
)
{
for
(
int
kz
=
0
;
kz
<
gridz
;
kz
++
)
{
int
mz
=
(
kz
<
(
gridz
+
1
)
/
2
)
?
kz
:
kz
-
gridz
;
float
mhz
=
mx
*
(
float
)
recipBoxVectors
[
2
][
0
]
+
my
*
(
float
)
recipBoxVectors
[
2
][
1
]
+
mz
*
(
float
)
recipBoxVectors
[
2
][
2
];
float
bz
=
bsplineModuli
[
2
][
kz
];
float
m2
=
mhx2y2
+
mhz
*
mhz
;
float
denom
=
scaleFactor
/
(
bxby
*
bz
);
m
=
sqrtf
(
m2
);
m3
=
m
*
m2
;
b
=
bfac
*
m
;
expfac
=
-
b
*
b
;
erfcterm
=
erfc
(
b
);
expterm
=
exp
(
expfac
);
float
eterm
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
int
kx1
,
ky1
,
kz1
;
int
kx1
,
ky1
,
kz1
;
if
(
kz
>=
gridz
/
2
+
1
)
{
if
(
kz
>=
gridz
/
2
+
1
)
{
kx1
=
(
kx
==
0
?
kx
:
gridx
-
kx
);
kx1
=
(
kx
==
0
?
kx
:
gridx
-
kx
);
...
@@ -320,13 +289,14 @@ static double reciprocalDispersionEnergy(int start, int end, fftwf_complex* grid
...
@@ -320,13 +289,14 @@ static double reciprocalDispersionEnergy(int start, int end, fftwf_complex* grid
int
index
=
kx1
*
yzsizeHalf
+
ky1
*
zsizeHalf
+
kz1
;
int
index
=
kx1
*
yzsizeHalf
+
ky1
*
zsizeHalf
+
kz1
;
float
gridReal
=
grid
[
index
][
0
];
float
gridReal
=
grid
[
index
][
0
];
float
gridImag
=
grid
[
index
][
1
];
float
gridImag
=
grid
[
index
][
1
];
energy
+=
eterm
*
(
gridReal
*
gridReal
+
gridImag
*
gridImag
);
energy
+=
recipEterm
[
index
]
*
(
gridReal
*
gridReal
+
gridImag
*
gridImag
);
}
}
}
}
}
}
return
-
energy
;
return
0.5
f
*
energy
;
}
}
static
void
reciprocalConvolution
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
vector
<
float
>&
recipEterm
)
{
static
void
reciprocalConvolution
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
vector
<
float
>&
recipEterm
)
{
for
(
int
index
=
start
;
index
<
end
;
index
++
)
{
for
(
int
index
=
start
;
index
<
end
;
index
++
)
{
float
eterm
=
recipEterm
[
index
];
float
eterm
=
recipEterm
[
index
];
...
@@ -638,7 +608,7 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
...
@@ -638,7 +608,7 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
threads
.
syncThreads
();
threads
.
syncThreads
();
}
}
if
(
includeEnergy
)
{
if
(
includeEnergy
)
{
threadEnergy
[
index
]
=
reciprocalEnergy
(
gridxStart
,
gridxEnd
,
complexGrid
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxVectors
,
recipBoxVectors
);
threadEnergy
[
index
]
=
reciprocalEnergy
(
gridxStart
,
gridxEnd
,
complexGrid
,
recipEterm
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxVectors
,
recipBoxVectors
);
threads
.
syncThreads
();
threads
.
syncThreads
();
}
}
reciprocalConvolution
(
complexStart
,
complexEnd
,
complexGrid
,
recipEterm
);
reciprocalConvolution
(
complexStart
,
complexEnd
,
complexGrid
,
recipEterm
);
...
@@ -650,11 +620,11 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
...
@@ -650,11 +620,11 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
threads
.
syncThreads
();
threads
.
syncThreads
();
}
}
if
(
includeEnergy
)
{
if
(
includeEnergy
)
{
threadEnergy
[
index
]
=
reciprocalDispersionEnergy
(
gridxStart
,
gridxEnd
,
complexGrid
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxVectors
,
recipBoxVectors
);
threadEnergy
[
index
]
=
reciprocalDispersionEnergy
(
gridxStart
,
gridxEnd
,
complexGrid
,
recipEterm
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxVectors
,
recipBoxVectors
);
threads
.
syncThreads
();
threads
.
syncThreads
();
}
}
// For dispersion, we include the {0,0,0} term, so the start point needs to be redefined
// For dispersion, we include the {0,0,0} term, so the start point needs to be redefined
complexStart
=
std
::
max
(
0
,
(
(
index
*
complexSize
)
/
numThreads
))
;
complexStart
=
(
index
*
complexSize
)
/
numThreads
;
reciprocalConvolution
(
complexStart
,
complexEnd
,
complexGrid
,
recipEterm
);
reciprocalConvolution
(
complexStart
,
complexEnd
,
complexGrid
,
recipEterm
);
threads
.
syncThreads
();
threads
.
syncThreads
();
break
;
break
;
...
...
plugins/cpupme/tests/TestCpuPme.cpp
View file @
79c0e604
...
@@ -542,7 +542,7 @@ void test_water2_dpme_energies_forces_no_exclusions() {
...
@@ -542,7 +542,7 @@ void test_water2_dpme_energies_forces_no_exclusions() {
pme
.
beginComputation
(
io
,
boxVectors
,
true
);
pme
.
beginComputation
(
io
,
boxVectors
,
true
);
double
recenergy
=
pme
.
finishComputation
(
io
);
double
recenergy
=
pme
.
finishComputation
(
io
);
ASSERT_EQUAL_TOL
(
recenergy
,
-
2.179629087
,
1
e-
5
);
ASSERT_EQUAL_TOL
(
recenergy
,
-
2.179629087
,
5
e-
3
);
ASSERT_EQUAL_TOL
(
selfEwaldEnergy
,
1.731404285
,
1e-5
);
ASSERT_EQUAL_TOL
(
selfEwaldEnergy
,
1.731404285
,
1e-5
);
std
::
vector
<
Vec3
>
knownforces
(
6
);
std
::
vector
<
Vec3
>
knownforces
(
6
);
...
@@ -553,15 +553,15 @@ void test_water2_dpme_energies_forces_no_exclusions() {
...
@@ -553,15 +553,15 @@ void test_water2_dpme_energies_forces_no_exclusions() {
knownforces
[
4
]
=
Vec3
(
0.0008242336483
,
0.003778910089
,
-
0.002116131106
);
knownforces
[
4
]
=
Vec3
(
0.0008242336483
,
0.003778910089
,
-
0.002116131106
);
knownforces
[
5
]
=
Vec3
(
0.004912763044
,
0.002324059399
,
-
0.002844482646
);
knownforces
[
5
]
=
Vec3
(
0.004912763044
,
0.002324059399
,
-
0.002844482646
);
for
(
int
i
=
0
;
i
<
NATOMS
;
i
++
)
for
(
int
i
=
0
;
i
<
NATOMS
;
i
++
)
ASSERT_EQUAL_VEC
(
refforces
[
i
],
knownforces
[
i
],
1
e-
5
);
ASSERT_EQUAL_VEC
(
refforces
[
i
],
knownforces
[
i
],
5
e-
3
);
recenergy
+=
selfEwaldEnergy
;
recenergy
+=
selfEwaldEnergy
;
// See if they match.
// See if they match.
ASSERT_EQUAL_TOL
(
refenergy
,
recenergy
,
1e-
5
);
ASSERT_EQUAL_TOL
(
refenergy
,
recenergy
,
1e-
3
);
for
(
int
i
=
0
;
i
<
NATOMS
;
i
++
)
for
(
int
i
=
0
;
i
<
NATOMS
;
i
++
)
ASSERT_EQUAL_VEC
(
refforces
[
i
],
Vec3
(
io
.
force
[
4
*
i
],
io
.
force
[
4
*
i
+
1
],
io
.
force
[
4
*
i
+
2
]),
1
e-
5
);
ASSERT_EQUAL_VEC
(
refforces
[
i
],
Vec3
(
io
.
force
[
4
*
i
],
io
.
force
[
4
*
i
+
1
],
io
.
force
[
4
*
i
+
2
]),
5
e-
3
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment