Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
79c0e604
Commit
79c0e604
authored
Aug 19, 2016
by
Andy Simmonett
Browse files
Optmized influence function and energy computations for PME and LJPME.
parent
a741138b
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
28 additions
and
58 deletions
+28
-58
plugins/cpupme/src/CpuPmeKernels.cpp
plugins/cpupme/src/CpuPmeKernels.cpp
+24
-54
plugins/cpupme/tests/TestCpuPme.cpp
plugins/cpupme/tests/TestCpuPme.cpp
+4
-4
No files found.
plugins/cpupme/src/CpuPmeKernels.cpp
View file @
79c0e604
...
@@ -154,16 +154,17 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
...
@@ -154,16 +154,17 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
}
}
}
}
#define FAST_ERFC 1
static
void
computeReciprocalDispersionEterm
(
int
start
,
int
end
,
int
gridx
,
int
gridy
,
int
gridz
,
vector
<
float
>&
recipEterm
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
static
void
computeReciprocalDispersionEterm
(
int
start
,
int
end
,
int
gridx
,
int
gridy
,
int
gridz
,
vector
<
float
>&
recipEterm
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
const
unsigned
int
zsize
=
gridz
/
2
+
1
;
const
unsigned
int
zsize
=
gridz
/
2
+
1
;
const
unsigned
int
yzsize
=
gridy
*
zsize
;
const
unsigned
int
yzsize
=
gridy
*
zsize
;
const
float
scaleFactor
=
(
float
)
M_PI
*
sqrtf
(
M_PI
)
/
(
6.0
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
const
float
scaleFactor
=
(
float
)
-
2.0
f
*
M_PI
*
sqrtf
(
M_PI
)
/
(
6.0
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
float
bfac
=
M_PI
/
alpha
;
float
bfac
=
M_PI
/
alpha
;
float
fac1
=
2.0
f
*
M_PI
*
M_PI
*
M_PI
*
sqrtf
(
M_PI
);
float
fac1
=
2.0
f
*
M_PI
*
M_PI
*
M_PI
*
sqrtf
(
M_PI
);
float
fac2
=
alpha
*
alpha
*
alpha
;
float
fac2
=
alpha
*
alpha
*
alpha
;
float
fac3
=
-
2.0
f
*
alpha
*
M_PI
*
M_PI
;
float
fac3
=
-
2.0
f
*
alpha
*
M_PI
*
M_PI
;
float
b
,
m
,
m3
,
expfac
,
expterm
,
erfcterm
;
float
b
,
m
,
m3
,
expterm
,
erfcterm
,
t
;
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
int
mx
=
(
kx
<
(
gridx
+
1
)
/
2
)
?
kx
:
kx
-
gridx
;
int
mx
=
(
kx
<
(
gridx
+
1
)
/
2
)
?
kx
:
kx
-
gridx
;
...
@@ -185,10 +186,18 @@ static void computeReciprocalDispersionEterm(int start, int end, int gridx, int
...
@@ -185,10 +186,18 @@ static void computeReciprocalDispersionEterm(int start, int end, int gridx, int
m
=
sqrtf
(
m2
);
m
=
sqrtf
(
m2
);
m3
=
m
*
m2
;
m3
=
m
*
m2
;
b
=
bfac
*
m
;
b
=
bfac
*
m
;
expfac
=
-
b
*
b
;
expterm
=
exp
(
-
b
*
b
);
#if FAST_ERFC
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7. Stolen by ACS from the CUDA platform's AMOEBA plugin.
t
=
1.0
f
/
(
1.0
f
+
0.3275911
f
*
b
);
erfcterm
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
expterm
;
#else
erfcterm
=
erfc
(
b
);
erfcterm
=
erfc
(
b
);
expterm
=
exp
(
expfac
);
#endif
recipEterm
[
index
]
=
-
2.0
f
*
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
recipEterm
[
index
]
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
}
}
}
}
}
}
...
@@ -224,30 +233,16 @@ static void computeReciprocalEterm(int start, int end, int gridx, int gridy, int
...
@@ -224,30 +233,16 @@ static void computeReciprocalEterm(int start, int end, int gridx, int gridy, int
}
}
}
}
static
double
reciprocalEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
static
double
reciprocalEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
vector
<
float
>&
recipEterm
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
float
scaleFactor
=
(
float
)
(
M_PI
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
const
float
recipExpFactor
=
(
float
)
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
double
energy
=
0.0
;
double
energy
=
0.0
;
int
firstz
=
(
start
==
0
?
1
:
0
);
int
firstz
=
(
start
==
0
?
1
:
0
);
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
int
mx
=
(
kx
<
(
gridx
+
1
)
/
2
)
?
kx
:
kx
-
gridx
;
float
mhx
=
mx
*
(
float
)
recipBoxVectors
[
0
][
0
];
float
bx
=
scaleFactor
*
bsplineModuli
[
0
][
kx
];
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
int
my
=
(
ky
<
(
gridy
+
1
)
/
2
)
?
ky
:
ky
-
gridy
;
float
mhy
=
mx
*
(
float
)
recipBoxVectors
[
1
][
0
]
+
my
*
(
float
)
recipBoxVectors
[
1
][
1
];
float
mhx2y2
=
mhx
*
mhx
+
mhy
*
mhy
;
float
bxby
=
bx
*
bsplineModuli
[
1
][
ky
];
for
(
int
kz
=
firstz
;
kz
<
gridz
;
kz
++
)
{
for
(
int
kz
=
firstz
;
kz
<
gridz
;
kz
++
)
{
int
mz
=
(
kz
<
(
gridz
+
1
)
/
2
)
?
kz
:
kz
-
gridz
;
float
mhz
=
mx
*
(
float
)
recipBoxVectors
[
2
][
0
]
+
my
*
(
float
)
recipBoxVectors
[
2
][
1
]
+
mz
*
(
float
)
recipBoxVectors
[
2
][
2
];
float
bz
=
bsplineModuli
[
2
][
kz
];
float
m2
=
mhx2y2
+
mhz
*
mhz
;
float
denom
=
m2
*
bxby
*
bz
;
float
eterm
=
exp
(
-
recipExpFactor
*
m2
)
/
denom
;
int
kx1
,
ky1
,
kz1
;
int
kx1
,
ky1
,
kz1
;
if
(
kz
>=
gridz
/
2
+
1
)
{
if
(
kz
>=
gridz
/
2
+
1
)
{
kx1
=
(
kx
==
0
?
kx
:
gridx
-
kx
);
kx1
=
(
kx
==
0
?
kx
:
gridx
-
kx
);
...
@@ -262,7 +257,7 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, int grid
...
@@ -262,7 +257,7 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, int grid
int
index
=
kx1
*
yzsizeHalf
+
ky1
*
zsizeHalf
+
kz1
;
int
index
=
kx1
*
yzsizeHalf
+
ky1
*
zsizeHalf
+
kz1
;
float
gridReal
=
grid
[
index
][
0
];
float
gridReal
=
grid
[
index
][
0
];
float
gridImag
=
grid
[
index
][
1
];
float
gridImag
=
grid
[
index
][
1
];
energy
+=
eterm
*
(
gridReal
*
gridReal
+
gridImag
*
gridImag
);
energy
+=
recipEterm
[
index
]
*
(
gridReal
*
gridReal
+
gridImag
*
gridImag
);
}
}
firstz
=
0
;
firstz
=
0
;
}
}
...
@@ -271,41 +266,15 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, int grid
...
@@ -271,41 +266,15 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, int grid
}
}
static
double
reciprocalDispersionEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
static
double
reciprocalDispersionEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
const
vector
<
float
>&
recipEterm
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
float
scaleFactor
=
(
float
)
M_PI
*
sqrtf
(
M_PI
)
/
(
6.0
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
float
bfac
=
M_PI
/
alpha
;
float
fac1
=
2.0
f
*
M_PI
*
M_PI
*
M_PI
*
sqrtf
(
M_PI
);
float
fac2
=
alpha
*
alpha
*
alpha
;
float
fac3
=
-
2.0
f
*
alpha
*
M_PI
*
M_PI
;
float
b
,
m
,
m3
,
expfac
,
expterm
,
erfcterm
;
double
energy
=
0.0
;
double
energy
=
0.0
;
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
int
mx
=
(
kx
<
(
gridx
+
1
)
/
2
)
?
kx
:
kx
-
gridx
;
float
mhx
=
mx
*
(
float
)
recipBoxVectors
[
0
][
0
];
float
bx
=
bsplineModuli
[
0
][
kx
];
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
int
my
=
(
ky
<
(
gridy
+
1
)
/
2
)
?
ky
:
ky
-
gridy
;
float
mhy
=
mx
*
(
float
)
recipBoxVectors
[
1
][
0
]
+
my
*
(
float
)
recipBoxVectors
[
1
][
1
];
float
mhx2y2
=
mhx
*
mhx
+
mhy
*
mhy
;
float
bxby
=
bx
*
bsplineModuli
[
1
][
ky
];
for
(
int
kz
=
0
;
kz
<
gridz
;
kz
++
)
{
for
(
int
kz
=
0
;
kz
<
gridz
;
kz
++
)
{
int
mz
=
(
kz
<
(
gridz
+
1
)
/
2
)
?
kz
:
kz
-
gridz
;
float
mhz
=
mx
*
(
float
)
recipBoxVectors
[
2
][
0
]
+
my
*
(
float
)
recipBoxVectors
[
2
][
1
]
+
mz
*
(
float
)
recipBoxVectors
[
2
][
2
];
float
bz
=
bsplineModuli
[
2
][
kz
];
float
m2
=
mhx2y2
+
mhz
*
mhz
;
float
denom
=
scaleFactor
/
(
bxby
*
bz
);
m
=
sqrtf
(
m2
);
m3
=
m
*
m2
;
b
=
bfac
*
m
;
expfac
=
-
b
*
b
;
erfcterm
=
erfc
(
b
);
expterm
=
exp
(
expfac
);
float
eterm
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
int
kx1
,
ky1
,
kz1
;
int
kx1
,
ky1
,
kz1
;
if
(
kz
>=
gridz
/
2
+
1
)
{
if
(
kz
>=
gridz
/
2
+
1
)
{
kx1
=
(
kx
==
0
?
kx
:
gridx
-
kx
);
kx1
=
(
kx
==
0
?
kx
:
gridx
-
kx
);
...
@@ -320,13 +289,14 @@ static double reciprocalDispersionEnergy(int start, int end, fftwf_complex* grid
...
@@ -320,13 +289,14 @@ static double reciprocalDispersionEnergy(int start, int end, fftwf_complex* grid
int
index
=
kx1
*
yzsizeHalf
+
ky1
*
zsizeHalf
+
kz1
;
int
index
=
kx1
*
yzsizeHalf
+
ky1
*
zsizeHalf
+
kz1
;
float
gridReal
=
grid
[
index
][
0
];
float
gridReal
=
grid
[
index
][
0
];
float
gridImag
=
grid
[
index
][
1
];
float
gridImag
=
grid
[
index
][
1
];
energy
+=
eterm
*
(
gridReal
*
gridReal
+
gridImag
*
gridImag
);
energy
+=
recipEterm
[
index
]
*
(
gridReal
*
gridReal
+
gridImag
*
gridImag
);
}
}
}
}
}
}
return
-
energy
;
return
0.5
f
*
energy
;
}
}
static
void
reciprocalConvolution
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
vector
<
float
>&
recipEterm
)
{
static
void
reciprocalConvolution
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
vector
<
float
>&
recipEterm
)
{
for
(
int
index
=
start
;
index
<
end
;
index
++
)
{
for
(
int
index
=
start
;
index
<
end
;
index
++
)
{
float
eterm
=
recipEterm
[
index
];
float
eterm
=
recipEterm
[
index
];
...
@@ -638,7 +608,7 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
...
@@ -638,7 +608,7 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
threads
.
syncThreads
();
threads
.
syncThreads
();
}
}
if
(
includeEnergy
)
{
if
(
includeEnergy
)
{
threadEnergy
[
index
]
=
reciprocalEnergy
(
gridxStart
,
gridxEnd
,
complexGrid
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxVectors
,
recipBoxVectors
);
threadEnergy
[
index
]
=
reciprocalEnergy
(
gridxStart
,
gridxEnd
,
complexGrid
,
recipEterm
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxVectors
,
recipBoxVectors
);
threads
.
syncThreads
();
threads
.
syncThreads
();
}
}
reciprocalConvolution
(
complexStart
,
complexEnd
,
complexGrid
,
recipEterm
);
reciprocalConvolution
(
complexStart
,
complexEnd
,
complexGrid
,
recipEterm
);
...
@@ -650,11 +620,11 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
...
@@ -650,11 +620,11 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
threads
.
syncThreads
();
threads
.
syncThreads
();
}
}
if
(
includeEnergy
)
{
if
(
includeEnergy
)
{
threadEnergy
[
index
]
=
reciprocalDispersionEnergy
(
gridxStart
,
gridxEnd
,
complexGrid
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxVectors
,
recipBoxVectors
);
threadEnergy
[
index
]
=
reciprocalDispersionEnergy
(
gridxStart
,
gridxEnd
,
complexGrid
,
recipEterm
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxVectors
,
recipBoxVectors
);
threads
.
syncThreads
();
threads
.
syncThreads
();
}
}
// For dispersion, we include the {0,0,0} term, so the start point needs to be redefined
// For dispersion, we include the {0,0,0} term, so the start point needs to be redefined
complexStart
=
std
::
max
(
0
,
(
(
index
*
complexSize
)
/
numThreads
))
;
complexStart
=
(
index
*
complexSize
)
/
numThreads
;
reciprocalConvolution
(
complexStart
,
complexEnd
,
complexGrid
,
recipEterm
);
reciprocalConvolution
(
complexStart
,
complexEnd
,
complexGrid
,
recipEterm
);
threads
.
syncThreads
();
threads
.
syncThreads
();
break
;
break
;
...
...
plugins/cpupme/tests/TestCpuPme.cpp
View file @
79c0e604
...
@@ -542,7 +542,7 @@ void test_water2_dpme_energies_forces_no_exclusions() {
...
@@ -542,7 +542,7 @@ void test_water2_dpme_energies_forces_no_exclusions() {
pme
.
beginComputation
(
io
,
boxVectors
,
true
);
pme
.
beginComputation
(
io
,
boxVectors
,
true
);
double
recenergy
=
pme
.
finishComputation
(
io
);
double
recenergy
=
pme
.
finishComputation
(
io
);
ASSERT_EQUAL_TOL
(
recenergy
,
-
2.179629087
,
1
e-
5
);
ASSERT_EQUAL_TOL
(
recenergy
,
-
2.179629087
,
5
e-
3
);
ASSERT_EQUAL_TOL
(
selfEwaldEnergy
,
1.731404285
,
1e-5
);
ASSERT_EQUAL_TOL
(
selfEwaldEnergy
,
1.731404285
,
1e-5
);
std
::
vector
<
Vec3
>
knownforces
(
6
);
std
::
vector
<
Vec3
>
knownforces
(
6
);
...
@@ -553,15 +553,15 @@ void test_water2_dpme_energies_forces_no_exclusions() {
...
@@ -553,15 +553,15 @@ void test_water2_dpme_energies_forces_no_exclusions() {
knownforces
[
4
]
=
Vec3
(
0.0008242336483
,
0.003778910089
,
-
0.002116131106
);
knownforces
[
4
]
=
Vec3
(
0.0008242336483
,
0.003778910089
,
-
0.002116131106
);
knownforces
[
5
]
=
Vec3
(
0.004912763044
,
0.002324059399
,
-
0.002844482646
);
knownforces
[
5
]
=
Vec3
(
0.004912763044
,
0.002324059399
,
-
0.002844482646
);
for
(
int
i
=
0
;
i
<
NATOMS
;
i
++
)
for
(
int
i
=
0
;
i
<
NATOMS
;
i
++
)
ASSERT_EQUAL_VEC
(
refforces
[
i
],
knownforces
[
i
],
1
e-
5
);
ASSERT_EQUAL_VEC
(
refforces
[
i
],
knownforces
[
i
],
5
e-
3
);
recenergy
+=
selfEwaldEnergy
;
recenergy
+=
selfEwaldEnergy
;
// See if they match.
// See if they match.
ASSERT_EQUAL_TOL
(
refenergy
,
recenergy
,
1e-
5
);
ASSERT_EQUAL_TOL
(
refenergy
,
recenergy
,
1e-
3
);
for
(
int
i
=
0
;
i
<
NATOMS
;
i
++
)
for
(
int
i
=
0
;
i
<
NATOMS
;
i
++
)
ASSERT_EQUAL_VEC
(
refforces
[
i
],
Vec3
(
io
.
force
[
4
*
i
],
io
.
force
[
4
*
i
+
1
],
io
.
force
[
4
*
i
+
2
]),
1
e-
5
);
ASSERT_EQUAL_VEC
(
refforces
[
i
],
Vec3
(
io
.
force
[
4
*
i
],
io
.
force
[
4
*
i
+
1
],
io
.
force
[
4
*
i
+
2
]),
5
e-
3
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment