Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
2882737d
Commit
2882737d
authored
Dec 05, 2013
by
peastman
Browse files
Minor optimizations to CPU platform
parent
2cd817b4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
35 additions
and
27 deletions
+35
-27
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+6
-0
platforms/cpu/src/CpuGBSAOBCForce.cpp
platforms/cpu/src/CpuGBSAOBCForce.cpp
+9
-10
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+20
-17
No files found.
openmmapi/include/openmm/internal/vectorize.h
View file @
2882737d
...
...
@@ -91,6 +91,9 @@ public:
fvec4
operator
&
(
fvec4
other
)
const
{
return
_mm_and_ps
(
val
,
other
);
}
fvec4
operator
|
(
fvec4
other
)
const
{
return
_mm_or_ps
(
val
,
other
);
}
fvec4
operator
==
(
fvec4
other
)
const
{
return
_mm_cmpeq_ps
(
val
,
other
);
}
...
...
@@ -157,6 +160,9 @@ public:
ivec4
operator
&
(
ivec4
other
)
const
{
return
_mm_and_si128
(
val
,
other
);
}
ivec4
operator
|
(
ivec4
other
)
const
{
return
_mm_or_si128
(
val
,
other
);
}
ivec4
operator
==
(
ivec4
other
)
const
{
return
_mm_cmpeq_epi32
(
val
,
other
);
}
...
...
platforms/cpu/src/CpuGBSAOBCForce.cpp
View file @
2882737d
...
...
@@ -48,8 +48,8 @@ public:
CpuGBSAOBCForce
::
CpuGBSAOBCForce
()
:
cutoff
(
false
),
periodic
(
false
)
{
logDX
=
(
TABLE_MAX
-
TABLE_MIN
)
/
NUM_TABLE_POINTS
;
logDXInv
=
1.0
f
/
logDX
;
logTable
.
resize
(
NUM_TABLE_POINTS
+
1
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
1
;
i
++
)
{
logTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
x
=
TABLE_MIN
+
i
*
logDX
;
logTable
[
i
]
=
log
(
x
);
}
...
...
@@ -395,17 +395,16 @@ void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4&
fvec4
CpuGBSAOBCForce
::
fastLog
(
fvec4
x
)
{
// Evaluate log(x) using a lookup table for speed.
if
(
any
(
x
<
TABLE_MIN
)
|
|
any
(
x
>=
TABLE_MAX
))
if
(
any
(
(
x
<
TABLE_MIN
)
|
(
x
>=
TABLE_MAX
))
)
return
fvec4
(
logf
(
x
[
0
]),
logf
(
x
[
1
]),
logf
(
x
[
2
]),
logf
(
x
[
3
]));
fvec4
x1
=
(
x
-
TABLE_MIN
)
*
logDXInv
;
ivec4
index
=
floor
(
x1
);
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
float
table1
[
4
],
table2
[
4
];
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
int
tableIndex
=
index
[
i
];
table1
[
i
]
=
logTable
[
tableIndex
];
table2
[
i
]
=
logTable
[
tableIndex
+
1
];
}
return
coeff1
*
fvec4
(
table1
)
+
coeff2
*
fvec4
(
table2
);
fvec4
t1
(
&
logTable
[
index
[
0
]]);
fvec4
t2
(
&
logTable
[
index
[
1
]]);
fvec4
t3
(
&
logTable
[
index
[
2
]]);
fvec4
t4
(
&
logTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
2882737d
...
...
@@ -163,10 +163,10 @@ void CpuNonbondedForce::tabulateEwaldScaleFactor() {
if
(
tableIsValid
)
return
;
tableIsValid
=
true
;
ewaldDX
=
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
)
;
ewaldDX
=
cutoffDistance
/
NUM_TABLE_POINTS
;
ewaldDXInv
=
1.0
f
/
ewaldDX
;
ewaldScaleTable
.
resize
(
NUM_TABLE_POINTS
+
1
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
1
;
i
++
)
{
ewaldScaleTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
r
=
i
*
ewaldDX
;
double
alphaR
=
alphaEwald
*
r
;
ewaldScaleTable
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
...
...
@@ -510,16 +510,17 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
eps
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
;
fvec4
dEdR
=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
fvec4
eps
Sig6
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
*
sig6
;
fvec4
dEdR
=
switchValue
*
eps
Sig6
*
(
12.0
f
*
sig6
-
6.0
f
);
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
fvec4
energy
;
if
(
useSwitch
)
{
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
...
...
@@ -527,6 +528,8 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
!
useSwitch
)
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
...
...
@@ -623,8 +626,9 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
fvec4
epsSig6
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
*
sig6
;
dEdR
+=
switchValue
*
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
=
epsSig6
*
(
sig6
-
1.0
f
)
;
fvec4
energy
;
if
(
useSwitch
)
{
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
...
...
@@ -632,6 +636,8 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
!
useSwitch
)
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
if
(
include
[
j
])
...
...
@@ -695,16 +701,13 @@ fvec4 CpuNonbondedForce::ewaldScaleFunction(fvec4 x) {
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
fvec4
x1
=
x
*
ewaldDXInv
;
ivec4
index
=
floor
(
x1
);
ivec4
index
=
min
(
floor
(
x1
)
,
NUM_TABLE_POINTS
)
;
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
float
table1
[
4
],
table2
[
4
];
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
int
tableIndex
=
index
[
i
];
if
(
tableIndex
<
NUM_TABLE_POINTS
)
{
table1
[
i
]
=
ewaldScaleTable
[
tableIndex
];
table2
[
i
]
=
ewaldScaleTable
[
tableIndex
+
1
];
}
}
return
coeff1
*
fvec4
(
table1
)
+
coeff2
*
fvec4
(
table2
);
fvec4
t1
(
&
ewaldScaleTable
[
index
[
0
]]);
fvec4
t2
(
&
ewaldScaleTable
[
index
[
1
]]);
fvec4
t3
(
&
ewaldScaleTable
[
index
[
2
]]);
fvec4
t4
(
&
ewaldScaleTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment