Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d600e589
Commit
d600e589
authored
Dec 05, 2013
by
peastman
Browse files
Merge pull request #228 from peastman/master
Minor optimizations to CPU platform
parents
72959084
2882737d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
35 additions
and
27 deletions
+35
-27
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+6
-0
platforms/cpu/src/CpuGBSAOBCForce.cpp
platforms/cpu/src/CpuGBSAOBCForce.cpp
+9
-10
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+20
-17
No files found.
openmmapi/include/openmm/internal/vectorize.h
View file @
d600e589
...
@@ -91,6 +91,9 @@ public:
...
@@ -91,6 +91,9 @@ public:
fvec4
operator
&
(
fvec4
other
)
const
{
fvec4
operator
&
(
fvec4
other
)
const
{
return
_mm_and_ps
(
val
,
other
);
return
_mm_and_ps
(
val
,
other
);
}
}
fvec4
operator
|
(
fvec4
other
)
const
{
return
_mm_or_ps
(
val
,
other
);
}
fvec4
operator
==
(
fvec4
other
)
const
{
fvec4
operator
==
(
fvec4
other
)
const
{
return
_mm_cmpeq_ps
(
val
,
other
);
return
_mm_cmpeq_ps
(
val
,
other
);
}
}
...
@@ -157,6 +160,9 @@ public:
...
@@ -157,6 +160,9 @@ public:
ivec4
operator
&
(
ivec4
other
)
const
{
ivec4
operator
&
(
ivec4
other
)
const
{
return
_mm_and_si128
(
val
,
other
);
return
_mm_and_si128
(
val
,
other
);
}
}
ivec4
operator
|
(
ivec4
other
)
const
{
return
_mm_or_si128
(
val
,
other
);
}
ivec4
operator
==
(
ivec4
other
)
const
{
ivec4
operator
==
(
ivec4
other
)
const
{
return
_mm_cmpeq_epi32
(
val
,
other
);
return
_mm_cmpeq_epi32
(
val
,
other
);
}
}
...
...
platforms/cpu/src/CpuGBSAOBCForce.cpp
View file @
d600e589
...
@@ -48,8 +48,8 @@ public:
...
@@ -48,8 +48,8 @@ public:
CpuGBSAOBCForce
::
CpuGBSAOBCForce
()
:
cutoff
(
false
),
periodic
(
false
)
{
CpuGBSAOBCForce
::
CpuGBSAOBCForce
()
:
cutoff
(
false
),
periodic
(
false
)
{
logDX
=
(
TABLE_MAX
-
TABLE_MIN
)
/
NUM_TABLE_POINTS
;
logDX
=
(
TABLE_MAX
-
TABLE_MIN
)
/
NUM_TABLE_POINTS
;
logDXInv
=
1.0
f
/
logDX
;
logDXInv
=
1.0
f
/
logDX
;
logTable
.
resize
(
NUM_TABLE_POINTS
+
1
);
logTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
1
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
x
=
TABLE_MIN
+
i
*
logDX
;
double
x
=
TABLE_MIN
+
i
*
logDX
;
logTable
[
i
]
=
log
(
x
);
logTable
[
i
]
=
log
(
x
);
}
}
...
@@ -395,17 +395,16 @@ void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4&
...
@@ -395,17 +395,16 @@ void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4&
fvec4
CpuGBSAOBCForce
::
fastLog
(
fvec4
x
)
{
fvec4
CpuGBSAOBCForce
::
fastLog
(
fvec4
x
)
{
// Evaluate log(x) using a lookup table for speed.
// Evaluate log(x) using a lookup table for speed.
if
(
any
(
x
<
TABLE_MIN
)
|
|
any
(
x
>=
TABLE_MAX
))
if
(
any
(
(
x
<
TABLE_MIN
)
|
(
x
>=
TABLE_MAX
))
)
return
fvec4
(
logf
(
x
[
0
]),
logf
(
x
[
1
]),
logf
(
x
[
2
]),
logf
(
x
[
3
]));
return
fvec4
(
logf
(
x
[
0
]),
logf
(
x
[
1
]),
logf
(
x
[
2
]),
logf
(
x
[
3
]));
fvec4
x1
=
(
x
-
TABLE_MIN
)
*
logDXInv
;
fvec4
x1
=
(
x
-
TABLE_MIN
)
*
logDXInv
;
ivec4
index
=
floor
(
x1
);
ivec4
index
=
floor
(
x1
);
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
float
table1
[
4
],
table2
[
4
];
fvec4
t1
(
&
logTable
[
index
[
0
]]);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
fvec4
t2
(
&
logTable
[
index
[
1
]]);
int
tableIndex
=
index
[
i
];
fvec4
t3
(
&
logTable
[
index
[
2
]]);
table1
[
i
]
=
logTable
[
tableIndex
];
fvec4
t4
(
&
logTable
[
index
[
3
]]);
table2
[
i
]
=
logTable
[
tableIndex
+
1
];
transpose
(
t1
,
t2
,
t3
,
t4
);
}
return
coeff1
*
t1
+
coeff2
*
t2
;
return
coeff1
*
fvec4
(
table1
)
+
coeff2
*
fvec4
(
table2
);
}
}
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
d600e589
...
@@ -163,10 +163,10 @@ void CpuNonbondedForce::tabulateEwaldScaleFactor() {
...
@@ -163,10 +163,10 @@ void CpuNonbondedForce::tabulateEwaldScaleFactor() {
if
(
tableIsValid
)
if
(
tableIsValid
)
return
;
return
;
tableIsValid
=
true
;
tableIsValid
=
true
;
ewaldDX
=
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
)
;
ewaldDX
=
cutoffDistance
/
NUM_TABLE_POINTS
;
ewaldDXInv
=
1.0
f
/
ewaldDX
;
ewaldDXInv
=
1.0
f
/
ewaldDX
;
ewaldScaleTable
.
resize
(
NUM_TABLE_POINTS
+
1
);
ewaldScaleTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
1
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
r
=
i
*
ewaldDX
;
double
r
=
i
*
ewaldDX
;
double
alphaR
=
alphaEwald
*
r
;
double
alphaR
=
alphaEwald
*
r
;
ewaldScaleTable
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
ewaldScaleTable
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
...
@@ -510,16 +510,17 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
...
@@ -510,16 +510,17 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
fvec4
sig2
=
inverseR
*
sig
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
eps
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
;
fvec4
eps
Sig6
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
*
sig6
;
fvec4
dEdR
=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
fvec4
dEdR
=
switchValue
*
eps
Sig6
*
(
12.0
f
*
sig6
-
6.0
f
);
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
fvec4
energy
;
if
(
useSwitch
)
{
if
(
useSwitch
)
{
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
energy
*=
switchValue
;
}
}
...
@@ -527,6 +528,8 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
...
@@ -527,6 +528,8 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Accumulate energies.
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
if
(
!
useSwitch
)
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
cutoff
)
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
else
...
@@ -623,8 +626,9 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
...
@@ -623,8 +626,9 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
fvec4
epsSig6
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
*
sig6
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
*
sig6
;
dEdR
+=
switchValue
*
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
+=
switchValue
*
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
*=
inverseR
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
=
epsSig6
*
(
sig6
-
1.0
f
)
;
fvec4
energy
;
if
(
useSwitch
)
{
if
(
useSwitch
)
{
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
energy
*=
switchValue
;
}
}
...
@@ -632,6 +636,8 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
...
@@ -632,6 +636,8 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Accumulate energies.
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
if
(
!
useSwitch
)
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
if
(
include
[
j
])
if
(
include
[
j
])
...
@@ -695,16 +701,13 @@ fvec4 CpuNonbondedForce::ewaldScaleFunction(fvec4 x) {
...
@@ -695,16 +701,13 @@ fvec4 CpuNonbondedForce::ewaldScaleFunction(fvec4 x) {
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
fvec4
x1
=
x
*
ewaldDXInv
;
fvec4
x1
=
x
*
ewaldDXInv
;
ivec4
index
=
floor
(
x1
);
ivec4
index
=
min
(
floor
(
x1
)
,
NUM_TABLE_POINTS
)
;
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
float
table1
[
4
],
table2
[
4
];
fvec4
t1
(
&
ewaldScaleTable
[
index
[
0
]]);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
fvec4
t2
(
&
ewaldScaleTable
[
index
[
1
]]);
int
tableIndex
=
index
[
i
];
fvec4
t3
(
&
ewaldScaleTable
[
index
[
2
]]);
if
(
tableIndex
<
NUM_TABLE_POINTS
)
{
fvec4
t4
(
&
ewaldScaleTable
[
index
[
3
]]);
table1
[
i
]
=
ewaldScaleTable
[
tableIndex
];
transpose
(
t1
,
t2
,
t3
,
t4
);
table2
[
i
]
=
ewaldScaleTable
[
tableIndex
+
1
];
return
coeff1
*
t1
+
coeff2
*
t2
;
}
}
return
coeff1
*
fvec4
(
table1
)
+
coeff2
*
fvec4
(
table2
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment