Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3a415486
Commit
3a415486
authored
Apr 10, 2015
by
peastman
Browse files
Optimizations to CpuNonbondedForce
parent
79b85ad7
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
44 additions
and
12 deletions
+44
-12
openmmapi/include/openmm/internal/vectorize8.h
openmmapi/include/openmm/internal/vectorize8.h
+12
-0
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+6
-2
openmmapi/include/openmm/internal/vectorize_pnacl.h
openmmapi/include/openmm/internal/vectorize_pnacl.h
+6
-2
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+12
-0
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+4
-4
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+4
-4
No files found.
openmmapi/include/openmm/internal/vectorize8.h
View file @
3a415486
...
...
@@ -191,6 +191,18 @@ static inline fvec8 sqrt(const fvec8& v) {
return
fvec8
(
_mm256_sqrt_ps
(
v
.
val
));
}
static
inline
fvec8
rsqrt
(
const
fvec8
&
v
)
{
// Initial estimate of rsqrt().
fvec8
y
(
_mm256_rsqrt_ps
(
v
.
val
));
// Perform an iteration of Newton refinement.
fvec8
x2
=
v
*
0.5
f
;
y
*=
fvec8
(
1.5
f
)
-
x2
*
y
*
y
;
return
y
;
}
static
inline
float
dot8
(
const
fvec8
&
v1
,
const
fvec8
&
v2
)
{
fvec8
result
=
_mm256_dp_ps
(
v1
,
v2
,
0xF1
);
return
_mm_cvtss_f32
(
result
.
lowerVec
())
+
_mm_cvtss_f32
(
result
.
upperVec
());
...
...
openmmapi/include/openmm/internal/vectorize_neon.h
View file @
3a415486
...
...
@@ -251,11 +251,15 @@ static inline fvec4 abs(const fvec4& v) {
return
vabsq_f32
(
v
);
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
r
sqrt
(
const
fvec4
&
v
)
{
float32x4_t
recipSqrt
=
vrsqrteq_f32
(
v
);
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
return
vmulq_f32
(
v
,
recipSqrt
);
return
recipSqrt
;
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
return
rsqrt
(
v
)
*
v
;
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
...
...
openmmapi/include/openmm/internal/vectorize_pnacl.h
View file @
3a415486
...
...
@@ -330,7 +330,7 @@ static inline fvec4 ceil(const fvec4& v) {
return
truncated
+
blend
(
0.0
f
,
1.0
f
,
truncated
<
v
);
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
r
sqrt
(
const
fvec4
&
v
)
{
// Initial estimate of rsqrt().
ivec4
i
=
(
__m128i
)
v
;
...
...
@@ -343,7 +343,11 @@ static inline fvec4 sqrt(const fvec4& v) {
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
return
y
*
v
;
return
y
;
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
return
rsqrt
(
v
)
*
v
;
}
#endif
/*OPENMM_VECTORIZE_PNACL_H_*/
...
...
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
3a415486
...
...
@@ -241,6 +241,18 @@ static inline fvec4 sqrt(const fvec4& v) {
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
static
inline
fvec4
rsqrt
(
const
fvec4
&
v
)
{
// Initial estimate of rsqrt().
fvec4
y
(
_mm_rsqrt_ps
(
v
.
val
));
// Perform an iteration of Newton refinement.
fvec4
x2
=
v
*
0.5
f
;
y
*=
fvec4
(
1.5
f
)
-
x2
*
y
*
y
;
return
y
;
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
...
...
platforms/cpu/src/CpuNonbondedForceVec4.cpp
View file @
3a415486
...
...
@@ -95,8 +95,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
inverseR
=
rsqrt
(
r2
);
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
...
...
@@ -108,6 +107,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
r
=
r2
*
inverseR
;
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
...
...
@@ -212,8 +212,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
inverseR
=
r
sqrt
(
r2
);
fvec4
r
=
r2
*
inverseR
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
...
...
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
3a415486
...
...
@@ -126,8 +126,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
inverseR
=
rsqrt
(
r2
);
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
...
...
@@ -139,6 +138,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec8
r
=
r2
*
inverseR
;
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
...
...
@@ -242,8 +242,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
inverseR
=
r
sqrt
(
r2
);
fvec8
r
=
r2
*
inverseR
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment