Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
99f3a8a2
Commit
99f3a8a2
authored
Dec 06, 2013
by
peastman
Browse files
Further optimizations to CPU platform
parent
2882737d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
77 additions
and
69 deletions
+77
-69
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+6
-0
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+71
-69
No files found.
openmmapi/include/openmm/internal/vectorize.h
View file @
99f3a8a2
...
@@ -273,5 +273,11 @@ static inline fvec4 operator/(float v1, fvec4 v2) {
...
@@ -273,5 +273,11 @@ static inline fvec4 operator/(float v1, fvec4 v2) {
return
fvec4
(
v1
)
/
v2
;
return
fvec4
(
v1
)
/
v2
;
}
}
// Lane-wise selection between two fvec4s, controlled by an ivec4 mask.
//
// The integer mask is reinterpreted (bit-for-bit, no numeric conversion) as a
// packed-float register and handed to _mm_blendv_ps. Per the SSE4.1 definition
// of that intrinsic, each lane of the result is taken from v2 when the most
// significant bit of the corresponding mask lane is set, and from v1 otherwise.
// A mask lane of -1 therefore picks v2 and a mask lane of 0 picks v1.
static inline fvec4 blend(fvec4 v1, fvec4 v2, ivec4 mask) {
    fvec4 selected(_mm_blendv_ps(v1.val, v2.val, _mm_castsi128_ps(mask.val)));
    return selected;
}
#endif
/*OPENMM_VECTORIZE_H_*/
#endif
/*OPENMM_VECTORIZE_H_*/
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
99f3a8a2
...
@@ -477,7 +477,6 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
...
@@ -477,7 +477,6 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
bool
include
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
// Load the next neighbor.
...
@@ -486,75 +485,77 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
...
@@ -486,75 +485,77 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
bool
any
=
false
;
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
ivec4
include
;
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
(
!
cutoff
||
r2
[
j
]
<
cutoffDistance
*
cutoffDistance
));
char
excl
=
exclusions
[
i
];
any
|=
include
[
j
];
if
(
excl
==
0
)
}
include
=
-
1
;
if
(
!
any
)
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
continue
;
// No interactions to compute.
// Compute the interactions.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
fvec4
energy
,
dEdR
;
if
(
useSwitch
)
{
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
if
(
atomEpsilon
!=
0.0
f
)
{
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
}
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
*
sig6
;
fvec4
dEdR
=
switchValue
*
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
;
if
(
useSwitch
)
{
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
// Accumulate energies.
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
if
(
!
useSwitch
)
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
cutoff
)
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
else
energy
+=
chargeProd
*
inverseR
;
energy
+=
chargeProd
*
inverseR
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
energy
=
blend
(
0.0
f
,
energy
,
include
);
if
(
include
[
j
])
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
*
totalEnergy
+=
energy
[
j
];
}
}
// Accumulate forces.
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
blockAtomForce
[
j
]
+=
result
[
j
];
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
atomForce
-=
result
[
j
];
}
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
atomForce
.
store
(
forces
+
4
*
atom
);
}
}
// Record the forces on the block atoms.
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
...
@@ -588,7 +589,6 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
...
@@ -588,7 +589,6 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
bool
include
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
// Load the next neighbor.
...
@@ -597,63 +597,65 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
...
@@ -597,63 +597,65 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
bool
any
=
false
;
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
ivec4
include
;
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
r2
[
j
]
<
cutoffDistance
*
cutoffDistance
);
char
excl
=
exclusions
[
i
];
any
|=
include
[
j
];
if
(
excl
==
0
)
}
include
=
-
1
;
if
(
!
any
)
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
continue
;
// No interactions to compute.
// Compute the interactions.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
fvec4
energy
,
dEdR
;
if
(
useSwitch
)
{
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
if
(
atomEpsilon
!=
0.0
f
)
{
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
fvec4
sig2
=
inverseR
*
sig
;
}
sig2
*=
sig2
;
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
dEdR
=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
*
sig6
;
dEdR
+=
switchValue
*
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
;
if
(
useSwitch
)
{
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
if
(
useSwitch
)
{
energy
*=
switchValue
;
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
if
(
!
useSwitch
)
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
energy
=
blend
(
0.0
f
,
energy
,
include
);
if
(
include
[
j
])
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
*
totalEnergy
+=
energy
[
j
];
}
}
// Accumulate forces.
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
blockAtomForce
[
j
]
+=
result
[
j
];
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
atomForce
-=
result
[
j
];
}
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
atomForce
.
store
(
forces
+
4
*
atom
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment