Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
6caf992a
Commit
6caf992a
authored
Dec 06, 2013
by
peastman
Browse files
Merge pull request #229 from peastman/master
Further optimizations to CPU platform
parents
d600e589
99f3a8a2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
77 additions
and
69 deletions
+77
-69
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+6
-0
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+71
-69
No files found.
openmmapi/include/openmm/internal/vectorize.h
View file @
6caf992a
...
@@ -273,5 +273,11 @@ static inline fvec4 operator/(float v1, fvec4 v2) {
...
@@ -273,5 +273,11 @@ static inline fvec4 operator/(float v1, fvec4 v2) {
return
fvec4
(
v1
)
/
v2
;
return
fvec4
(
v1
)
/
v2
;
}
}
/**
 * Blend two fvec4s lane by lane under the control of an ivec4 mask.
 *
 * The integer mask is reinterpreted bit-for-bit as packed floats (no numeric
 * conversion) and passed as the selector to _mm_blendv_ps().  Per the Intel
 * documentation for that intrinsic, a lane whose selector has its most
 * significant bit set takes the value from v2; every other lane takes the
 * value from v1.
 *
 * @param v1    values used for lanes where the mask's high bit is clear
 * @param v2    values used for lanes where the mask's high bit is set
 * @param mask  per-lane selector
 * @return the blended vector
 */
static inline fvec4 blend(fvec4 v1, fvec4 v2, ivec4 mask) {
    // Reinterpret the integer lanes as floats so they can serve as the blend selector.
    const __m128 selector = _mm_castsi128_ps(mask.val);
    const __m128 blended = _mm_blendv_ps(v1.val, v2.val, selector);
    return fvec4(blended);
}
#endif
/*OPENMM_VECTORIZE_H_*/
#endif
/*OPENMM_VECTORIZE_H_*/
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
6caf992a
...
@@ -477,7 +477,6 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
...
@@ -477,7 +477,6 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
bool
include
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
// Load the next neighbor.
...
@@ -486,75 +485,77 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
...
@@ -486,75 +485,77 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
bool
any
=
false
;
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
ivec4
include
;
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
(
!
cutoff
||
r2
[
j
]
<
cutoffDistance
*
cutoffDistance
));
char
excl
=
exclusions
[
i
];
any
|=
include
[
j
];
if
(
excl
==
0
)
}
include
=
-
1
;
if
(
!
any
)
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
continue
;
// No interactions to compute.
// Compute the interactions.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
fvec4
energy
,
dEdR
;
if
(
useSwitch
)
{
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
if
(
atomEpsilon
!=
0.0
f
)
{
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
}
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
*
sig6
;
fvec4
dEdR
=
switchValue
*
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
;
if
(
useSwitch
)
{
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
// Accumulate energies.
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
if
(
!
useSwitch
)
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
cutoff
)
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
else
energy
+=
chargeProd
*
inverseR
;
energy
+=
chargeProd
*
inverseR
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
energy
=
blend
(
0.0
f
,
energy
,
include
);
if
(
include
[
j
])
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
*
totalEnergy
+=
energy
[
j
];
}
}
// Accumulate forces.
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
blockAtomForce
[
j
]
+=
result
[
j
];
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
atomForce
-=
result
[
j
];
}
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
atomForce
.
store
(
forces
+
4
*
atom
);
}
}
// Record the forces on the block atoms.
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
...
@@ -588,7 +589,6 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
...
@@ -588,7 +589,6 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
bool
include
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
// Load the next neighbor.
...
@@ -597,63 +597,65 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
...
@@ -597,63 +597,65 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
bool
any
=
false
;
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
ivec4
include
;
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
r2
[
j
]
<
cutoffDistance
*
cutoffDistance
);
char
excl
=
exclusions
[
i
];
any
|=
include
[
j
];
if
(
excl
==
0
)
}
include
=
-
1
;
if
(
!
any
)
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
continue
;
// No interactions to compute.
// Compute the interactions.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
fvec4
energy
,
dEdR
;
if
(
useSwitch
)
{
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
if
(
atomEpsilon
!=
0.0
f
)
{
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
fvec4
sig2
=
inverseR
*
sig
;
}
sig2
*=
sig2
;
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
dEdR
=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
*
sig6
;
dEdR
+=
switchValue
*
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
;
if
(
useSwitch
)
{
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
if
(
useSwitch
)
{
energy
*=
switchValue
;
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
if
(
!
useSwitch
)
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
energy
=
blend
(
0.0
f
,
energy
,
include
);
if
(
include
[
j
])
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
*
totalEnergy
+=
energy
[
j
];
}
}
// Accumulate forces.
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
blockAtomForce
[
j
]
+=
result
[
j
];
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
atomForce
-=
result
[
j
];
}
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
atomForce
.
store
(
forces
+
4
*
atom
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment