Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
be0d5cbe
"platforms/hip/tests/TestHipBrownianIntegrator.cpp" did not exist on "b06fc4a7a47478beacbaf5cc108f755653bc7abb"
Commit
be0d5cbe
authored
Aug 14, 2014
by
peastman
Browse files
Began vectorizing CustomManyParticleForce
parent
0e2ffb4b
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
57 additions
and
51 deletions
+57
-51
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+6
-0
openmmapi/include/openmm/internal/vectorize_pnacl.h
openmmapi/include/openmm/internal/vectorize_pnacl.h
+6
-0
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+6
-0
platforms/cpu/src/CpuCustomManyParticleForce.cpp
platforms/cpu/src/CpuCustomManyParticleForce.cpp
+39
-51
No files found.
openmmapi/include/openmm/internal/vectorize_neon.h
View file @
be0d5cbe
...
...
@@ -268,6 +268,12 @@ static inline float dot4(const fvec4& v1, const fvec4& v2) {
return
vgetq_lane_f32
(
result
,
0
)
+
vgetq_lane_f32
(
result
,
1
)
+
vgetq_lane_f32
(
result
,
2
)
+
vgetq_lane_f32
(
result
,
3
);
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
[
1
]
*
v2
[
2
]
-
v1
[
2
]
*
v2
[
1
],
v1
[
2
]
*
v2
[
0
]
-
v1
[
0
]
*
v2
[
2
],
v1
[
0
]
*
v2
[
1
]
-
v1
[
1
]
*
v2
[
0
],
0
);
}
static
inline
void
transpose
(
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
,
fvec4
&
v4
)
{
float32x4x2_t
t1
=
vuzpq_f32
(
v1
,
v3
);
float32x4x2_t
t2
=
vuzpq_f32
(
v2
,
v4
);
...
...
openmmapi/include/openmm/internal/vectorize_pnacl.h
View file @
be0d5cbe
...
...
@@ -255,6 +255,12 @@ static inline float dot4(const fvec4& v1, const fvec4& v2) {
return
r
[
0
]
+
r
[
1
]
+
r
[
2
]
+
r
[
3
];
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
__m128
temp
=
v2
.
val
*
__builtin_shufflevector
(
v1
.
val
,
v1
.
val
,
2
,
0
,
1
,
3
)
-
v1
.
val
*
__builtin_shufflevector
(
v2
.
val
,
v2
.
val
,
2
,
0
,
1
,
3
);
return
__builtin_shufflevector
(
temp
,
temp
,
2
,
0
,
1
,
3
);
}
static
inline
void
transpose
(
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
,
fvec4
&
v4
)
{
__m128
a1
=
__builtin_shufflevector
(
v1
.
val
,
v2
.
val
,
0
,
4
,
2
,
6
);
__m128
a2
=
__builtin_shufflevector
(
v1
.
val
,
v2
.
val
,
1
,
5
,
3
,
7
);
...
...
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
be0d5cbe
...
...
@@ -249,6 +249,12 @@ static inline float dot4(const fvec4& v1, const fvec4& v2) {
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
fvec4
temp
=
_mm_mul_ps
(
v2
,
_mm_shuffle_ps
(
v1
,
v1
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
)))
-
_mm_mul_ps
(
v1
,
_mm_shuffle_ps
(
v2
,
v2
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
)));
return
_mm_shuffle_ps
(
temp
,
temp
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
));
}
static
inline
void
transpose
(
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
,
fvec4
&
v4
)
{
_MM_TRANSPOSE4_PS
(
v1
,
v2
,
v3
,
v4
);
}
...
...
platforms/cpu/src/CpuCustomManyParticleForce.cpp
View file @
be0d5cbe
...
...
@@ -207,7 +207,7 @@ void CpuCustomManyParticleForce::setPeriodic(RealVec& boxSize) {
void
CpuCustomManyParticleForce
::
loopOverInteractions
(
vector
<
int
>&
availableParticles
,
vector
<
int
>&
particleSet
,
int
loopIndex
,
int
startIndex
,
RealOpenMM
**
particleParameters
,
float
*
forces
,
ThreadData
&
data
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
int
numParticles
=
availableParticles
.
size
();
//
double cutoff2 = cutoffDistance*cutoffDistance;
double
cutoff2
=
cutoffDistance
*
cutoffDistance
;
for
(
int
i
=
startIndex
;
i
<
numParticles
;
i
++
)
{
int
particle
=
availableParticles
[
i
];
...
...
@@ -215,18 +215,13 @@ void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availablePart
bool
include
=
true
;
if
(
useCutoff
)
{
// fvec4 deltaR;
// fvec4 pos1(posq+4*particle);
// float r2;
// for (int j = 0; j < loopIndex && include; j++) {
// fvec4 pos2(posq+4*particleSet[j]);
// getDeltaR(pos1, pos2, deltaR, r2, boxSize, invBoxSize);
// include &= (r2 < cutoff2);
// }
RealOpenMM
delta
[
ReferenceForce
::
LastDeltaRIndex
];
fvec4
deltaR
;
fvec4
pos1
(
posq
+
4
*
particle
);
float
r2
;
for
(
int
j
=
0
;
j
<
loopIndex
&&
include
;
j
++
)
{
computeDelta
(
particle
,
particleSet
[
j
],
delta
,
atomCoordinates
);
include
&=
(
delta
[
ReferenceForce
::
RIndex
]
<
cutoffDistance
);
fvec4
pos2
(
posq
+
4
*
particleSet
[
j
]);
getDeltaR
(
pos1
,
pos2
,
deltaR
,
r2
,
boxSize
,
invBoxSize
);
include
&=
(
r2
<
cutoff2
);
}
}
for
(
int
j
=
0
;
j
<
loopIndex
&&
include
;
j
++
)
...
...
@@ -299,10 +294,15 @@ void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, RealO
if
(
includeForces
)
{
// Apply forces based on individual particle coordinates.
vector
<
RealVec
>
f
(
numParticlesPerSet
);
AlignedArray
<
fvec4
>
f
(
numParticlesPerSet
);
for
(
int
i
=
0
;
i
<
numParticlesPerSet
;
i
++
)
f
[
i
]
=
fvec4
(
0.0
f
);
for
(
int
i
=
0
;
i
<
(
int
)
data
.
particleTerms
.
size
();
i
++
)
{
const
ParticleTermInfo
&
term
=
data
.
particleTerms
[
i
];
f
[
term
.
atom
][
term
.
component
]
-=
term
.
forceExpression
.
evaluate
();
float
temp
[
4
];
f
[
term
.
atom
].
store
(
temp
);
temp
[
term
.
component
]
-=
term
.
forceExpression
.
evaluate
();
f
[
term
.
atom
]
=
fvec4
(
temp
);
}
// Apply forces based on distances.
...
...
@@ -310,11 +310,9 @@ void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, RealO
for
(
int
i
=
0
;
i
<
(
int
)
data
.
distanceTerms
.
size
();
i
++
)
{
const
DistanceTermInfo
&
term
=
data
.
distanceTerms
[
i
];
RealOpenMM
dEdR
=
(
RealOpenMM
)
(
term
.
forceExpression
.
evaluate
()
*
term
.
deltaSign
/
(
delta
[
term
.
delta
][
ReferenceForce
::
RIndex
]));
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
RealOpenMM
force
=
-
dEdR
*
delta
[
term
.
delta
][
i
];
f
[
term
.
p1
][
i
]
-=
force
;
f
[
term
.
p2
][
i
]
+=
force
;
}
fvec4
force
=
-
dEdR
*
fvec4
((
float
)
delta
[
term
.
delta
][
0
],
(
float
)
delta
[
term
.
delta
][
1
],
(
float
)
delta
[
term
.
delta
][
2
],
0.0
f
);
f
[
term
.
p1
]
-=
force
;
f
[
term
.
p2
]
+=
force
;
}
// Apply forces based on angles.
...
...
@@ -322,26 +320,22 @@ void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, RealO
for
(
int
i
=
0
;
i
<
(
int
)
data
.
angleTerms
.
size
();
i
++
)
{
const
AngleTermInfo
&
term
=
data
.
angleTerms
[
i
];
RealOpenMM
dEdTheta
=
(
RealOpenMM
)
term
.
forceExpression
.
evaluate
();
RealOpenMM
thetaCross
[
ReferenceForce
::
LastDeltaRIndex
];
SimTKOpenMMUtilities
::
crossProductVector3
(
delta
[
term
.
delta1
],
delta
[
term
.
delta2
],
thetaCross
);
RealOpenMM
lengthThetaCross
=
SQRT
(
DOT3
(
thetaCross
,
thetaCross
));
if
(
lengthThetaCross
<
1.0e-06
)
lengthThetaCross
=
(
RealOpenMM
)
1.0e-06
;
fvec4
delta1
((
float
)
delta
[
term
.
delta1
][
0
],
(
float
)
delta
[
term
.
delta1
][
1
],
(
float
)
delta
[
term
.
delta1
][
2
],
0.0
f
);
fvec4
delta2
((
float
)
delta
[
term
.
delta2
][
0
],
(
float
)
delta
[
term
.
delta2
][
1
],
(
float
)
delta
[
term
.
delta2
][
2
],
0.0
f
);
fvec4
thetaCross
=
cross
(
delta1
,
delta2
);
float
lengthThetaCross
=
sqrtf
(
dot3
(
thetaCross
,
thetaCross
));
if
(
lengthThetaCross
<
1.0e-6
f
)
lengthThetaCross
=
1.0e-6
f
;
RealOpenMM
termA
=
dEdTheta
*
term
.
delta2Sign
/
(
delta
[
term
.
delta1
][
ReferenceForce
::
R2Index
]
*
lengthThetaCross
);
RealOpenMM
termC
=
-
dEdTheta
*
term
.
delta1Sign
/
(
delta
[
term
.
delta2
][
ReferenceForce
::
R2Index
]
*
lengthThetaCross
);
RealOpenMM
deltaCrossP
[
3
][
3
];
SimTKOpenMMUtilities
::
crossProductVector3
(
delta
[
term
.
delta1
],
thetaCross
,
deltaCrossP
[
0
]);
SimTKOpenMMUtilities
::
crossProductVector3
(
delta
[
term
.
delta2
],
thetaCross
,
deltaCrossP
[
2
]);
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
deltaCrossP
[
0
][
i
]
*=
termA
;
deltaCrossP
[
2
][
i
]
*=
termC
;
deltaCrossP
[
1
][
i
]
=
-
(
deltaCrossP
[
0
][
i
]
+
deltaCrossP
[
2
][
i
]);
}
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
f
[
term
.
p1
][
i
]
+=
deltaCrossP
[
0
][
i
];
f
[
term
.
p2
][
i
]
+=
deltaCrossP
[
1
][
i
];
f
[
term
.
p3
][
i
]
+=
deltaCrossP
[
2
][
i
];
}
fvec4
deltaCross1
=
cross
(
delta1
,
thetaCross
);
fvec4
deltaCross2
=
cross
(
delta2
,
thetaCross
);
fvec4
force1
=
termA
*
deltaCross1
;
fvec4
force3
=
termC
*
deltaCross2
;
fvec4
force2
=
-
(
force1
+
force3
);
f
[
term
.
p1
]
+=
force1
;
f
[
term
.
p2
]
+=
force2
;
f
[
term
.
p3
]
+=
force3
;
}
// Apply forces based on dihedrals.
...
...
@@ -360,26 +354,20 @@ void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, RealO
forceFactors
[
1
]
/=
delta
[
term
.
delta2
][
ReferenceForce
::
R2Index
];
forceFactors
[
2
]
=
DOT3
(
delta
[
term
.
delta3
],
delta
[
term
.
delta2
]);
forceFactors
[
2
]
/=
delta
[
term
.
delta2
][
ReferenceForce
::
R2Index
];
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
internalF
[
0
][
i
]
=
forceFactors
[
0
]
*
term
.
cross1
[
i
];
internalF
[
3
][
i
]
=
forceFactors
[
3
]
*
term
.
cross2
[
i
];
RealOpenMM
s
=
forceFactors
[
1
]
*
internalF
[
0
][
i
]
-
forceFactors
[
2
]
*
internalF
[
3
][
i
];
internalF
[
1
][
i
]
=
internalF
[
0
][
i
]
-
s
;
internalF
[
2
][
i
]
=
internalF
[
3
][
i
]
+
s
;
}
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
f
[
term
.
p1
][
i
]
+=
internalF
[
0
][
i
];
f
[
term
.
p2
][
i
]
-=
internalF
[
1
][
i
];
f
[
term
.
p3
][
i
]
-=
internalF
[
2
][
i
];
f
[
term
.
p4
][
i
]
+=
internalF
[
3
][
i
];
}
fvec4
force1
=
forceFactors
[
0
]
*
fvec4
((
float
)
term
.
cross1
[
0
],
(
float
)
term
.
cross1
[
1
],
(
float
)
term
.
cross1
[
2
],
0.0
f
);
fvec4
force4
=
forceFactors
[
3
]
*
fvec4
((
float
)
term
.
cross2
[
0
],
(
float
)
term
.
cross2
[
1
],
(
float
)
term
.
cross2
[
2
],
0.0
f
);
fvec4
s
=
forceFactors
[
1
]
*
force1
-
forceFactors
[
2
]
*
force4
;
f
[
term
.
p1
]
+=
force1
;
f
[
term
.
p2
]
-=
force1
-
s
;
f
[
term
.
p3
]
-=
force4
+
s
;
f
[
term
.
p4
]
+=
force4
;
}
// Store the forces.
for
(
int
i
=
0
;
i
<
numParticlesPerSet
;
i
++
)
{
int
index
=
permutedParticles
[
i
];
(
fvec4
(
forces
+
4
*
index
)
+
f
vec4
((
float
)
f
[
i
][
0
],
(
float
)
f
[
i
][
1
],
(
float
)
f
[
i
][
2
],
0.0
f
)
).
store
(
forces
+
4
*
index
);
(
fvec4
(
forces
+
4
*
index
)
+
f
[
i
]
).
store
(
forces
+
4
*
index
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment