Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5cd23acb
Commit
5cd23acb
authored
Dec 18, 2013
by
peastman
Browse files
Improved vectorization of CpuNonbondedForce
parent
e22808f3
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
88 additions
and
106 deletions
+88
-106
openmmapi/include/openmm/internal/vectorize8.h
openmmapi/include/openmm/internal/vectorize8.h
+2
-2
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+1
-1
platforms/cpu/include/CpuNonbondedForceVec8.h
platforms/cpu/include/CpuNonbondedForceVec8.h
+1
-1
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+39
-45
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+45
-57
No files found.
openmmapi/include/openmm/internal/vectorize8.h
View file @
5cd23acb
...
@@ -192,7 +192,8 @@ static inline fvec8 sqrt(fvec8 v) {
...
@@ -192,7 +192,8 @@ static inline fvec8 sqrt(fvec8 v) {
}
}
static
inline
float
dot8
(
fvec8
v1
,
fvec8
v2
)
{
static
inline
float
dot8
(
fvec8
v1
,
fvec8
v2
)
{
return
dot4
(
v1
.
lowerVec
(),
v2
.
lowerVec
())
+
dot4
(
v1
.
upperVec
(),
v2
.
upperVec
());
fvec8
result
=
_mm256_dp_ps
(
v1
,
v2
,
0xF1
);
return
_mm_cvtss_f32
(
result
.
lowerVec
())
+
_mm_cvtss_f32
(
result
.
upperVec
());
}
}
static
inline
void
transpose
(
fvec4
in1
,
fvec4
in2
,
fvec4
in3
,
fvec4
in4
,
fvec4
in5
,
fvec4
in6
,
fvec4
in7
,
fvec4
in8
,
fvec8
&
out1
,
fvec8
&
out2
,
fvec8
&
out3
,
fvec8
&
out4
)
{
static
inline
void
transpose
(
fvec4
in1
,
fvec4
in2
,
fvec4
in3
,
fvec4
in4
,
fvec4
in5
,
fvec4
in6
,
fvec4
in7
,
fvec4
in8
,
fvec8
&
out1
,
fvec8
&
out2
,
fvec8
&
out3
,
fvec8
&
out4
)
{
...
@@ -223,7 +224,6 @@ static inline void transpose(fvec8 in1, fvec8 in2, fvec8 in3, fvec8 in4, fvec4&
...
@@ -223,7 +224,6 @@ static inline void transpose(fvec8 in1, fvec8 in2, fvec8 in3, fvec8 in4, fvec4&
// Functions that operate on ivec8s.
// Functions that operate on ivec8s.
static
inline
bool
any
(
ivec8
v
)
{
static
inline
bool
any
(
ivec8
v
)
{
return
!
_mm256_testz_si256
(
v
,
_mm256_set1_epi32
(
0xFFFFFFFF
));
return
!
_mm256_testz_si256
(
v
,
_mm256_set1_epi32
(
0xFFFFFFFF
));
}
}
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
5cd23acb
...
@@ -228,7 +228,7 @@ private:
...
@@ -228,7 +228,7 @@ private:
* Compute the displacement and squared distance between a collection of points, optionally using
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
* periodic boundary conditions.
*/
*/
void
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
void
getDeltaR
(
const
f
loat
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
/**
* Compute a fast approximation to erfc(x).
* Compute a fast approximation to erfc(x).
...
...
platforms/cpu/include/CpuNonbondedForceVec8.h
View file @
5cd23acb
...
@@ -228,7 +228,7 @@ private:
...
@@ -228,7 +228,7 @@ private:
* Compute the displacement and squared distance between a collection of points, optionally using
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
* periodic boundary conditions.
*/
*/
void
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
void
getDeltaR
(
const
f
loat
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
/**
* Compute a fast approximation to erfc(x).
* Compute a fast approximation to erfc(x).
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
5cd23acb
...
@@ -451,11 +451,10 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
...
@@ -451,11 +451,10 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
int
blockAtom
[
4
];
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForce
[
4
]
;
fvec4
blockAtomForce
X
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
)
;
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
...
@@ -463,15 +462,8 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
...
@@ -463,15 +462,8 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
false
;
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
if
(
periodic
)
{
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
for
(
int
i
=
0
;
i
<
4
&&
!
needPeriodic
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
if
(
blockAtomPosq
[
i
][
j
]
-
cutoffDistance
<
0.0
||
blockAtomPosq
[
i
][
j
]
+
cutoffDistance
>
boxSize
[
j
])
{
needPeriodic
=
true
;
break
;
}
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -482,12 +474,11 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
...
@@ -482,12 +474,11 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Load the next neighbor.
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atom
Posq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
ivec4
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -533,32 +524,37 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
...
@@ -533,32 +524,37 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Accumulate energies.
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
if
(
cutoff
)
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
else
energy
+=
chargeProd
*
inverseR
;
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
}
// Accumulate forces.
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
fvec4
fx
=
dx
*
dEdR
;
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
fy
=
dy
*
dEdR
;
fvec4
atomForce
(
forces
+
4
*
atom
);
fvec4
fz
=
dz
*
dEdR
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
blockAtomForceX
+=
fx
;
blockAtomForce
[
j
]
+=
result
[
j
];
blockAtomForceY
+=
fy
;
atomForce
-=
result
[
j
];
blockAtomForceZ
+=
fz
;
}
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
.
store
(
forces
+
4
*
atom
);
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
}
// Record the forces on the block atoms.
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
void
CpuNonbondedForce
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForce
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
...
@@ -566,11 +562,10 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
...
@@ -566,11 +562,10 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
int
blockAtom
[
4
];
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForce
[
4
]
;
fvec4
blockAtomForce
X
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
)
;
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
...
@@ -578,13 +573,8 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
...
@@ -578,13 +573,8 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
false
;
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
for
(
int
i
=
0
;
i
<
4
&&
!
needPeriodic
;
i
++
)
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
for
(
int
j
=
0
;
j
<
3
;
j
++
)
if
(
blockAtomPosq
[
i
][
j
]
-
cutoffDistance
<
0.0
||
blockAtomPosq
[
i
][
j
]
+
cutoffDistance
>
boxSize
[
j
])
{
needPeriodic
=
true
;
break
;
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -595,12 +585,11 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
...
@@ -595,12 +585,11 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Load the next neighbor.
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atom
Posq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
ivec4
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -643,29 +632,34 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
...
@@ -643,29 +632,34 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Accumulate energies.
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
}
// Accumulate forces.
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
fvec4
fx
=
dx
*
dEdR
;
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
fy
=
dy
*
dEdR
;
fvec4
atomForce
(
forces
+
4
*
atom
);
fvec4
fz
=
dz
*
dEdR
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
blockAtomForceX
+=
fx
;
blockAtomForce
[
j
]
+=
result
[
j
];
blockAtomForceY
+=
fy
;
atomForce
-=
result
[
j
];
blockAtomForceZ
+=
fz
;
}
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
.
store
(
forces
+
4
*
atom
);
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
}
// Record the forces on the block atoms.
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
...
@@ -677,7 +671,7 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
...
@@ -677,7 +671,7 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
r2
=
dot3
(
deltaR
,
deltaR
);
r2
=
dot3
(
deltaR
,
deltaR
);
}
}
void
CpuNonbondedForce
::
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForce
::
getDeltaR
(
const
f
loat
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
dz
=
z
-
posI
[
2
];
...
...
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
5cd23acb
...
@@ -451,27 +451,18 @@ void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, dou
...
@@ -451,27 +451,18 @@ void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, dou
int
blockAtom
[
8
];
int
blockAtom
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec4
blockAtomForce
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
}
fvec8
blockAtomX
=
fvec8
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
],
blockAtomPosq
[
4
][
0
],
blockAtomPosq
[
5
][
0
],
blockAtomPosq
[
6
][
0
],
blockAtomPosq
[
7
][
0
]);
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
fvec8
blockAtomY
=
fvec8
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
],
blockAtomPosq
[
4
][
1
],
blockAtomPosq
[
5
][
1
],
blockAtomPosq
[
6
][
1
],
blockAtomPosq
[
7
][
1
]);
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomZ
=
fvec8
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
],
blockAtomPosq
[
4
][
2
],
blockAtomPosq
[
5
][
2
],
blockAtomPosq
[
6
][
2
],
blockAtomPosq
[
7
][
2
]);
fvec8
blockAtomCharge
=
fvec8
(
ONE_4PI_EPS0
)
*
fvec8
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
],
blockAtomPosq
[
4
][
3
],
blockAtomPosq
[
5
][
3
],
blockAtomPosq
[
6
][
3
],
blockAtomPosq
[
7
][
3
]);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
false
;
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
if
(
periodic
)
{
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
for
(
int
i
=
0
;
i
<
8
&&
!
needPeriodic
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
if
(
blockAtomPosq
[
i
][
j
]
-
cutoffDistance
<
0.0
||
blockAtomPosq
[
i
][
j
]
+
cutoffDistance
>
boxSize
[
j
])
{
needPeriodic
=
true
;
break
;
}
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -482,12 +473,11 @@ void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, dou
...
@@ -482,12 +473,11 @@ void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, dou
// Load the next neighbor.
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
getDeltaR
(
&
posq
[
4
*
atom
]
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
ivec8
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -533,33 +523,37 @@ void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, dou
...
@@ -533,33 +523,37 @@ void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, dou
// Accumulate energies.
// Accumulate energies.
fvec8
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
if
(
cutoff
)
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
else
energy
+=
chargeProd
*
inverseR
;
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
1.0
f
);
*
totalEnergy
+=
dot8
(
energy
,
one
);
}
}
// Accumulate forces.
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec8
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
fvec8
fx
=
dx
*
dEdR
;
fvec4
rt
[
8
];
fvec8
fy
=
dy
*
dEdR
;
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
],
rt
[
0
],
rt
[
1
],
rt
[
2
],
rt
[
3
],
rt
[
4
],
rt
[
5
],
rt
[
6
],
rt
[
7
]);
fvec8
fz
=
dz
*
dEdR
;
fvec4
atomForce
(
forces
+
4
*
atom
);
blockAtomForceX
+=
fx
;
for
(
int
j
=
0
;
j
<
8
;
j
++
)
{
blockAtomForceY
+=
fy
;
blockAtomForce
[
j
]
+=
rt
[
j
];
blockAtomForceZ
+=
fz
;
atomForce
-=
rt
[
j
];
float
*
atomForce
=
forces
+
4
*
atom
;
}
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
atomForce
.
store
(
forces
+
4
*
atom
);
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
}
}
// Record the forces on the block atoms.
// Record the forces on the block atoms.
fvec4
f
[
8
];
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
for
(
int
j
=
0
;
j
<
8
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
...
@@ -567,27 +561,18 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces
...
@@ -567,27 +561,18 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces
int
blockAtom
[
8
];
int
blockAtom
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec4
blockAtomForce
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
}
fvec8
blockAtomX
=
fvec8
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
],
blockAtomPosq
[
4
][
0
],
blockAtomPosq
[
5
][
0
],
blockAtomPosq
[
6
][
0
],
blockAtomPosq
[
7
][
0
]);
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
fvec8
blockAtomY
=
fvec8
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
],
blockAtomPosq
[
4
][
1
],
blockAtomPosq
[
5
][
1
],
blockAtomPosq
[
6
][
1
],
blockAtomPosq
[
7
][
1
]);
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomZ
=
fvec8
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
],
blockAtomPosq
[
4
][
2
],
blockAtomPosq
[
5
][
2
],
blockAtomPosq
[
6
][
2
],
blockAtomPosq
[
7
][
2
]);
fvec8
blockAtomCharge
=
fvec8
(
ONE_4PI_EPS0
)
*
fvec8
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
],
blockAtomPosq
[
4
][
3
],
blockAtomPosq
[
5
][
3
],
blockAtomPosq
[
6
][
3
],
blockAtomPosq
[
7
][
3
]);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
false
;
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
if
(
periodic
)
{
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
for
(
int
i
=
0
;
i
<
8
&&
!
needPeriodic
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
if
(
blockAtomPosq
[
i
][
j
]
-
cutoffDistance
<
0.0
||
blockAtomPosq
[
i
][
j
]
+
cutoffDistance
>
boxSize
[
j
])
{
needPeriodic
=
true
;
break
;
}
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -598,12 +583,11 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces
...
@@ -598,12 +583,11 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces
// Load the next neighbor.
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
getDeltaR
(
&
posq
[
4
*
atom
]
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
ivec8
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -646,30 +630,34 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces
...
@@ -646,30 +630,34 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces
// Accumulate energies.
// Accumulate energies.
fvec8
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
1.0
f
);
*
totalEnergy
+=
dot8
(
energy
,
one
);
}
}
// Accumulate forces.
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec8
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
fvec8
fx
=
dx
*
dEdR
;
fvec4
rt
[
8
];
fvec8
fy
=
dy
*
dEdR
;
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
],
rt
[
0
],
rt
[
1
],
rt
[
2
],
rt
[
3
],
rt
[
4
],
rt
[
5
],
rt
[
6
],
rt
[
7
]);
fvec8
fz
=
dz
*
dEdR
;
fvec4
atomForce
(
forces
+
4
*
atom
);
blockAtomForceX
+=
fx
;
for
(
int
j
=
0
;
j
<
8
;
j
++
)
{
blockAtomForceY
+=
fy
;
blockAtomForce
[
j
]
+=
rt
[
j
];
blockAtomForceZ
+=
fz
;
atomForce
-=
rt
[
j
];
float
*
atomForce
=
forces
+
4
*
atom
;
}
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
atomForce
.
store
(
forces
+
4
*
atom
);
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
}
}
// Record the forces on the block atoms.
// Record the forces on the block atoms.
fvec4
f
[
8
];
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
for
(
int
j
=
0
;
j
<
8
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
...
@@ -681,7 +669,7 @@ void CpuNonbondedForceVec8::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec
...
@@ -681,7 +669,7 @@ void CpuNonbondedForceVec8::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec
r2
=
dot3
(
deltaR
,
deltaR
);
r2
=
dot3
(
deltaR
,
deltaR
);
}
}
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
f
loat
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
dz
=
z
-
posI
[
2
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment