Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
2db04610
Commit
2db04610
authored
Apr 14, 2015
by
peastman
Browse files
Merge pull request #873 from peastman/cpuopt
Optimizations to CPU platform
parents
79b85ad7
05381ef1
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
406 additions
and
152 deletions
+406
-152
CMakeLists.txt
CMakeLists.txt
+3
-1
openmmapi/include/openmm/internal/vectorize8.h
openmmapi/include/openmm/internal/vectorize8.h
+12
-0
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+6
-2
openmmapi/include/openmm/internal/vectorize_pnacl.h
openmmapi/include/openmm/internal/vectorize_pnacl.h
+6
-2
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+12
-0
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+2
-1
platforms/cpu/include/CpuNonbondedForceVec4.h
platforms/cpu/include/CpuNonbondedForceVec4.h
+6
-6
platforms/cpu/include/CpuNonbondedForceVec8.h
platforms/cpu/include/CpuNonbondedForceVec8.h
+6
-6
platforms/cpu/src/CpuCustomManyParticleForce.cpp
platforms/cpu/src/CpuCustomManyParticleForce.cpp
+1
-1
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+71
-41
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+137
-43
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+139
-45
platforms/reference/src/SimTKReference/ObcParameters.cpp
platforms/reference/src/SimTKReference/ObcParameters.cpp
+3
-3
tests/TestVectorize.cpp
tests/TestVectorize.cpp
+2
-1
No files found.
CMakeLists.txt
View file @
2db04610
...
@@ -296,7 +296,9 @@ SET(OPENMM_BUILD_SHARED_LIB ON CACHE BOOL "Whether to build shared OpenMM librar
...
@@ -296,7 +296,9 @@ SET(OPENMM_BUILD_SHARED_LIB ON CACHE BOOL "Whether to build shared OpenMM librar
SET
(
EXTRA_LINK_FLAGS
${
EXTRA_COMPILE_FLAGS
}
)
SET
(
EXTRA_LINK_FLAGS
${
EXTRA_COMPILE_FLAGS
}
)
IF
(
CMAKE_SYSTEM_NAME MATCHES
"Linux"
)
IF
(
CMAKE_SYSTEM_NAME MATCHES
"Linux"
)
SET
(
EXTRA_LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
-Wl,--no-as-needed -lrt"
)
IF
(
NOT ANDROID
)
SET
(
EXTRA_LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
-Wl,--no-as-needed -lrt"
)
ENDIF
(
NOT ANDROID
)
ENDIF
(
CMAKE_SYSTEM_NAME MATCHES
"Linux"
)
ENDIF
(
CMAKE_SYSTEM_NAME MATCHES
"Linux"
)
IF
(
OPENMM_BUILD_SHARED_LIB
)
IF
(
OPENMM_BUILD_SHARED_LIB
)
...
...
openmmapi/include/openmm/internal/vectorize8.h
View file @
2db04610
...
@@ -191,6 +191,18 @@ static inline fvec8 sqrt(const fvec8& v) {
...
@@ -191,6 +191,18 @@ static inline fvec8 sqrt(const fvec8& v) {
return
fvec8
(
_mm256_sqrt_ps
(
v
.
val
));
return
fvec8
(
_mm256_sqrt_ps
(
v
.
val
));
}
}
static
inline
fvec8
rsqrt
(
const
fvec8
&
v
)
{
// Initial estimate of rsqrt().
fvec8
y
(
_mm256_rsqrt_ps
(
v
.
val
));
// Perform an iteration of Newton refinement.
fvec8
x2
=
v
*
0.5
f
;
y
*=
fvec8
(
1.5
f
)
-
x2
*
y
*
y
;
return
y
;
}
static
inline
float
dot8
(
const
fvec8
&
v1
,
const
fvec8
&
v2
)
{
static
inline
float
dot8
(
const
fvec8
&
v1
,
const
fvec8
&
v2
)
{
fvec8
result
=
_mm256_dp_ps
(
v1
,
v2
,
0xF1
);
fvec8
result
=
_mm256_dp_ps
(
v1
,
v2
,
0xF1
);
return
_mm_cvtss_f32
(
result
.
lowerVec
())
+
_mm_cvtss_f32
(
result
.
upperVec
());
return
_mm_cvtss_f32
(
result
.
lowerVec
())
+
_mm_cvtss_f32
(
result
.
upperVec
());
...
...
openmmapi/include/openmm/internal/vectorize_neon.h
View file @
2db04610
...
@@ -251,11 +251,15 @@ static inline fvec4 abs(const fvec4& v) {
...
@@ -251,11 +251,15 @@ static inline fvec4 abs(const fvec4& v) {
return
vabsq_f32
(
v
);
return
vabsq_f32
(
v
);
}
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
r
sqrt
(
const
fvec4
&
v
)
{
float32x4_t
recipSqrt
=
vrsqrteq_f32
(
v
);
float32x4_t
recipSqrt
=
vrsqrteq_f32
(
v
);
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
return
vmulq_f32
(
v
,
recipSqrt
);
return
recipSqrt
;
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
return
rsqrt
(
v
)
*
v
;
}
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
...
...
openmmapi/include/openmm/internal/vectorize_pnacl.h
View file @
2db04610
...
@@ -330,7 +330,7 @@ static inline fvec4 ceil(const fvec4& v) {
...
@@ -330,7 +330,7 @@ static inline fvec4 ceil(const fvec4& v) {
return
truncated
+
blend
(
0.0
f
,
1.0
f
,
truncated
<
v
);
return
truncated
+
blend
(
0.0
f
,
1.0
f
,
truncated
<
v
);
}
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
r
sqrt
(
const
fvec4
&
v
)
{
// Initial estimate of rsqrt().
// Initial estimate of rsqrt().
ivec4
i
=
(
__m128i
)
v
;
ivec4
i
=
(
__m128i
)
v
;
...
@@ -343,7 +343,11 @@ static inline fvec4 sqrt(const fvec4& v) {
...
@@ -343,7 +343,11 @@ static inline fvec4 sqrt(const fvec4& v) {
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
return
y
*
v
;
return
y
;
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
return
rsqrt
(
v
)
*
v
;
}
}
#endif
/*OPENMM_VECTORIZE_PNACL_H_*/
#endif
/*OPENMM_VECTORIZE_PNACL_H_*/
...
...
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
2db04610
...
@@ -241,6 +241,18 @@ static inline fvec4 sqrt(const fvec4& v) {
...
@@ -241,6 +241,18 @@ static inline fvec4 sqrt(const fvec4& v) {
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
}
static
inline
fvec4
rsqrt
(
const
fvec4
&
v
)
{
// Initial estimate of rsqrt().
fvec4
y
(
_mm_rsqrt_ps
(
v
.
val
));
// Perform an iteration of Newton refinement.
fvec4
x2
=
v
*
0.5
f
;
y
*=
fvec4
(
1.5
f
)
-
x2
*
y
*
y
;
return
y
;
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
}
...
...
platforms/cpu/include/CpuNeighborList.h
View file @
2db04610
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2013 Stanford University and the Authors.
*
* Portions copyright (c) 2013
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -61,6 +61,7 @@ public:
...
@@ -61,6 +61,7 @@ public:
private:
private:
int
blockSize
;
int
blockSize
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
float
>
sortedPositions
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
// The following variables are used to make information accessible to the individual threads.
// The following variables are used to make information accessible to the individual threads.
...
...
platforms/cpu/include/CpuNonbondedForceVec4.h
View file @
2db04610
...
@@ -56,8 +56,8 @@ protected:
...
@@ -56,8 +56,8 @@ protected:
/**
/**
* Templatized implementation of calculateBlockIxn.
* Templatized implementation of calculateBlockIxn.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -74,15 +74,15 @@ protected:
...
@@ -74,15 +74,15 @@ protected:
/**
/**
* Templatized implementation of calculateBlockEwaldIxn.
* Templatized implementation of calculateBlockEwaldIxn.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
* periodic boundary conditions.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
getDeltaR
(
const
f
loat
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
void
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
/**
* Compute a fast approximation to erfc(x).
* Compute a fast approximation to erfc(x).
...
...
platforms/cpu/include/CpuNonbondedForceVec8.h
View file @
2db04610
...
@@ -55,8 +55,8 @@ protected:
...
@@ -55,8 +55,8 @@ protected:
/**
/**
* Templatized implementation of calculateBlockIxn.
* Templatized implementation of calculateBlockIxn.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -73,15 +73,15 @@ protected:
...
@@ -73,15 +73,15 @@ protected:
/**
/**
* Templatized implementation of calculateBlockEwaldIxn.
* Templatized implementation of calculateBlockEwaldIxn.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
* periodic boundary conditions.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
getDeltaR
(
const
f
loat
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
void
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
/**
* Compute a fast approximation to erfc(x).
* Compute a fast approximation to erfc(x).
...
...
platforms/cpu/src/CpuCustomManyParticleForce.cpp
View file @
2db04610
...
@@ -199,7 +199,7 @@ void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
...
@@ -199,7 +199,7 @@ void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
}
}
void
CpuCustomManyParticleForce
::
setPeriodic
(
RealVec
*
periodicBoxVectors
)
{
void
CpuCustomManyParticleForce
::
setPeriodic
(
RealVec
*
periodicBoxVectors
)
{
assert
(
c
utoff
);
assert
(
useC
utoff
);
assert
(
periodicBoxVectors
[
0
][
0
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
0
][
0
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
1
][
1
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
1
][
1
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
2
][
2
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
2
][
2
]
>=
2.0
*
cutoffDistance
);
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
2db04610
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2013 Stanford University and the Authors.
*
* Portions copyright (c) 2013
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -164,8 +164,8 @@ public:
...
@@ -164,8 +164,8 @@ public:
return
VoxelIndex
(
y
,
z
);
return
VoxelIndex
(
y
,
z
);
}
}
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
const
fvec4
&
blockCenter
,
const
fvec4
&
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>&
blockAtoms
,
const
float
*
atomLoca
tions
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
const
fvec4
&
blockCenter
,
const
fvec4
&
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>&
blockAtoms
,
const
vector
<
float
>&
blockAtomX
,
const
vector
<
float
>&
blockAtomY
,
const
vector
<
float
>&
blockAtomZ
,
const
vector
<
float
>&
sortedPosi
tions
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
neighbors
.
resize
(
0
);
neighbors
.
resize
(
0
);
exclusions
.
resize
(
0
);
exclusions
.
resize
(
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
...
@@ -233,24 +233,34 @@ public:
...
@@ -233,24 +233,34 @@ public:
float
minx
=
centerPos
[
0
];
float
minx
=
centerPos
[
0
];
float
maxx
=
centerPos
[
0
];
float
maxx
=
centerPos
[
0
];
fvec4
offset
(
-
xoffset
,
-
yoffset
+
voxelSizeY
*
y
+
(
usePeriodic
?
0.0
f
:
miny
),
voxelSizeZ
*
z
+
(
usePeriodic
?
0.0
f
:
minz
),
0
);
float
offset
[
3
]
=
{
-
xoffset
,
-
yoffset
+
voxelSizeY
*
y
+
(
usePeriodic
?
0.0
f
:
miny
),
voxelSizeZ
*
z
+
(
usePeriodic
?
0.0
f
:
minz
)};
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
+=
4
)
{
const
float
*
atomPos
=
&
atomLocations
[
4
*
blockAtoms
[
k
]];
fvec4
dist2
=
maxDistanceSquared
;
fvec4
posVec
(
atomPos
);
if
(
y
!=
atomVoxelIndex
[
k
].
y
)
{
fvec4
delta1
=
offset
-
posVec
;
fvec4
dy1
=
offset
[
1
]
-
fvec4
(
&
blockAtomY
[
k
]);
fvec4
delta2
=
delta1
+
fvec4
(
0
,
voxelSizeY
,
voxelSizeZ
,
0
);
fvec4
dy2
=
dy1
+
voxelSizeY
;
if
(
usePeriodic
)
{
if
(
usePeriodic
)
{
delta1
-=
round
(
delta1
*
invBoxSize
)
*
boxSize
;
dy1
-=
round
(
dy1
*
invBoxSize
[
1
])
*
boxSize
[
1
];
delta2
-=
round
(
delta2
*
invBoxSize
)
*
boxSize
;
dy2
-=
round
(
dy2
*
invBoxSize
[
1
])
*
boxSize
[
1
];
}
fvec4
dy
=
min
(
abs
(
dy1
),
abs
(
dy2
));
dist2
-=
dy
*
dy
;
}
if
(
z
!=
atomVoxelIndex
[
k
].
z
)
{
fvec4
dz1
=
offset
[
2
]
-
fvec4
(
&
blockAtomZ
[
k
]);
fvec4
dz2
=
dz1
+
voxelSizeZ
;
if
(
usePeriodic
)
{
dz1
-=
round
(
dz1
*
invBoxSize
[
2
])
*
boxSize
[
2
];
dz2
-=
round
(
dz2
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
fvec4
dz
=
min
(
abs
(
dz1
),
abs
(
dz2
));
dist2
-=
dz
*
dz
;
}
}
fvec4
delta
=
min
(
abs
(
delta1
),
abs
(
delta2
));
fvec4
dist
=
sqrt
(
dist2
);
float
dy
=
(
y
==
atomVoxelIndex
[
k
].
y
?
0.0
f
:
delta
[
1
]);
int
numToCheck
=
min
(
4
,
(
int
)
(
blockAtoms
.
size
()
-
k
));
float
dz
=
(
z
==
atomVoxelIndex
[
k
].
z
?
0.0
f
:
delta
[
2
]);
for
(
int
m
=
0
;
m
<
numToCheck
;
m
++
)
{
float
dist2
=
maxDistanceSquared
-
dy
*
dy
-
dz
*
dz
;
minx
=
min
(
minx
,
blockAtomX
[
k
+
m
]
-
dist
[
m
]
-
xoffset
);
if
(
dist2
>
0
)
{
maxx
=
max
(
maxx
,
blockAtomX
[
k
+
m
]
+
dist
[
m
]
-
xoffset
);
float
dist
=
sqrtf
(
dist2
);
minx
=
min
(
minx
,
atomPos
[
0
]
-
dist
-
xoffset
);
maxx
=
max
(
maxx
,
atomPos
[
0
]
+
dist
-
xoffset
);
}
}
}
}
if
(
minx
==
maxx
)
if
(
minx
==
maxx
)
...
@@ -290,12 +300,10 @@ public:
...
@@ -290,12 +300,10 @@ public:
if
(
sortedIndex
>=
lastSortedIndex
)
if
(
sortedIndex
>=
lastSortedIndex
)
continue
;
continue
;
fvec4
atomPos
(
atomLocations
+
4
*
sortedAtoms
[
sortedIndex
]);
fvec4
atomPos
(
&
sortedPositions
[
4
*
sortedIndex
]);
fvec4
delta
=
atomPos
-
centerPos
;
fvec4
delta
=
atomPos
-
centerPos
;
if
(
periodicRectangular
)
{
if
(
periodicRectangular
)
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
-=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
=
delta
-
base
;
}
else
if
(
needPeriodic
)
{
else
if
(
needPeriodic
)
{
delta
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
...
@@ -310,26 +318,34 @@ public:
...
@@ -310,26 +318,34 @@ public:
// The distance is large enough that there might not be any actual interactions.
// The distance is large enough that there might not be any actual interactions.
// Check individual atom pairs to be sure.
// Check individual atom pairs to be sure.
bool
any
=
false
;
bool
anyInteraction
=
false
;
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
+=
4
)
{
fvec4
pos1
(
&
atomLocations
[
4
*
blockAtoms
[
k
]]);
fvec4
dx
=
fvec4
(
&
blockAtomX
[
k
])
-
atomPos
[
0
];
delta
=
atomPos
-
pos1
;
fvec4
dy
=
fvec4
(
&
blockAtomY
[
k
])
-
atomPos
[
1
];
fvec4
dz
=
fvec4
(
&
blockAtomZ
[
k
])
-
atomPos
[
2
];
if
(
periodicRectangular
)
{
if
(
periodicRectangular
)
{
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
delta
=
delta
-
base
;
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
}
else
if
(
needPeriodic
)
{
else
if
(
needPeriodic
)
{
delta
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
fvec4
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
delta
-=
periodicBoxVec4
[
0
]
*
floorf
(
delta
[
0
]
*
recipBoxSize
[
0
]
+
0.5
f
);
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec4
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec4
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
}
f
loat
r2
=
d
ot3
(
delta
,
delta
)
;
f
vec4
r2
=
d
x
*
dx
+
dy
*
dy
+
dz
*
dz
;
if
(
r2
<
maxDistanceSquared
)
{
if
(
any
(
r2
<
maxDistanceSquared
)
)
{
any
=
true
;
any
Interaction
=
true
;
break
;
break
;
}
}
}
}
if
(
!
any
)
if
(
!
any
Interaction
)
continue
;
continue
;
}
}
...
@@ -379,6 +395,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -379,6 +395,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
blockNeighbors
.
resize
(
numBlocks
);
blockNeighbors
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
sortedAtoms
.
resize
(
numAtoms
);
sortedAtoms
.
resize
(
numAtoms
);
sortedPositions
.
resize
(
4
*
numAtoms
);
// Record the parameters for the threads.
// Record the parameters for the threads.
...
@@ -428,6 +445,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -428,6 +445,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
atomIndex
=
atomBins
[
i
].
second
;
int
atomIndex
=
atomBins
[
i
].
second
;
sortedAtoms
[
i
]
=
atomIndex
;
sortedAtoms
[
i
]
=
atomIndex
;
fvec4
atomPos
(
&
atomLocations
[
4
*
atomIndex
]);
atomPos
.
store
(
&
sortedPositions
[
4
*
i
]);
voxels
.
insert
(
i
,
&
atomLocations
[
4
*
atomIndex
]);
voxels
.
insert
(
i
,
&
atomLocations
[
4
*
atomIndex
]);
}
}
voxels
.
sortItems
();
voxels
.
sortItems
();
...
@@ -489,6 +508,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
...
@@ -489,6 +508,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int
numBlocks
=
blockNeighbors
.
size
();
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
vector
<
int
>
blockAtoms
;
vector
<
float
>
blockAtomX
(
blockSize
),
blockAtomY
(
blockSize
),
blockAtomZ
(
blockSize
);
vector
<
VoxelIndex
>
atomVoxelIndex
;
vector
<
VoxelIndex
>
atomVoxelIndex
;
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
// Find the atoms in this block and compute their bounding box.
// Find the atoms in this block and compute their bounding box.
...
@@ -501,14 +521,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
...
@@ -501,14 +521,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
atomVoxelIndex
[
j
]
=
voxels
->
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
j
]]);
atomVoxelIndex
[
j
]
=
voxels
->
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
j
]]);
}
}
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]
]
);
fvec4
minPos
(
&
sortedPositions
[
4
*
firstIndex
]);
fvec4
maxPos
=
minPos
;
fvec4
maxPos
=
minPos
;
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
fvec4
pos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
+
j
]
]);
fvec4
pos
(
&
sortedPositions
[
4
*
(
firstIndex
+
j
)
]);
minPos
=
min
(
minPos
,
pos
);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
,
atomVoxelIndex
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
blockAtomX
[
j
]
=
sortedPositions
[
4
*
(
firstIndex
+
j
)];
blockAtomY
[
j
]
=
sortedPositions
[
4
*
(
firstIndex
+
j
)
+
1
];
blockAtomZ
[
j
]
=
sortedPositions
[
4
*
(
firstIndex
+
j
)
+
2
];
}
for
(
int
j
=
atomsInBlock
;
j
<
blockSize
;
j
++
)
{
blockAtomX
[
j
]
=
1e10
;
blockAtomY
[
j
]
=
1e10
;
blockAtomZ
[
j
]
=
1e10
;
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
sortedPositions
,
atomVoxelIndex
);
// Record the exclusions for this block.
// Record the exclusions for this block.
...
...
platforms/cpu/src/CpuNonbondedForceVec4.cpp
View file @
2db04610
...
@@ -44,30 +44,76 @@ CpuNonbondedForce* createCpuNonbondedForceVec4() {
...
@@ -44,30 +44,76 @@ CpuNonbondedForce* createCpuNonbondedForceVec4() {
CpuNonbondedForceVec4
::
CpuNonbondedForceVec4
()
{
CpuNonbondedForceVec4
::
CpuNonbondedForceVec4
()
{
}
}
enum
PeriodicType
{
NoPeriodic
,
PeriodicPerAtom
,
PeriodicPerInteraction
,
PeriodicTriclinic
};
void
CpuNonbondedForceVec4
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec4
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
// Determine whether we need to apply periodic boundary conditions.
calculateBlockIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
PeriodicType
periodicType
;
calculateBlockIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec4
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -82,7 +128,10 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -82,7 +128,10 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
ivec4
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -95,8 +144,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -95,8 +144,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
rsqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
if
(
atomEpsilon
!=
0.0
f
)
{
...
@@ -108,6 +156,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -108,6 +156,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
if
(
useSwitch
)
{
fvec4
r
=
r2
*
inverseR
;
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
...
@@ -162,29 +211,73 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -162,29 +211,73 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
}
}
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
// Determine whether we need to apply periodic boundary conditions.
calculateBlockEwaldIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
PeriodicType
periodicType
;
calculateBlockEwaldIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockEwaldIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockEwaldIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockEwaldIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockEwaldIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -199,7 +292,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -199,7 +292,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
ivec4
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -212,8 +308,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -212,8 +308,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
r
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
r
=
r2
*
inverseR
;
fvec4
energy
,
dEdR
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
if
(
atomEpsilon
!=
0.0
f
)
{
...
@@ -272,28 +368,26 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -272,28 +368,26 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
getDeltaR
(
const
f
loat
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForceVec4
::
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
if
(
PERIODIC_TYPE
==
PeriodicTriclinic
)
{
if
(
TRICLINIC
)
{
fvec4
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
fvec4
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec4
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
fvec4
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec4
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
fvec4
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
}
else
if
(
PERIODIC_TYPE
==
PeriodicPerInteraction
)
{
else
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
}
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
}
...
...
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
2db04610
...
@@ -50,7 +50,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
...
@@ -50,7 +50,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
*/
*/
bool
isVec8Supported
()
{
bool
isVec8Supported
()
{
// Make sure the CPU supports AVX.
// Make sure the CPU supports AVX.
int
cpuInfo
[
4
];
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
if
(
cpuInfo
[
0
]
>=
1
)
{
...
@@ -76,29 +76,75 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
...
@@ -76,29 +76,75 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
{
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
{
}
}
enum
PeriodicType
{
NoPeriodic
,
PeriodicPerAtom
,
PeriodicPerInteraction
,
PeriodicTriclinic
};
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
// Determine whether we need to apply periodic boundary conditions.
calculateBlockIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
PeriodicType
periodicType
;
calculateBlockIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
8
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec8
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
fvec4
blockAtomPosq
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -113,7 +159,10 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -113,7 +159,10 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
ivec8
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -126,8 +175,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -126,8 +175,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
rsqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
energy
,
dEdR
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
if
(
atomEpsilon
!=
0.0
f
)
{
...
@@ -139,6 +187,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -139,6 +187,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
if
(
useSwitch
)
{
fvec8
r
=
r2
*
inverseR
;
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
...
@@ -193,28 +242,72 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -193,28 +242,72 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
}
}
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
// Determine whether we need to apply periodic boundary conditions.
calculateBlockEwaldIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
PeriodicType
periodicType
;
calculateBlockEwaldIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
8
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockEwaldIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockEwaldIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockEwaldIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockEwaldIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
fvec4
blockAtomPosq
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -229,7 +322,10 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -229,7 +322,10 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
ivec8
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -242,8 +338,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -242,8 +338,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
r
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
r
=
r2
*
inverseR
;
fvec8
energy
,
dEdR
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
if
(
atomEpsilon
!=
0.0
f
)
{
...
@@ -268,7 +364,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -268,7 +364,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
}
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
// Accumulate energies.
...
@@ -302,28 +398,26 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -302,28 +398,26 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
f
loat
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
if
(
PERIODIC_TYPE
==
PeriodicTriclinic
)
{
if
(
TRICLINIC
)
{
fvec8
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
fvec8
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec8
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
fvec8
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec8
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
fvec8
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
}
else
if
(
PERIODIC_TYPE
==
PeriodicPerInteraction
)
{
else
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
}
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
}
...
...
platforms/reference/src/SimTKReference/ObcParameters.cpp
View file @
2db04610
...
@@ -382,9 +382,9 @@ void ObcParameters::setPeriodic(OpenMM::RealVec* vectors) {
...
@@ -382,9 +382,9 @@ void ObcParameters::setPeriodic(OpenMM::RealVec* vectors) {
assert
(
_cutoff
);
assert
(
_cutoff
);
assert
(
boxSize
[
0
][
0
]
>=
2.0
*
_cutoffDistance
);
assert
(
vectors
[
0
][
0
]
>=
2.0
*
_cutoffDistance
);
assert
(
boxSize
[
1
][
1
]
>=
2.0
*
_cutoffDistance
);
assert
(
vectors
[
1
][
1
]
>=
2.0
*
_cutoffDistance
);
assert
(
boxSize
[
2
][
2
]
>=
2.0
*
_cutoffDistance
);
assert
(
vectors
[
2
][
2
]
>=
2.0
*
_cutoffDistance
);
_periodic
=
true
;
_periodic
=
true
;
_periodicBoxVectors
[
0
]
=
vectors
[
0
];
_periodicBoxVectors
[
0
]
=
vectors
[
0
];
...
...
tests/TestVectorize.cpp
View file @
2db04610
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2014 Stanford University and the Authors.
*
* Portions copyright (c) 2014
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -148,6 +148,7 @@ void testMathFunctions() {
...
@@ -148,6 +148,7 @@ void testMathFunctions() {
ASSERT_VEC4_EQUAL
(
min
(
f1
,
f2
),
0.4
,
1.2
,
-
1.2
,
-
5.0
);
ASSERT_VEC4_EQUAL
(
min
(
f1
,
f2
),
0.4
,
1.2
,
-
1.2
,
-
5.0
);
ASSERT_VEC4_EQUAL
(
max
(
f1
,
f2
),
1.1
,
1.9
,
1.3
,
-
3.8
);
ASSERT_VEC4_EQUAL
(
max
(
f1
,
f2
),
1.1
,
1.9
,
1.3
,
-
3.8
);
ASSERT_VEC4_EQUAL
(
sqrt
(
fvec4
(
1.5
,
3.1
,
4.0
,
15.0
)),
sqrt
(
1.5
),
sqrt
(
3.1
),
sqrt
(
4.0
),
sqrt
(
15.0
));
ASSERT_VEC4_EQUAL
(
sqrt
(
fvec4
(
1.5
,
3.1
,
4.0
,
15.0
)),
sqrt
(
1.5
),
sqrt
(
3.1
),
sqrt
(
4.0
),
sqrt
(
15.0
));
ASSERT_VEC4_EQUAL
(
rsqrt
(
fvec4
(
1.5
,
3.1
,
4.0
,
15.0
)),
1.0
/
sqrt
(
1.5
),
1.0
/
sqrt
(
3.1
),
1.0
/
sqrt
(
4.0
),
1.0
/
sqrt
(
15.0
));
ASSERT_EQUAL_TOL
(
f1
[
0
]
*
f2
[
0
]
+
f1
[
1
]
*
f2
[
1
]
+
f1
[
2
]
*
f2
[
2
],
dot3
(
f1
,
f2
),
1e-6
);
ASSERT_EQUAL_TOL
(
f1
[
0
]
*
f2
[
0
]
+
f1
[
1
]
*
f2
[
1
]
+
f1
[
2
]
*
f2
[
2
],
dot3
(
f1
,
f2
),
1e-6
);
ASSERT_EQUAL_TOL
(
f1
[
0
]
*
f2
[
0
]
+
f1
[
1
]
*
f2
[
1
]
+
f1
[
2
]
*
f2
[
2
]
+
f1
[
3
]
*
f2
[
3
],
dot4
(
f1
,
f2
),
1e-6
);
ASSERT_EQUAL_TOL
(
f1
[
0
]
*
f2
[
0
]
+
f1
[
1
]
*
f2
[
1
]
+
f1
[
2
]
*
f2
[
2
]
+
f1
[
3
]
*
f2
[
3
],
dot4
(
f1
,
f2
),
1e-6
);
ASSERT
(
any
(
f1
>
0.5
));
ASSERT
(
any
(
f1
>
0.5
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment