Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
56902b45
Commit
56902b45
authored
Oct 22, 2013
by
peastman
Browse files
Restructured nonbonded calculation to allow more efficient vectorization
parent
6eacad45
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
468 additions
and
112 deletions
+468
-112
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+52
-0
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+9
-3
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+27
-11
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+1
-1
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+155
-35
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+208
-57
platforms/cpu/tests/TestCpuNeighborList.cpp
platforms/cpu/tests/TestCpuNeighborList.cpp
+16
-5
No files found.
openmmapi/include/openmm/internal/vectorize.h
View file @
56902b45
...
@@ -88,6 +88,27 @@ public:
...
@@ -88,6 +88,27 @@ public:
fvec4
operator
-
()
const
{
fvec4
operator
-
()
const
{
return
_mm_sub_ps
(
_mm_set1_ps
(
0.0
f
),
val
);
return
_mm_sub_ps
(
_mm_set1_ps
(
0.0
f
),
val
);
}
}
fvec4
operator
&
(
__m128i
other
)
const
{
return
_mm_and_si128
(
val
,
other
);
}
fvec4
operator
==
(
fvec4
other
)
const
{
return
_mm_cmpeq_ps
(
val
,
other
);
}
fvec4
operator
!=
(
fvec4
other
)
const
{
return
_mm_cmpneq_ps
(
val
,
other
);
}
fvec4
operator
>
(
fvec4
other
)
const
{
return
_mm_cmpgt_ps
(
val
,
other
);
}
fvec4
operator
<
(
fvec4
other
)
const
{
return
_mm_cmplt_ps
(
val
,
other
);
}
fvec4
operator
>=
(
fvec4
other
)
const
{
return
_mm_cmpge_ps
(
val
,
other
);
}
fvec4
operator
<=
(
fvec4
other
)
const
{
return
_mm_cmple_ps
(
val
,
other
);
}
operator
ivec4
()
const
;
operator
ivec4
()
const
;
};
};
...
@@ -174,6 +195,11 @@ static inline fvec4 max(fvec4 v1, fvec4 v2) {
...
@@ -174,6 +195,11 @@ static inline fvec4 max(fvec4 v1, fvec4 v2) {
return
fvec4
(
_mm_max_ps
(
v1
.
val
,
v2
.
val
));
return
fvec4
(
_mm_max_ps
(
v1
.
val
,
v2
.
val
));
}
}
static
inline
fvec4
abs
(
fvec4
v
)
{
static
const
__m128
mask
=
_mm_castsi128_ps
(
_mm_set1_epi32
(
0x7FFFFFFF
));
return
fvec4
(
_mm_and_ps
(
v
.
val
,
mask
));
}
static
inline
fvec4
sqrt
(
fvec4
v
)
{
static
inline
fvec4
sqrt
(
fvec4
v
)
{
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
}
...
@@ -182,6 +208,10 @@ static inline float dot3(fvec4 v1, fvec4 v2) {
...
@@ -182,6 +208,10 @@ static inline float dot3(fvec4 v1, fvec4 v2) {
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
}
static
inline
float
dot4
(
fvec4
v1
,
fvec4
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
}
// Functions that operate on ivec4s.
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
ivec4
v1
,
ivec4
v2
)
{
static
inline
ivec4
min
(
ivec4
v1
,
ivec4
v2
)
{
...
@@ -192,5 +222,27 @@ static inline ivec4 max(ivec4 v1, ivec4 v2) {
...
@@ -192,5 +222,27 @@ static inline ivec4 max(ivec4 v1, ivec4 v2) {
return
ivec4
(
_mm_max_epi32
(
v1
.
val
,
v2
.
val
));
return
ivec4
(
_mm_max_epi32
(
v1
.
val
,
v2
.
val
));
}
}
static
inline
ivec4
abs
(
ivec4
v
)
{
return
ivec4
(
_mm_abs_epi32
(
v
.
val
));
}
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
+
v2
;
}
static
inline
fvec4
operator
-
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
-
v2
;
}
static
inline
fvec4
operator
*
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
*
v2
;
}
static
inline
fvec4
operator
/
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
/
v2
;
}
#endif
/*OPENMM_VECTORIZE_H_*/
#endif
/*OPENMM_VECTORIZE_H_*/
platforms/cpu/include/CpuNeighborList.h
View file @
56902b45
...
@@ -13,19 +13,25 @@ class OPENMM_EXPORT_CPU CpuNeighborList {
...
@@ -13,19 +13,25 @@ class OPENMM_EXPORT_CPU CpuNeighborList {
public:
public:
class
ThreadData
;
class
ThreadData
;
class
VoxelHash
;
class
VoxelHash
;
static
const
int
BlockSize
;
CpuNeighborList
();
CpuNeighborList
();
~
CpuNeighborList
();
~
CpuNeighborList
();
void
computeNeighborList
(
int
numAtoms
,
const
std
::
vector
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
void
computeNeighborList
(
int
numAtoms
,
const
std
::
vector
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
);
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
);
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>&
getNeighbors
();
int
getNumBlocks
()
const
;
const
std
::
vector
<
int
>&
getSortedAtoms
()
const
;
const
std
::
vector
<
int
>&
getBlockNeighbors
(
int
blockIndex
)
const
;
const
std
::
vector
<
char
>&
getBlockExclusions
(
int
blockIndex
)
const
;
/**
/**
* This routine contains the code executed by each thread.
* This routine contains the code executed by each thread.
*/
*/
void
runThread
(
int
index
,
std
::
vector
<
std
::
pair
<
int
,
int
>
>&
threadNeighbors
);
void
runThread
(
int
index
);
private:
private:
bool
isDeleted
;
bool
isDeleted
;
int
numThreads
,
waitCount
;
int
numThreads
,
waitCount
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
neighbors
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
std
::
vector
<
pthread_t
>
thread
;
std
::
vector
<
pthread_t
>
thread
;
std
::
vector
<
ThreadData
*>
threadData
;
std
::
vector
<
ThreadData
*>
threadData
;
pthread_cond_t
startCondition
,
endCondition
;
pthread_cond_t
startCondition
,
endCondition
;
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
56902b45
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#ifndef OPENMM_CPU_NONBONDED_FORCE_H__
#ifndef OPENMM_CPU_NONBONDED_FORCE_H__
#define OPENMM_CPU_NONBONDED_FORCE_H__
#define OPENMM_CPU_NONBONDED_FORCE_H__
#include "CpuNeighborList.h"
#include "ReferencePairIxn.h"
#include "ReferencePairIxn.h"
#include "openmm/internal/vectorize.h"
#include "openmm/internal/vectorize.h"
#include <pthread.h>
#include <pthread.h>
...
@@ -33,6 +34,8 @@
...
@@ -33,6 +34,8 @@
#include <vector>
#include <vector>
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
class
CpuNonbondedForce
{
class
CpuNonbondedForce
{
public:
public:
class
ThreadData
;
class
ThreadData
;
...
@@ -63,7 +66,7 @@ class CpuNonbondedForce {
...
@@ -63,7 +66,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
setUseCutoff
(
float
distance
,
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>
&
neighbors
,
float
solventDielectric
);
void
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
);
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -127,9 +130,9 @@ class CpuNonbondedForce {
...
@@ -127,9 +130,9 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
void
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
std
::
vector
<
RealVec
>&
atomCoordinates
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
OpenMM
::
RealVec
>&
forces
,
float
*
totalEnergy
)
const
;
std
::
vector
<
RealVec
>&
forces
,
float
*
totalEnergy
)
const
;
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -159,14 +162,14 @@ private:
...
@@ -159,14 +162,14 @@ private:
bool
periodic
;
bool
periodic
;
bool
ewald
;
bool
ewald
;
bool
pme
;
bool
pme
;
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>
*
neighborList
;
const
CpuNeighborList
*
neighborList
;
float
periodicBoxSize
[
3
];
float
periodicBoxSize
[
3
];
float
cutoffDistance
,
switchingDistance
;
float
cutoffDistance
,
switchingDistance
;
float
krf
,
crf
;
float
krf
,
crf
;
float
alphaEwald
;
float
alphaEwald
;
int
numRx
,
numRy
,
numRz
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
int
meshDim
[
3
];
std
::
vector
<
float
>
ewaldScale
X
,
ewaldScaleY
,
ewaldScaleDeriv
;
std
::
vector
<
float
>
ewaldScale
Table
;
float
ewaldDX
,
ewaldDXInv
;
float
ewaldDX
,
ewaldDXInv
;
bool
isDeleted
;
bool
isDeleted
;
int
numThreads
,
waitCount
;
int
numThreads
,
waitCount
;
...
@@ -199,16 +202,27 @@ private:
...
@@ -199,16 +202,27 @@ private:
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Calculate
LJ Coulomb pair ixn between two atoms
Calculate
all the interactions for one atom block.
@param atom1 the index of the first atom
@param blockIndex the index of the atom block
@param atom2 the index of the second atom
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param forces force array (forces added)
@param totalEnergy total energy
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
calculate
One
EwaldIxn
(
int
atom1
,
int
atom2
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
void
calculate
Block
EwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**
/**
* Compute the displacement and squared distance between two points, optionally using
* Compute the displacement and squared distance between two points, optionally using
...
@@ -219,7 +233,7 @@ private:
...
@@ -219,7 +233,7 @@ private:
/**
/**
* Compute a fast approximation to erfc(x).
* Compute a fast approximation to erfc(x).
*/
*/
static
f
loat
erfcApprox
(
f
loat
x
);
static
f
vec4
erfcApprox
(
f
vec4
x
);
/**
/**
* Create a lookup table for the scale factor used with Ewald and PME.
* Create a lookup table for the scale factor used with Ewald and PME.
...
@@ -229,9 +243,11 @@ private:
...
@@ -229,9 +243,11 @@ private:
/**
/**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
*/
f
loat
ewaldScaleFunction
(
f
loat
x
);
f
vec4
ewaldScaleFunction
(
f
vec4
x
);
};
};
}
// namespace OpenMM
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_NONBONDED_FORCE_H__
#endif // OPENMM_CPU_NONBONDED_FORCE_H__
platforms/cpu/src/CpuKernels.cpp
View file @
56902b45
...
@@ -234,7 +234,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -234,7 +234,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
periodic
||
ewald
||
pme
,
nonbondedCutoff
+
padding
);
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
periodic
||
ewald
||
pme
,
nonbondedCutoff
+
padding
);
lastPositions
=
posData
;
lastPositions
=
posData
;
}
}
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
.
getNeighbors
()
,
rfDielectric
);
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
,
rfDielectric
);
}
}
if
(
periodic
||
ewald
||
pme
)
{
if
(
periodic
||
ewald
||
pme
)
{
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
56902b45
#include "CpuNeighborList.h"
#include "CpuNeighborList.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/vectorize.h"
#include "openmm/internal/vectorize.h"
#include "hilbert.h"
#include <algorithm>
#include <algorithm>
#include <set>
#include <set>
#include <map>
#include <map>
...
@@ -11,6 +12,8 @@ using namespace std;
...
@@ -11,6 +12,8 @@ using namespace std;
namespace
OpenMM
{
namespace
OpenMM
{
const
int
CpuNeighborList
::
BlockSize
=
4
;
class
VoxelIndex
class
VoxelIndex
{
{
public:
public:
...
@@ -77,23 +80,22 @@ public:
...
@@ -77,23 +80,22 @@ public:
return
VoxelIndex
(
x
,
y
,
z
);
return
VoxelIndex
(
x
,
y
,
z
);
}
}
void
getNeighbors
(
vector
<
pair
<
int
,
int
>
>&
neighbors
,
const
VoxelItem
&
referencePoint
,
const
vector
<
set
<
int
>
>&
exclusions
,
float
maxDistance
)
const
{
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
fvec4
blockCenter
,
fvec4
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>
blockAtoms
,
const
float
*
atomLocations
)
const
{
neighbors
.
resize
(
0
);
// Loop over neighboring voxels
exclusions
.
resize
(
0
);
// TODO use more clever selection of neighboring voxels
const
int
atomI
=
referencePoint
.
second
;
const
float
*
locationI
=
referencePoint
.
first
;
fvec4
posI
(
locationI
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
(
1
/
periodicBoxSize
[
0
],
1
/
periodicBoxSize
[
1
],
1
/
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
(
1
/
periodicBoxSize
[
0
],
1
/
periodicBoxSize
[
1
],
1
/
periodicBoxSize
[
2
],
0
);
float
maxDistanceSquared
=
maxDistance
*
maxDistance
;
float
maxDistanceSquared
=
maxDistance
*
maxDistance
;
float
refineCutoff
=
maxDistance
-
max
(
max
(
blockWidth
[
0
],
blockWidth
[
1
]),
blockWidth
[
2
]);
int
dIndexX
=
int
(
maxDistance
/
voxelSizeX
)
+
1
;
// How may voxels away do we have to look?
float
refineCutoffSquared
=
refineCutoff
*
refineCutoff
;
int
dIndexY
=
int
(
maxDistance
/
voxelSizeY
)
+
1
;
int
dIndexZ
=
int
(
maxDistance
/
voxelSizeZ
)
+
1
;
int
dIndexX
=
int
((
maxDistance
+
blockWidth
[
0
])
/
voxelSizeX
)
+
1
;
// How may voxels away do we have to look?
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
locationI
);
int
dIndexY
=
int
((
maxDistance
+
blockWidth
[
1
])
/
voxelSizeY
)
+
1
;
int
dIndexZ
=
int
((
maxDistance
+
blockWidth
[
2
])
/
voxelSizeZ
)
+
1
;
float
centerPos
[
4
];
blockCenter
.
store
(
centerPos
);
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
centerPos
);
int
lastx
=
centerVoxelIndex
.
x
+
dIndexX
;
int
lastx
=
centerVoxelIndex
.
x
+
dIndexX
;
int
lasty
=
centerVoxelIndex
.
y
+
dIndexY
;
int
lasty
=
centerVoxelIndex
.
y
+
dIndexY
;
int
lastz
=
centerVoxelIndex
.
z
+
dIndexZ
;
int
lastz
=
centerVoxelIndex
.
z
+
dIndexZ
;
...
@@ -102,6 +104,7 @@ public:
...
@@ -102,6 +104,7 @@ public:
lasty
=
min
(
lasty
,
centerVoxelIndex
.
y
-
dIndexY
+
ny
-
1
);
lasty
=
min
(
lasty
,
centerVoxelIndex
.
y
-
dIndexY
+
ny
-
1
);
lastz
=
min
(
lastz
,
centerVoxelIndex
.
z
-
dIndexZ
+
nz
-
1
);
lastz
=
min
(
lastz
,
centerVoxelIndex
.
z
-
dIndexZ
+
nz
-
1
);
}
}
int
lastSortedIndex
=
BlockSize
*
(
blockIndex
+
1
);
VoxelIndex
voxelIndex
(
0
,
0
,
0
);
VoxelIndex
voxelIndex
(
0
,
0
,
0
);
for
(
int
x
=
centerVoxelIndex
.
x
-
dIndexX
;
x
<=
lastx
;
++
x
)
{
for
(
int
x
=
centerVoxelIndex
.
x
-
dIndexX
;
x
<=
lastx
;
++
x
)
{
voxelIndex
.
x
=
x
;
voxelIndex
.
x
=
x
;
...
@@ -120,27 +123,52 @@ public:
...
@@ -120,27 +123,52 @@ public:
continue
;
// no such voxel; skip
continue
;
// no such voxel; skip
const
Voxel
&
voxel
=
voxelEntry
->
second
;
const
Voxel
&
voxel
=
voxelEntry
->
second
;
for
(
Voxel
::
const_iterator
itemIter
=
voxel
.
begin
();
itemIter
!=
voxel
.
end
();
++
itemIter
)
{
for
(
Voxel
::
const_iterator
itemIter
=
voxel
.
begin
();
itemIter
!=
voxel
.
end
();
++
itemIter
)
{
const
int
atomJ
=
itemIter
->
second
;
const
int
sortedIndex
=
itemIter
->
second
;
// Avoid duplicate entries.
// Avoid duplicate entries.
if
(
atomJ
>=
atomI
)
if
(
sortedIndex
>=
lastSortedIndex
)
break
;
break
;
fvec4
p
os
J
(
itemIter
->
first
);
fvec4
atomP
os
(
itemIter
->
first
);
fvec4
delta
=
posJ
-
p
os
I
;
fvec4
delta
=
atomPos
-
centerP
os
;
if
(
usePeriodic
)
{
if
(
usePeriodic
)
{
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
=
delta
-
base
;
delta
=
delta
-
base
;
}
}
delta
=
max
(
0.0
f
,
abs
(
delta
)
-
blockWidth
);
float
dSquared
=
dot3
(
delta
,
delta
);
float
dSquared
=
dot3
(
delta
,
delta
);
if
(
dSquared
>
maxDistanceSquared
)
if
(
dSquared
>
maxDistanceSquared
)
continue
;
continue
;
// Ignore exclusions.
if
(
dSquared
>
refineCutoffSquared
)
{
if
(
exclusions
[
atomI
].
find
(
atomJ
)
!=
exclusions
[
atomI
].
end
())
// The distance is large enough that there might not be any actual interactions.
// Check individual atom pairs to be sure.
bool
any
=
false
;
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
fvec4
pos1
(
&
atomLocations
[
4
*
blockAtoms
[
k
]]);
delta
=
atomPos
-
pos1
;
if
(
usePeriodic
)
{
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
=
delta
-
base
;
}
float
r2
=
dot3
(
delta
,
delta
);
if
(
r2
<
maxDistanceSquared
)
{
any
=
true
;
break
;
}
}
if
(
!
any
)
continue
;
continue
;
}
// Add this atom to the list of neighbors.
neighbors
.
push_back
(
make_pair
(
atomI
,
atomJ
));
neighbors
.
push_back
(
sortedAtoms
[
sortedIndex
]);
if
(
sortedIndex
<
BlockSize
*
blockIndex
)
exclusions
.
push_back
(
0
);
else
exclusions
.
push_back
(
0xF
&
(
0xF
<<
(
sortedIndex
-
BlockSize
*
blockIndex
)));
}
}
}
}
}
}
...
@@ -161,12 +189,11 @@ public:
...
@@ -161,12 +189,11 @@ public:
}
}
int
index
;
int
index
;
CpuNeighborList
&
owner
;
CpuNeighborList
&
owner
;
vector
<
pair
<
int
,
int
>
>
threadNeighbors
;
};
};
static
void
*
threadBody
(
void
*
args
)
{
static
void
*
threadBody
(
void
*
args
)
{
CpuNeighborList
::
ThreadData
&
data
=
*
reinterpret_cast
<
CpuNeighborList
::
ThreadData
*>
(
args
);
CpuNeighborList
::
ThreadData
&
data
=
*
reinterpret_cast
<
CpuNeighborList
::
ThreadData
*>
(
args
);
data
.
owner
.
runThread
(
data
.
index
,
data
.
threadNeighbors
);
data
.
owner
.
runThread
(
data
.
index
);
delete
&
data
;
delete
&
data
;
return
0
;
return
0
;
}
}
...
@@ -204,6 +231,45 @@ CpuNeighborList::~CpuNeighborList() {
...
@@ -204,6 +231,45 @@ CpuNeighborList::~CpuNeighborList() {
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
vector
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
vector
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
)
{
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
)
{
int
numBlocks
=
(
numAtoms
+
BlockSize
-
1
)
/
BlockSize
;
blockNeighbors
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
sortedAtoms
.
resize
(
numAtoms
);
// Sort the atoms based on a Hilbert curve.
float
minx
=
atomLocations
[
0
],
maxx
=
atomLocations
[
0
];
float
miny
=
atomLocations
[
1
],
maxy
=
atomLocations
[
1
];
float
minz
=
atomLocations
[
2
],
maxz
=
atomLocations
[
2
];
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
const
float
*
pos
=
&
atomLocations
[
4
*
i
];
if
(
pos
[
0
]
<
minx
)
minx
=
pos
[
0
];
if
(
pos
[
1
]
<
miny
)
miny
=
pos
[
1
];
if
(
pos
[
2
]
<
minz
)
minz
=
pos
[
2
];
if
(
pos
[
0
]
>
maxx
)
maxx
=
pos
[
0
];
if
(
pos
[
1
]
>
maxy
)
maxy
=
pos
[
1
];
if
(
pos
[
2
]
>
maxz
)
maxz
=
pos
[
2
];
}
float
binWidth
=
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
f
;
float
invBinWidth
=
1.0
f
/
binWidth
;
vector
<
pair
<
int
,
int
>
>
atomBins
(
numAtoms
);
bitmask_t
coords
[
3
];
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
const
float
*
pos
=
&
atomLocations
[
4
*
i
];
coords
[
0
]
=
(
bitmask_t
)
((
pos
[
0
]
-
minx
)
*
invBinWidth
);
coords
[
1
]
=
(
bitmask_t
)
((
pos
[
1
]
-
miny
)
*
invBinWidth
);
coords
[
2
]
=
(
bitmask_t
)
((
pos
[
2
]
-
minz
)
*
invBinWidth
);
int
bin
=
(
int
)
hilbert_c2i
(
3
,
8
,
coords
);
atomBins
[
i
]
=
pair
<
int
,
int
>
(
bin
,
i
);
}
sort
(
atomBins
.
begin
(),
atomBins
.
end
());
// Build the voxel hash.
// Build the voxel hash.
float
edgeSizeX
,
edgeSizeY
,
edgeSizeZ
;
float
edgeSizeX
,
edgeSizeY
,
edgeSizeZ
;
...
@@ -215,8 +281,11 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
...
@@ -215,8 +281,11 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
edgeSizeZ
=
0.5
f
*
periodicBoxSize
[
2
]
/
floorf
(
periodicBoxSize
[
2
]
/
maxDistance
);
edgeSizeZ
=
0.5
f
*
periodicBoxSize
[
2
]
/
floorf
(
periodicBoxSize
[
2
]
/
maxDistance
);
}
}
VoxelHash
voxelHash
(
edgeSizeX
,
edgeSizeY
,
edgeSizeZ
,
periodicBoxSize
,
usePeriodic
);
VoxelHash
voxelHash
(
edgeSizeX
,
edgeSizeY
,
edgeSizeZ
,
periodicBoxSize
,
usePeriodic
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
voxelHash
.
insert
(
i
,
&
atomLocations
[
4
*
i
]);
int
atomIndex
=
atomBins
[
i
].
second
;
sortedAtoms
[
i
]
=
atomIndex
;
voxelHash
.
insert
(
i
,
&
atomLocations
[
4
*
atomIndex
]);
}
// Record the parameters for the threads.
// Record the parameters for the threads.
...
@@ -237,18 +306,37 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
...
@@ -237,18 +306,37 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
pthread_mutex_unlock
(
&
lock
);
//
Combine the results from all the threads
.
//
Add padding atoms to fill up the last block
.
neighbors
.
clear
();
int
numPadding
=
numBlocks
*
BlockSize
-
numAtoms
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
if
(
numPadding
>
0
)
{
neighbors
.
insert
(
neighbors
.
end
(),
threadData
[
i
]
->
threadNeighbors
.
begin
(),
threadData
[
i
]
->
threadNeighbors
.
end
());
char
mask
=
(
0xF0
>>
numPadding
)
&
0xF
;
for
(
int
i
=
0
;
i
<
numPadding
;
i
++
)
sortedAtoms
.
push_back
(
0
);
vector
<
char
>&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
for
(
int
i
=
0
;
i
<
(
int
)
exc
.
size
();
i
++
)
exc
[
i
]
|=
mask
;
}
}
int
CpuNeighborList
::
getNumBlocks
()
const
{
return
sortedAtoms
.
size
()
/
BlockSize
;
}
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
return
sortedAtoms
;
}
}
const
vector
<
pair
<
int
,
int
>
>&
CpuNeighborList
::
getNeighbors
(
)
{
const
std
::
vector
<
int
>&
CpuNeighborList
::
get
Block
Neighbors
(
int
blockIndex
)
const
{
return
n
eighbors
;
return
blockN
eighbors
[
blockIndex
]
;
}
}
void
CpuNeighborList
::
runThread
(
int
index
,
vector
<
pair
<
int
,
int
>
>&
threadNeighbors
)
{
const
std
::
vector
<
char
>&
CpuNeighborList
::
getBlockExclusions
(
int
blockIndex
)
const
{
return
blockExclusions
[
blockIndex
];
}
void
CpuNeighborList
::
runThread
(
int
index
)
{
while
(
true
)
{
while
(
true
)
{
// Wait for the signal to start running.
// Wait for the signal to start running.
...
@@ -262,9 +350,41 @@ void CpuNeighborList::runThread(int index, vector<pair<int, int> >& threadNeighb
...
@@ -262,9 +350,41 @@ void CpuNeighborList::runThread(int index, vector<pair<int, int> >& threadNeighb
// Compute this thread's subset of neighbors.
// Compute this thread's subset of neighbors.
threadNeighbors
.
clear
();
int
numBlocks
=
blockNeighbors
.
size
();
for
(
int
i
=
index
;
i
<
numAtoms
;
i
+=
numThreads
)
vector
<
int
>
blockAtoms
;
voxelHash
->
getNeighbors
(
threadNeighbors
,
VoxelItem
(
&
atomLocations
[
4
*
i
],
i
),
*
exclusions
,
maxDistance
);
for
(
int
i
=
index
;
i
<
numBlocks
;
i
+=
numThreads
)
{
{
int
firstIndex
=
BlockSize
*
i
;
int
atomsInBlock
=
min
(
BlockSize
,
numAtoms
-
firstIndex
);
blockAtoms
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
}
int
firstIndex
=
BlockSize
*
i
;
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
maxPos
=
minPos
;
int
atomsInBlock
=
min
(
BlockSize
,
numAtoms
-
firstIndex
);
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
fvec4
pos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
+
j
]]);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
voxelHash
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
);
// Record the exclusions for this block.
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
const
set
<
int
>&
atomExclusions
=
(
*
exclusions
)[
sortedAtoms
[
firstIndex
+
j
]];
char
mask
=
1
<<
j
;
for
(
int
k
=
0
;
k
<
(
int
)
blockNeighbors
[
i
].
size
();
k
++
)
{
int
atomIndex
=
blockNeighbors
[
i
][
k
];
if
(
atomExclusions
.
find
(
atomIndex
)
!=
atomExclusions
.
end
())
blockExclusions
[
i
][
k
]
|=
mask
;
}
}
}
}
}
}
}
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
56902b45
...
@@ -114,7 +114,7 @@ CpuNonbondedForce::~CpuNonbondedForce(){
...
@@ -114,7 +114,7 @@ CpuNonbondedForce::~CpuNonbondedForce(){
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUseCutoff
(
float
distance
,
const
vector
<
pair
<
int
,
int
>
>
&
neighbors
,
float
solventDielectric
)
{
void
CpuNonbondedForce
::
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
)
{
cutoff
=
true
;
cutoff
=
true
;
cutoffDistance
=
distance
;
cutoffDistance
=
distance
;
...
@@ -200,23 +200,22 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
...
@@ -200,23 +200,22 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
void
CpuNonbondedForce
::
tabulateEwaldScaleFactor
()
{
void
CpuNonbondedForce
::
tabulateEwaldScaleFactor
()
{
ewaldDX
=
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
);
ewaldDX
=
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
);
ewaldDXInv
=
1.0
f
/
ewaldDX
;
ewaldDXInv
=
1.0
f
/
ewaldDX
;
vector
<
double
>
x
(
NUM_TABLE_POINTS
);
vector
<
double
>
x
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
y
(
NUM_TABLE_POINTS
);
vector
<
double
>
y
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
deriv
;
vector
<
double
>
deriv
;
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
1
;
i
++
)
{
double
r
=
i
*
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
);
double
r
=
i
*
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
);
double
alphaR
=
alphaEwald
*
r
;
double
alphaR
=
alphaEwald
*
r
;
x
[
i
]
=
r
;
x
[
i
]
=
r
;
y
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
y
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
}
}
SplineFitter
::
createNaturalSpline
(
x
,
y
,
deriv
);
SplineFitter
::
createNaturalSpline
(
x
,
y
,
deriv
);
ewaldScaleX
.
resize
(
NUM_TABLE_POINTS
);
ewaldScaleTable
.
resize
(
4
*
NUM_TABLE_POINTS
);
ewaldScaleY
.
resize
(
NUM_TABLE_POINTS
);
ewaldScaleDeriv
.
resize
(
NUM_TABLE_POINTS
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
;
i
++
)
{
ewaldScaleX
[
i
]
=
(
float
)
x
[
i
];
ewaldScaleTable
[
4
*
i
]
=
(
float
)
y
[
i
];
ewaldScaleY
[
i
]
=
(
float
)
y
[
i
];
ewaldScaleTable
[
4
*
i
+
1
]
=
(
float
)
y
[
i
+
1
];
ewaldScaleDeriv
[
i
]
=
(
float
)
(
deriv
[
i
]
*
ewaldDX
*
ewaldDX
/
6
);
ewaldScaleTable
[
4
*
i
+
2
]
=
(
float
)
(
deriv
[
i
]
*
ewaldDX
*
ewaldDX
/
6
);
ewaldScaleTable
[
4
*
i
+
3
]
=
(
float
)
(
deriv
[
i
+
1
]
*
ewaldDX
*
ewaldDX
/
6
);
}
}
}
}
...
@@ -384,7 +383,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
...
@@ -384,7 +383,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
float
inverseR
=
1
/
r
;
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
ii
+
3
]
*
posq
[
4
*
jj
+
3
];
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
ii
+
3
]
*
posq
[
4
*
jj
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
);
float
erfcAlphaR
=
erfcApprox
(
alphaR
)
[
0
]
;
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
fvec4
result
=
deltaR
*
dEdR
;
...
@@ -424,18 +423,14 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double&
...
@@ -424,18 +423,14 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double&
if
(
ewald
||
pme
)
{
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
// Compute the interactions from the neighbor list.
for
(
int
i
=
index
;
i
<
(
int
)
neighborList
->
size
();
i
+=
numThreads
)
{
for
(
int
i
=
index
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateBlockEwaldIxn
(
i
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
calculateOneEwaldIxn
(
pair
.
first
,
pair
.
second
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
}
}
}
else
if
(
cutoff
)
{
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
// Compute the interactions from the neighbor list.
for
(
int
i
=
index
;
i
<
(
int
)
neighborList
->
size
();
i
+=
numThreads
)
{
for
(
int
i
=
index
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateBlockIxn
(
i
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
calculateOneIxn
(
pair
.
first
,
pair
.
second
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
}
}
}
else
{
else
{
// Loop over all atom pairs
// Loop over all atom pairs
...
@@ -503,49 +498,196 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
...
@@ -503,49 +498,196 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
}
}
void
CpuNonbondedForce
::
calculateOneEwaldIxn
(
int
ii
,
int
jj
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForce
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
fvec4
deltaR
;
// Load the positions and parameters of the atoms in the block.
fvec4
posI
(
posq
+
4
*
ii
);
fvec4
posJ
(
posq
+
4
*
jj
);
int
blockAtom
[
4
];
float
r2
;
fvec4
blockAtomPosq
[
4
];
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
true
,
boxSize
,
invBoxSize
);
fvec4
blockAtomForce
[
4
];
if
(
r2
<
cutoffDistance
*
cutoffDistance
)
{
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
float
r
=
sqrtf
(
r2
);
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
float
inverseR
=
1
/
r
;
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
float
switchValue
=
1
,
switchDeriv
=
0
;
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
if
(
useSwitch
&&
r
>
switchingDistance
)
{
float
t
=
(
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
);
switchValue
=
1
+
t
*
t
*
t
*
(
-
10
+
t
*
(
15
-
t
*
6
));
switchDeriv
=
t
*
t
*
(
-
30
+
t
*
(
60
-
t
*
30
))
/
(
cutoffDistance
-
switchingDistance
);
}
}
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
ii
+
3
]
*
posq
[
4
*
jj
+
3
];
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
float
dEdR
=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
float
sig
=
atomParameters
[
ii
].
first
+
atomParameters
[
jj
].
first
;
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
float
sig2
=
inverseR
*
sig
;
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
float
blockAtomR2
[
4
];
bool
include
[
4
];
fvec4
blockAtomDelta
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
bool
any
=
false
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
getDeltaR
(
atomPosq
,
blockAtomPosq
[
j
],
blockAtomDelta
[
j
],
blockAtomR2
[
j
],
periodic
,
boxSize
,
invBoxSize
);
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
(
!
cutoff
||
blockAtomR2
[
j
]
<
cutoffDistance
*
cutoffDistance
));
any
|=
include
[
j
];
}
if
(
!
any
)
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r2
(
blockAtomR2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
}
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
atomParameters
[
ii
].
second
*
atomParameters
[
jj
].
second
;
fvec4
eps
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
;
dEdR
+=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
fvec4
dEdR
=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
f
loat
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
f
vec4
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
energy
*=
switchValue
;
}
}
//
a
ccumulate
forc
es
//
A
ccumulate
energi
es
.
fvec4
result
=
deltaR
*
dEdR
;
if
(
totalEnergy
)
{
(
fvec4
(
forces
+
4
*
ii
)
+
result
).
store
(
forces
+
4
*
ii
);
if
(
cutoff
)
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
if
(
include
[
j
])
*
totalEnergy
+=
energy
[
j
];
}
// accumulate energies
// Accumulate forces.
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
fvec4
result
=
blockAtomDelta
[
j
]
*
dEdR
[
j
];
blockAtomForce
[
j
]
+=
result
;
atomForce
-=
result
;
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForce
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForce
[
4
];
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
float
blockAtomR2
[
4
];
bool
include
[
4
];
fvec4
blockAtomDelta
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
bool
any
=
false
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
getDeltaR
(
atomPosq
,
blockAtomPosq
[
j
],
blockAtomDelta
[
j
],
blockAtomR2
[
j
],
periodic
,
boxSize
,
invBoxSize
);
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
blockAtomR2
[
j
]
<
cutoffDistance
*
cutoffDistance
);
any
|=
include
[
j
];
}
if
(
!
any
)
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r2
(
blockAtomR2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec4
dEdR
=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
eps
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
;
dEdR
+=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
totalEnergy
)
{
energy
+=
(
float
)
(
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
));
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
*
totalEnergy
+=
energy
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
if
(
include
[
j
])
*
totalEnergy
+=
energy
[
j
];
}
}
// Accumulate forces.
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
fvec4
result
=
blockAtomDelta
[
j
]
*
dEdR
[
j
];
blockAtomForce
[
j
]
+=
result
;
atomForce
-=
result
;
}
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
...
@@ -557,24 +699,33 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
...
@@ -557,24 +699,33 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
r2
=
dot3
(
deltaR
,
deltaR
);
r2
=
dot3
(
deltaR
,
deltaR
);
}
}
f
loat
CpuNonbondedForce
::
erfcApprox
(
f
loat
x
)
{
f
vec4
CpuNonbondedForce
::
erfcApprox
(
f
vec4
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
// error of 3e-7.
f
loat
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
f
vec4
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
return
1.0
f
/
(
t
*
t
);
}
}
f
loat
CpuNonbondedForce
::
ewaldScaleFunction
(
f
loat
x
)
{
f
vec4
CpuNonbondedForce
::
ewaldScaleFunction
(
f
vec4
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
int
lower
=
(
int
)
(
x
*
ewaldDXInv
);
float
y
[
4
];
int
upper
=
lower
+
1
;
fvec4
x1
=
x
*
ewaldDXInv
;
float
a
=
(
ewaldScaleX
[
upper
]
-
x
)
*
ewaldDXInv
;
ivec4
index
=
floor
(
x1
);
float
b
=
1.0
f
-
a
;
fvec4
coeff
[
4
];
return
a
*
ewaldScaleY
[
lower
]
+
b
*
ewaldScaleY
[
upper
]
+
((
a
*
a
*
a
-
a
)
*
ewaldScaleDeriv
[
lower
]
+
(
b
*
b
*
b
-
b
)
*
ewaldScaleDeriv
[
upper
]);
coeff
[
1
]
=
x1
-
index
;
coeff
[
0
]
=
1.0
f
-
coeff
[
1
];
coeff
[
2
]
=
coeff
[
0
]
*
coeff
[
0
]
*
coeff
[
0
]
-
coeff
[
0
];
coeff
[
3
]
=
coeff
[
1
]
*
coeff
[
1
]
*
coeff
[
1
]
-
coeff
[
1
];
_MM_TRANSPOSE4_PS
(
coeff
[
0
],
coeff
[
1
],
coeff
[
2
],
coeff
[
3
]);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
if
(
index
[
i
]
<
NUM_TABLE_POINTS
)
y
[
i
]
=
dot4
(
coeff
[
i
],
fvec4
(
&
ewaldScaleTable
[
4
*
index
[
i
]]));
}
return
fvec4
(
y
);
}
}
platforms/cpu/tests/TestCpuNeighborList.cpp
View file @
56902b45
...
@@ -39,6 +39,7 @@
...
@@ -39,6 +39,7 @@
#include "sfmt/SFMT.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <iostream>
#include <set>
#include <set>
#include <utility>
#include <vector>
#include <vector>
using
namespace
OpenMM
;
using
namespace
OpenMM
;
...
@@ -68,11 +69,20 @@ void testNeighborList(bool periodic) {
...
@@ -68,11 +69,20 @@ void testNeighborList(bool periodic) {
// Convert the neighbor list to a set for faster lookup.
// Convert the neighbor list to a set for faster lookup.
set
<
pair
<
int
,
int
>
>
neighbors
;
set
<
pair
<
int
,
int
>
>
neighbors
;
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
.
getNeighbors
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
.
getSortedAtoms
().
size
();
i
++
)
{
pair
<
int
,
int
>
entry
=
neighborList
.
getNeighbors
()[
i
];
int
blockIndex
=
i
/
CpuNeighborList
::
BlockSize
;
int
indexInBlock
=
i
-
blockIndex
*
CpuNeighborList
::
BlockSize
;
char
mask
=
1
<<
indexInBlock
;
for
(
int
j
=
0
;
j
<
(
int
)
neighborList
.
getBlockExclusions
(
blockIndex
).
size
();
j
++
)
{
if
((
neighborList
.
getBlockExclusions
(
blockIndex
)[
j
]
&
mask
)
==
0
)
{
int
atom1
=
neighborList
.
getSortedAtoms
()[
i
];
int
atom2
=
neighborList
.
getBlockNeighbors
(
blockIndex
)[
j
];
pair
<
int
,
int
>
entry
=
make_pair
(
min
(
atom1
,
atom2
),
max
(
atom1
,
atom2
));
ASSERT
(
neighbors
.
find
(
entry
)
==
neighbors
.
end
()
&&
neighbors
.
find
(
make_pair
(
entry
.
second
,
entry
.
first
))
==
neighbors
.
end
());
// No duplicates
ASSERT
(
neighbors
.
find
(
entry
)
==
neighbors
.
end
()
&&
neighbors
.
find
(
make_pair
(
entry
.
second
,
entry
.
first
))
==
neighbors
.
end
());
// No duplicates
neighbors
.
insert
(
entry
);
neighbors
.
insert
(
entry
);
}
}
}
}
// Check each particle pair and figure out whether they should be in the neighbor list.
// Check each particle pair and figure out whether they should be in the neighbor list.
...
@@ -90,7 +100,8 @@ void testNeighborList(bool periodic) {
...
@@ -90,7 +100,8 @@ void testNeighborList(bool periodic) {
if
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cutoff
*
cutoff
)
if
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cutoff
*
cutoff
)
shouldInclude
=
false
;
shouldInclude
=
false
;
bool
isIncluded
=
(
neighbors
.
find
(
make_pair
(
i
,
j
))
!=
neighbors
.
end
()
||
neighbors
.
find
(
make_pair
(
j
,
i
))
!=
neighbors
.
end
());
bool
isIncluded
=
(
neighbors
.
find
(
make_pair
(
i
,
j
))
!=
neighbors
.
end
()
||
neighbors
.
find
(
make_pair
(
j
,
i
))
!=
neighbors
.
end
());
ASSERT_EQUAL
(
shouldInclude
,
isIncluded
);
if
(
shouldInclude
)
ASSERT
(
isIncluded
);
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment