Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
e22808f3
"platforms/reference/vscode:/vscode.git/clone" did not exist on "421b8f67f5537fa6b2bda7d48585938a448fe365"
Commit
e22808f3
authored
Dec 16, 2013
by
peastman
Browse files
Created AVX implementation of CPU nonbonded force
parent
ddad7c6c
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
1017 additions
and
27 deletions
+1017
-27
openmmapi/include/openmm/internal/vectorize8.h
openmmapi/include/openmm/internal/vectorize8.h
+6
-0
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+2
-2
platforms/cpu/include/CpuNonbondedForceVec8.h
platforms/cpu/include/CpuNonbondedForceVec8.h
+253
-0
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+24
-22
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+728
-0
platforms/cpu/tests/TestCpuNeighborList.cpp
platforms/cpu/tests/TestCpuNeighborList.cpp
+4
-3
No files found.
openmmapi/include/openmm/internal/vectorize8.h
View file @
e22808f3
...
@@ -132,6 +132,12 @@ public:
...
@@ -132,6 +132,12 @@ public:
operator
__m256i
()
const
{
operator
__m256i
()
const
{
return
val
;
return
val
;
}
}
ivec4
lowerVec
()
const
{
return
_mm256_castsi256_si128
(
val
);
}
ivec4
upperVec
()
const
{
return
_mm256_extractf128_si256
(
val
,
1
);
}
void
store
(
int
*
v
)
const
{
void
store
(
int
*
v
)
const
{
_mm256_storeu_si256
((
__m256i
*
)
v
,
val
);
_mm256_storeu_si256
((
__m256i
*
)
v
,
val
);
}
}
...
...
platforms/cpu/include/CpuNeighborList.h
View file @
e22808f3
...
@@ -45,8 +45,7 @@ class OPENMM_EXPORT_CPU CpuNeighborList {
...
@@ -45,8 +45,7 @@ class OPENMM_EXPORT_CPU CpuNeighborList {
public:
public:
class
ThreadTask
;
class
ThreadTask
;
class
Voxels
;
class
Voxels
;
static
const
int
BlockSize
;
CpuNeighborList
(
int
blockSize
);
CpuNeighborList
();
void
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
void
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
int
getNumBlocks
()
const
;
int
getNumBlocks
()
const
;
...
@@ -59,6 +58,7 @@ public:
...
@@ -59,6 +58,7 @@ public:
void
threadComputeNeighborList
(
ThreadPool
&
threads
,
int
threadIndex
);
void
threadComputeNeighborList
(
ThreadPool
&
threads
,
int
threadIndex
);
void
runThread
(
int
index
);
void
runThread
(
int
index
);
private:
private:
int
blockSize
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
...
...
platforms/cpu/include/CpuNonbondedForceVec8.h
0 → 100644
View file @
e22808f3
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_H__
#define OPENMM_CPU_NONBONDED_FORCE_H__
#include "AlignedArray.h"
#include "CpuNeighborList.h"
#include "ReferencePairIxn.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize8.h"
#include <set>
#include <utility>
#include <vector>
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
class
CpuNonbondedForceVec8
{
public:
class
ComputeDirectTask
;
/**---------------------------------------------------------------------------------------
Constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec8
();
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */
void
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
);
/**---------------------------------------------------------------------------------------
Set the force to use a switching function on the Lennard-Jones interaction.
@param distance the switching distance
--------------------------------------------------------------------------------------- */
void
setUseSwitchingFunction
(
float
distance
);
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
already been set, and the smallest side of the periodic box is at least twice the cutoff
distance.
@param boxSize the X, Y, and Z widths of the periodic box
--------------------------------------------------------------------------------------- */
void
setPeriodic
(
float
*
periodicBoxSize
);
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
@param alpha the Ewald separation parameter
@param kmaxx the largest wave vector in the x direction
@param kmaxy the largest wave vector in the y direction
@param kmaxz the largest wave vector in the z direction
--------------------------------------------------------------------------------------- */
void
setUseEwald
(
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void
setUsePME
(
float
alpha
,
int
meshSize
[
3
]);
/**---------------------------------------------------------------------------------------
Calculate Ewald ixn
@param numberOfAtoms number of atoms
@param posq atom coordinates and charges
@param atomCoordinates atom coordinates (in format needed by PME)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
RealVec
>&
atomCoordinates
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
RealVec
>&
forces
,
double
*
totalEnergy
)
const
;
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn
@param numberOfAtoms number of atoms
@param posq atom coordinates and charges
@param atomCoordinates atom coordinates (periodic boundary conditions not applied)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added)
@param totalEnergy total energy
@param threads the thread pool to use
--------------------------------------------------------------------------------------- */
void
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
RealVec
>&
atomCoordinates
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
);
/**
* This routine contains the code executed by each thread.
*/
void
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
);
private:
bool
cutoff
;
bool
useSwitch
;
bool
periodic
;
bool
ewald
;
bool
pme
;
bool
tableIsValid
;
const
CpuNeighborList
*
neighborList
;
float
periodicBoxSize
[
3
];
float
cutoffDistance
,
switchingDistance
;
float
krf
,
crf
;
float
alphaEwald
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
std
::
vector
<
float
>
ewaldScaleTable
;
float
ewaldDX
,
ewaldDXInv
;
std
::
vector
<
double
>
threadEnergy
;
// The following variables are used to make information accessible to the individual threads.
int
numberOfAtoms
;
float
*
posq
;
RealVec
const
*
atomCoordinates
;
std
::
pair
<
float
,
float
>
const
*
atomParameters
;
std
::
set
<
int
>
const
*
exclusions
;
std
::
vector
<
AlignedArray
<
float
>
>*
threadForce
;
bool
includeEnergy
;
void
*
atomicCounter
;
static
const
float
TWO_OVER_SQRT_PI
;
static
const
int
NUM_TABLE_POINTS
;
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn between two atoms
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateOneIxn
(
int
atom1
,
int
atom2
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**
* Compute the displacement and squared distance between two points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
*/
static
fvec8
erfcApprox
(
fvec8
x
);
/**
* Create a lookup table for the scale factor used with Ewald and PME.
*/
void
tabulateEwaldScaleFactor
();
/**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
fvec8
ewaldScaleFunction
(
fvec8
x
);
};
}
// namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_NONBONDED_FORCE_H__
platforms/cpu/src/CpuNeighborList.cpp
View file @
e22808f3
...
@@ -43,8 +43,6 @@ using namespace std;
...
@@ -43,8 +43,6 @@ using namespace std;
namespace
OpenMM
{
namespace
OpenMM
{
const
int
CpuNeighborList
::
BlockSize
=
4
;
class
VoxelIndex
class
VoxelIndex
{
{
public:
public:
...
@@ -62,8 +60,8 @@ public:
...
@@ -62,8 +60,8 @@ public:
*/
*/
class
CpuNeighborList
::
Voxels
{
class
CpuNeighborList
::
Voxels
{
public:
public:
Voxels
(
float
vsx
,
float
vsy
,
float
minx
,
float
maxx
,
float
miny
,
float
maxy
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
)
:
Voxels
(
int
blockSize
,
float
vsx
,
float
vsy
,
float
minx
,
float
maxx
,
float
miny
,
float
maxy
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
)
:
voxelSizeX
(
vsx
),
voxelSizeY
(
vsy
),
minx
(
minx
),
maxx
(
maxx
),
miny
(
miny
),
maxy
(
maxy
),
periodicBoxSize
(
periodicBoxSize
),
usePeriodic
(
usePeriodic
)
{
blockSize
(
blockSize
),
voxelSizeX
(
vsx
),
voxelSizeY
(
vsy
),
minx
(
minx
),
maxx
(
maxx
),
miny
(
miny
),
maxy
(
maxy
),
periodicBoxSize
(
periodicBoxSize
),
usePeriodic
(
usePeriodic
)
{
if
(
usePeriodic
)
{
if
(
usePeriodic
)
{
nx
=
(
int
)
floorf
(
periodicBoxSize
[
0
]
/
voxelSizeX
+
0.5
f
);
nx
=
(
int
)
floorf
(
periodicBoxSize
[
0
]
/
voxelSizeX
+
0.5
f
);
ny
=
(
int
)
floorf
(
periodicBoxSize
[
1
]
/
voxelSizeY
+
0.5
f
);
ny
=
(
int
)
floorf
(
periodicBoxSize
[
1
]
/
voxelSizeY
+
0.5
f
);
...
@@ -156,7 +154,7 @@ public:
...
@@ -156,7 +154,7 @@ public:
return
VoxelIndex
(
x
,
y
);
return
VoxelIndex
(
x
,
y
);
}
}
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
fvec4
blockCenter
,
fvec4
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>
blockAtoms
,
const
float
*
atomLocations
)
const
{
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
fvec4
blockCenter
,
fvec4
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>
&
blockAtoms
,
const
float
*
atomLocations
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
neighbors
.
resize
(
0
);
neighbors
.
resize
(
0
);
exclusions
.
resize
(
0
);
exclusions
.
resize
(
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
...
@@ -175,9 +173,6 @@ public:
...
@@ -175,9 +173,6 @@ public:
float
centerPos
[
4
];
float
centerPos
[
4
];
blockCenter
.
store
(
centerPos
);
blockCenter
.
store
(
centerPos
);
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
centerPos
);
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
centerPos
);
VoxelIndex
atomVoxelIndex
[
BlockSize
];
for
(
int
i
=
0
;
i
<
(
int
)
blockAtoms
.
size
();
i
++
)
atomVoxelIndex
[
i
]
=
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
i
]]);
int
startx
=
centerVoxelIndex
.
x
-
dIndexX
;
int
startx
=
centerVoxelIndex
.
x
-
dIndexX
;
int
starty
=
centerVoxelIndex
.
y
-
dIndexY
;
int
starty
=
centerVoxelIndex
.
y
-
dIndexY
;
int
endx
=
centerVoxelIndex
.
x
+
dIndexX
;
int
endx
=
centerVoxelIndex
.
x
+
dIndexX
;
...
@@ -193,7 +188,7 @@ public:
...
@@ -193,7 +188,7 @@ public:
endx
=
min
(
endx
,
nx
-
1
);
endx
=
min
(
endx
,
nx
-
1
);
endy
=
min
(
endy
,
ny
-
1
);
endy
=
min
(
endy
,
ny
-
1
);
}
}
int
lastSortedIndex
=
B
lockSize
*
(
blockIndex
+
1
);
int
lastSortedIndex
=
b
lockSize
*
(
blockIndex
+
1
);
VoxelIndex
voxelIndex
(
0
,
0
);
VoxelIndex
voxelIndex
(
0
,
0
);
for
(
int
x
=
startx
;
x
<=
endx
;
++
x
)
{
for
(
int
x
=
startx
;
x
<=
endx
;
++
x
)
{
voxelIndex
.
x
=
x
;
voxelIndex
.
x
=
x
;
...
@@ -300,10 +295,12 @@ public:
...
@@ -300,10 +295,12 @@ public:
// Add this atom to the list of neighbors.
// Add this atom to the list of neighbors.
neighbors
.
push_back
(
sortedAtoms
[
sortedIndex
]);
neighbors
.
push_back
(
sortedAtoms
[
sortedIndex
]);
if
(
sortedIndex
<
B
lockSize
*
blockIndex
)
if
(
sortedIndex
<
b
lockSize
*
blockIndex
)
exclusions
.
push_back
(
0
);
exclusions
.
push_back
(
0
);
else
else
{
exclusions
.
push_back
(
0xF
&
(
0xF
<<
(
sortedIndex
-
BlockSize
*
blockIndex
)));
int
mask
=
(
1
<<
blockSize
)
-
1
;
exclusions
.
push_back
(
mask
&
(
mask
<<
(
sortedIndex
-
blockSize
*
blockIndex
)));
}
}
}
}
}
}
}
...
@@ -311,6 +308,7 @@ public:
...
@@ -311,6 +308,7 @@ public:
}
}
private:
private:
int
blockSize
;
float
voxelSizeX
,
voxelSizeY
;
float
voxelSizeX
,
voxelSizeY
;
float
minx
,
maxx
,
miny
,
maxy
;
float
minx
,
maxx
,
miny
,
maxy
;
int
nx
,
ny
;
int
nx
,
ny
;
...
@@ -329,12 +327,12 @@ public:
...
@@ -329,12 +327,12 @@ public:
CpuNeighborList
&
owner
;
CpuNeighborList
&
owner
;
};
};
CpuNeighborList
::
CpuNeighborList
()
{
CpuNeighborList
::
CpuNeighborList
(
int
blockSize
)
:
blockSize
(
blockSize
)
{
}
}
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
)
{
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
)
{
int
numBlocks
=
(
numAtoms
+
B
lockSize
-
1
)
/
B
lockSize
;
int
numBlocks
=
(
numAtoms
+
b
lockSize
-
1
)
/
b
lockSize
;
blockNeighbors
.
resize
(
numBlocks
);
blockNeighbors
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
sortedAtoms
.
resize
(
numAtoms
);
sortedAtoms
.
resize
(
numAtoms
);
...
@@ -381,7 +379,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -381,7 +379,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
edgeSizeX
=
0.6
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeX
=
0.6
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeY
=
0.6
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
edgeSizeY
=
0.6
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
}
}
Voxels
voxels
(
edgeSizeX
,
edgeSizeY
,
minx
,
maxx
,
miny
,
maxy
,
periodicBoxSize
,
usePeriodic
);
Voxels
voxels
(
blockSize
,
edgeSizeX
,
edgeSizeY
,
minx
,
maxx
,
miny
,
maxy
,
periodicBoxSize
,
usePeriodic
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
atomIndex
=
atomBins
[
i
].
second
;
int
atomIndex
=
atomBins
[
i
].
second
;
sortedAtoms
[
i
]
=
atomIndex
;
sortedAtoms
[
i
]
=
atomIndex
;
...
@@ -397,9 +395,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -397,9 +395,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Add padding atoms to fill up the last block.
// Add padding atoms to fill up the last block.
int
numPadding
=
numBlocks
*
B
lockSize
-
numAtoms
;
int
numPadding
=
numBlocks
*
b
lockSize
-
numAtoms
;
if
(
numPadding
>
0
)
{
if
(
numPadding
>
0
)
{
char
mask
=
(
0xF
0
>>
numPadding
)
&
0xF
;
char
mask
=
(
(
0xF
FFF
-
(
1
<<
blockSize
)
+
1
)
>>
numPadding
);
for
(
int
i
=
0
;
i
<
numPadding
;
i
++
)
for
(
int
i
=
0
;
i
<
numPadding
;
i
++
)
sortedAtoms
.
push_back
(
0
);
sortedAtoms
.
push_back
(
0
);
vector
<
char
>&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
vector
<
char
>&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
...
@@ -409,7 +407,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -409,7 +407,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
}
}
int
CpuNeighborList
::
getNumBlocks
()
const
{
int
CpuNeighborList
::
getNumBlocks
()
const
{
return
sortedAtoms
.
size
()
/
B
lockSize
;
return
sortedAtoms
.
size
()
/
b
lockSize
;
}
}
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
...
@@ -446,14 +444,18 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
...
@@ -446,14 +444,18 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int
numBlocks
=
blockNeighbors
.
size
();
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
vector
<
int
>
blockAtoms
;
vector
<
VoxelIndex
>
atomVoxelIndex
;
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
// Find the atoms in this block and compute their bounding box.
// Find the atoms in this block and compute their bounding box.
int
firstIndex
=
B
lockSize
*
i
;
int
firstIndex
=
b
lockSize
*
i
;
int
atomsInBlock
=
min
(
B
lockSize
,
numAtoms
-
firstIndex
);
int
atomsInBlock
=
min
(
b
lockSize
,
numAtoms
-
firstIndex
);
blockAtoms
.
resize
(
atomsInBlock
);
blockAtoms
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
atomVoxelIndex
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
atomVoxelIndex
[
j
]
=
voxels
->
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
j
]]);
}
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
maxPos
=
minPos
;
fvec4
maxPos
=
minPos
;
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
...
@@ -461,7 +463,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
...
@@ -461,7 +463,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
minPos
=
min
(
minPos
,
pos
);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
);
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
,
atomVoxelIndex
);
// Record the exclusions for this block.
// Record the exclusions for this block.
...
...
platforms/cpu/src/CpuNonbondedForceVec8.cpp
0 → 100644
View file @
e22808f3
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <complex>
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec8.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "openmm/internal/vectorize.h"
#include "gmx_atomic.h"
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
#include "openmm/internal/MSVC_erfc.h"
using
namespace
std
;
using
namespace
OpenMM
;
const
float
CpuNonbondedForceVec8
::
TWO_OVER_SQRT_PI
=
(
float
)
(
2
/
sqrt
(
PI_M
));
const
int
CpuNonbondedForceVec8
::
NUM_TABLE_POINTS
=
2048
;
class
CpuNonbondedForceVec8
::
ComputeDirectTask
:
public
ThreadPool
::
Task
{
public:
ComputeDirectTask
(
CpuNonbondedForceVec8
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadComputeDirect
(
threads
,
threadIndex
);
}
CpuNonbondedForceVec8
&
owner
;
};
/**---------------------------------------------------------------------------------------
CpuNonbondedForceVec8 constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
),
tableIsValid
(
false
)
{
}
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForceVec8
::
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
)
{
if
(
distance
!=
cutoffDistance
)
tableIsValid
=
false
;
cutoff
=
true
;
cutoffDistance
=
distance
;
neighborList
=
&
neighbors
;
krf
=
pow
(
cutoffDistance
,
-
3.0
f
)
*
(
solventDielectric
-
1.0
)
/
(
2.0
*
solventDielectric
+
1.0
);
crf
=
(
1.0
/
cutoffDistance
)
*
(
3.0
*
solventDielectric
)
/
(
2.0
*
solventDielectric
+
1.0
);
}
/**---------------------------------------------------------------------------------------
Set the force to use a switching function on the Lennard-Jones interaction.
@param distance the switching distance
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForceVec8
::
setUseSwitchingFunction
(
float
distance
)
{
useSwitch
=
true
;
switchingDistance
=
distance
;
}
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff
distance.
@param boxSize the X, Y, and Z widths of the periodic box
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForceVec8
::
setPeriodic
(
float
*
periodicBoxSize
)
{
assert
(
cutoff
);
assert
(
periodicBoxSize
[
0
]
>=
2
*
cutoffDistance
);
assert
(
periodicBoxSize
[
1
]
>=
2
*
cutoffDistance
);
assert
(
periodicBoxSize
[
2
]
>=
2
*
cutoffDistance
);
periodic
=
true
;
this
->
periodicBoxSize
[
0
]
=
periodicBoxSize
[
0
];
this
->
periodicBoxSize
[
1
]
=
periodicBoxSize
[
1
];
this
->
periodicBoxSize
[
2
]
=
periodicBoxSize
[
2
];
}
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
@param alpha the Ewald separation parameter
@param kmaxx the largest wave vector in the x direction
@param kmaxy the largest wave vector in the y direction
@param kmaxz the largest wave vector in the z direction
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForceVec8
::
setUseEwald
(
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
)
{
if
(
alpha
!=
alphaEwald
)
tableIsValid
=
false
;
alphaEwald
=
alpha
;
numRx
=
kmaxx
;
numRy
=
kmaxy
;
numRz
=
kmaxz
;
ewald
=
true
;
tabulateEwaldScaleFactor
();
}
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForceVec8
::
setUsePME
(
float
alpha
,
int
meshSize
[
3
])
{
if
(
alpha
!=
alphaEwald
)
tableIsValid
=
false
;
alphaEwald
=
alpha
;
meshDim
[
0
]
=
meshSize
[
0
];
meshDim
[
1
]
=
meshSize
[
1
];
meshDim
[
2
]
=
meshSize
[
2
];
pme
=
true
;
tabulateEwaldScaleFactor
();
}
void
CpuNonbondedForceVec8
::
tabulateEwaldScaleFactor
()
{
if
(
tableIsValid
)
return
;
tableIsValid
=
true
;
ewaldDX
=
cutoffDistance
/
NUM_TABLE_POINTS
;
ewaldDXInv
=
1.0
f
/
ewaldDX
;
ewaldScaleTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
r
=
i
*
ewaldDX
;
double
alphaR
=
alphaEwald
*
r
;
ewaldScaleTable
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
}
}
void
CpuNonbondedForceVec8
::
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
RealVec
>&
forces
,
double
*
totalEnergy
)
const
{
typedef
std
::
complex
<
float
>
d_complex
;
static
const
float
epsilon
=
1.0
;
int
kmax
=
(
ewald
?
std
::
max
(
numRx
,
std
::
max
(
numRy
,
numRz
))
:
0
);
float
factorEwald
=
-
1
/
(
4
*
alphaEwald
*
alphaEwald
);
float
TWO_PI
=
2.0
*
PI_M
;
float
recipCoeff
=
(
float
)(
ONE_4PI_EPS0
*
4
*
PI_M
/
(
periodicBoxSize
[
0
]
*
periodicBoxSize
[
1
]
*
periodicBoxSize
[
2
])
/
epsilon
);
if
(
pme
)
{
pme_t
pmedata
;
RealOpenMM
virial
[
3
][
3
];
pme_init
(
&
pmedata
,
alphaEwald
,
numberOfAtoms
,
meshDim
,
5
,
1
);
vector
<
RealOpenMM
>
charges
(
numberOfAtoms
);
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
)
charges
[
i
]
=
posq
[
4
*
i
+
3
];
RealOpenMM
boxSize
[
3
]
=
{
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
]};
RealOpenMM
recipEnergy
=
0.0
;
pme_exec
(
pmedata
,
atomCoordinates
,
forces
,
charges
,
boxSize
,
&
recipEnergy
,
virial
);
if
(
totalEnergy
)
*
totalEnergy
+=
recipEnergy
;
pme_destroy
(
pmedata
);
}
// Ewald method
else
if
(
ewald
)
{
// setup reciprocal box
float
recipBoxSize
[
3
]
=
{
TWO_PI
/
periodicBoxSize
[
0
],
TWO_PI
/
periodicBoxSize
[
1
],
TWO_PI
/
periodicBoxSize
[
2
]};
// setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector
<
d_complex
>
eir
(
kmax
*
numberOfAtoms
*
3
);
vector
<
d_complex
>
tab_xy
(
numberOfAtoms
);
vector
<
d_complex
>
tab_qxyz
(
numberOfAtoms
);
for
(
int
i
=
0
;
(
i
<
numberOfAtoms
);
i
++
)
{
float
*
pos
=
posq
+
4
*
i
;
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
EIR
(
0
,
i
,
m
)
=
d_complex
(
1
,
0
);
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
EIR
(
1
,
i
,
m
)
=
d_complex
(
cos
(
pos
[
m
]
*
recipBoxSize
[
m
]),
sin
(
pos
[
m
]
*
recipBoxSize
[
m
]));
for
(
int
j
=
2
;
(
j
<
kmax
);
j
++
)
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
EIR
(
j
,
i
,
m
)
=
EIR
(
j
-
1
,
i
,
m
)
*
EIR
(
1
,
i
,
m
);
}
// calculate reciprocal space energy and forces
int
lowry
=
0
;
int
lowrz
=
1
;
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
float
kx
=
rx
*
recipBoxSize
[
0
];
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
float
ky
=
ry
*
recipBoxSize
[
1
];
if
(
ry
>=
0
)
{
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
tab_xy
[
n
]
=
EIR
(
rx
,
n
,
0
)
*
EIR
(
ry
,
n
,
1
);
}
else
{
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
tab_xy
[
n
]
=
EIR
(
rx
,
n
,
0
)
*
conj
(
EIR
(
-
ry
,
n
,
1
));
}
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
if
(
rz
>=
0
)
{
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
tab_qxyz
[
n
]
=
posq
[
4
*
n
+
3
]
*
(
tab_xy
[
n
]
*
EIR
(
rz
,
n
,
2
));
}
else
{
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
tab_qxyz
[
n
]
=
posq
[
4
*
n
+
3
]
*
(
tab_xy
[
n
]
*
conj
(
EIR
(
-
rz
,
n
,
2
)));
}
float
cs
=
0.0
f
;
float
ss
=
0.0
f
;
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
{
cs
+=
tab_qxyz
[
n
].
real
();
ss
+=
tab_qxyz
[
n
].
imag
();
}
float
kz
=
rz
*
recipBoxSize
[
2
];
float
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
float
ak
=
exp
(
k2
*
factorEwald
)
/
k2
;
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
{
float
force
=
ak
*
(
cs
*
tab_qxyz
[
n
].
imag
()
-
ss
*
tab_qxyz
[
n
].
real
());
forces
[
n
][
0
]
+=
2
*
recipCoeff
*
force
*
kx
;
forces
[
n
][
1
]
+=
2
*
recipCoeff
*
force
*
ky
;
forces
[
n
][
2
]
+=
2
*
recipCoeff
*
force
*
kz
;
}
if
(
totalEnergy
)
*
totalEnergy
+=
recipCoeff
*
ak
*
(
cs
*
cs
+
ss
*
ss
);
lowrz
=
1
-
numRz
;
}
lowry
=
1
-
numRy
;
}
}
}
}
void
CpuNonbondedForceVec8
::
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
posq
=
posq
;
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
atomParameters
=
&
atomParameters
[
0
];
this
->
exclusions
=
&
exclusions
[
0
];
this
->
threadForce
=
&
threadForce
;
includeEnergy
=
(
totalEnergy
!=
NULL
);
threadEnergy
.
resize
(
threads
.
getNumThreads
());
gmx_atomic_t
counter
;
gmx_atomic_set
(
&
counter
,
0
);
this
->
atomicCounter
=
&
counter
;
// Signal the threads to start running and wait for them to finish.
ComputeDirectTask
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
// Combine the energies from all the threads.
if
(
totalEnergy
!=
NULL
)
{
double
directEnergy
=
0
;
int
numThreads
=
threads
.
getNumThreads
();
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
directEnergy
+=
threadEnergy
[
i
];
*
totalEnergy
+=
directEnergy
;
}
}
void
CpuNonbondedForceVec8
::
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Compute this thread's subset of interactions.
int
numThreads
=
threads
.
getNumThreads
();
threadEnergy
[
threadIndex
]
=
0
;
double
*
energyPtr
=
(
includeEnergy
?
&
threadEnergy
[
threadIndex
]
:
NULL
);
float
*
forces
=
&
(
*
threadForce
)[
threadIndex
][
0
];
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
while
(
true
)
{
int
nextBlock
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
nextBlock
>=
neighborList
->
getNumBlocks
())
break
;
calculateBlockEwaldIxn
(
nextBlock
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
for
(
int
i
=
threadIndex
;
i
<
numberOfAtoms
;
i
+=
numThreads
)
{
fvec4
posI
((
float
)
atomCoordinates
[
i
][
0
],
(
float
)
atomCoordinates
[
i
][
1
],
(
float
)
atomCoordinates
[
i
][
2
],
0.0
f
);
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
{
if
(
*
iter
>
i
)
{
int
j
=
*
iter
;
fvec4
deltaR
;
fvec4
posJ
((
float
)
atomCoordinates
[
j
][
0
],
(
float
)
atomCoordinates
[
j
][
1
],
(
float
)
atomCoordinates
[
j
][
2
],
0.0
f
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
false
,
boxSize
,
invBoxSize
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
).
lowerVec
()[
0
];
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
forces
+
4
*
i
)
-
result
).
store
(
forces
+
4
*
i
);
(
fvec4
(
forces
+
4
*
j
)
+
result
).
store
(
forces
+
4
*
j
);
if
(
includeEnergy
)
threadEnergy
[
threadIndex
]
-=
chargeProd
*
inverseR
*
(
1.0
f
-
erfcAlphaR
);
}
}
}
}
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
while
(
true
)
{
int
nextBlock
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
nextBlock
>=
neighborList
->
getNumBlocks
())
break
;
calculateBlockIxn
(
nextBlock
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
}
else
{
// Loop over all atom pairs
while
(
true
)
{
int
i
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
i
>=
numberOfAtoms
)
break
;
for
(
int
j
=
i
+
1
;
j
<
numberOfAtoms
;
j
++
)
if
(
exclusions
[
j
].
find
(
i
)
==
exclusions
[
j
].
end
())
calculateOneIxn
(
i
,
j
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
}
}
void
CpuNonbondedForceVec8
::
calculateOneIxn
(
int
ii
,
int
jj
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// get deltaR, R2, and R between 2 atoms
fvec4
deltaR
;
fvec4
posI
(
posq
+
4
*
ii
);
fvec4
posJ
(
posq
+
4
*
jj
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
if
(
cutoff
&&
r2
>=
cutoffDistance
*
cutoffDistance
)
return
;
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
switchValue
=
1
,
switchDeriv
=
0
;
if
(
useSwitch
&&
r
>
switchingDistance
)
{
float
t
=
(
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
);
switchValue
=
1
+
t
*
t
*
t
*
(
-
10
+
t
*
(
15
-
t
*
6
));
switchDeriv
=
t
*
t
*
(
-
30
+
t
*
(
60
-
t
*
30
))
/
(
cutoffDistance
-
switchingDistance
);
}
float
sig
=
atomParameters
[
ii
].
first
+
atomParameters
[
jj
].
first
;
float
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
atomParameters
[
ii
].
second
*
atomParameters
[
jj
].
second
;
float
dEdR
=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
ii
+
3
]
*
posq
[
4
*
jj
+
3
];
if
(
cutoff
)
dEdR
+=
(
float
)
(
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
));
else
dEdR
+=
(
float
)
(
chargeProd
*
inverseR
);
dEdR
*=
inverseR
*
inverseR
;
float
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
// accumulate energies
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
(
float
)
(
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
));
else
energy
+=
(
float
)
(
chargeProd
*
inverseR
);
*
totalEnergy
+=
energy
;
}
// accumulate forces
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
forces
+
4
*
ii
)
+
result
).
store
(
forces
+
4
*
ii
);
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
}
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec4
blockAtomForce
[
8
];
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec8
blockAtomX
=
fvec8
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
],
blockAtomPosq
[
4
][
0
],
blockAtomPosq
[
5
][
0
],
blockAtomPosq
[
6
][
0
],
blockAtomPosq
[
7
][
0
]);
fvec8
blockAtomY
=
fvec8
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
],
blockAtomPosq
[
4
][
1
],
blockAtomPosq
[
5
][
1
],
blockAtomPosq
[
6
][
1
],
blockAtomPosq
[
7
][
1
]);
fvec8
blockAtomZ
=
fvec8
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
],
blockAtomPosq
[
4
][
2
],
blockAtomPosq
[
5
][
2
],
blockAtomPosq
[
6
][
2
],
blockAtomPosq
[
7
][
2
]);
fvec8
blockAtomCharge
=
fvec8
(
ONE_4PI_EPS0
)
*
fvec8
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
],
blockAtomPosq
[
4
][
3
],
blockAtomPosq
[
5
][
3
],
blockAtomPosq
[
6
][
3
],
blockAtomPosq
[
7
][
3
]);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
false
;
if
(
periodic
)
{
for
(
int
i
=
0
;
i
<
8
&&
!
needPeriodic
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
if
(
blockAtomPosq
[
i
][
j
]
-
cutoffDistance
<
0.0
||
blockAtomPosq
[
i
][
j
]
+
cutoffDistance
>
boxSize
[
j
])
{
needPeriodic
=
true
;
break
;
}
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec8
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
1.0
f
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec8
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
fvec4
rt
[
8
];
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
],
rt
[
0
],
rt
[
1
],
rt
[
2
],
rt
[
3
],
rt
[
4
],
rt
[
5
],
rt
[
6
],
rt
[
7
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
{
blockAtomForce
[
j
]
+=
rt
[
j
];
atomForce
-=
rt
[
j
];
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
8
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec4
blockAtomForce
[
8
];
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec8
blockAtomX
=
fvec8
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
],
blockAtomPosq
[
4
][
0
],
blockAtomPosq
[
5
][
0
],
blockAtomPosq
[
6
][
0
],
blockAtomPosq
[
7
][
0
]);
fvec8
blockAtomY
=
fvec8
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
],
blockAtomPosq
[
4
][
1
],
blockAtomPosq
[
5
][
1
],
blockAtomPosq
[
6
][
1
],
blockAtomPosq
[
7
][
1
]);
fvec8
blockAtomZ
=
fvec8
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
],
blockAtomPosq
[
4
][
2
],
blockAtomPosq
[
5
][
2
],
blockAtomPosq
[
6
][
2
],
blockAtomPosq
[
7
][
2
]);
fvec8
blockAtomCharge
=
fvec8
(
ONE_4PI_EPS0
)
*
fvec8
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
],
blockAtomPosq
[
4
][
3
],
blockAtomPosq
[
5
][
3
],
blockAtomPosq
[
6
][
3
],
blockAtomPosq
[
7
][
3
]);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
false
;
if
(
periodic
)
{
for
(
int
i
=
0
;
i
<
8
&&
!
needPeriodic
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
if
(
blockAtomPosq
[
i
][
j
]
-
cutoffDistance
<
0.0
||
blockAtomPosq
[
i
][
j
]
+
cutoffDistance
>
boxSize
[
j
])
{
needPeriodic
=
true
;
break
;
}
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec8
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
1.0
f
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec8
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
fvec4
rt
[
8
];
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
],
rt
[
0
],
rt
[
1
],
rt
[
2
],
rt
[
3
],
rt
[
4
],
rt
[
5
],
rt
[
6
],
rt
[
7
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
{
blockAtomForce
[
j
]
+=
rt
[
j
];
atomForce
-=
rt
[
j
];
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
8
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
deltaR
=
posJ
-
posI
;
if
(
periodic
)
{
fvec4
base
=
round
(
deltaR
*
invBoxSize
)
*
boxSize
;
deltaR
=
deltaR
-
base
;
}
r2
=
dot3
(
deltaR
,
deltaR
);
}
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec8
CpuNonbondedForceVec8
::
erfcApprox
(
fvec8
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
fvec8
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
}
fvec8
CpuNonbondedForceVec8
::
ewaldScaleFunction
(
fvec8
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
fvec8
x1
=
x
*
ewaldDXInv
;
ivec8
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
fvec8
coeff2
=
x1
-
index
;
fvec8
coeff1
=
1.0
f
-
coeff2
;
ivec4
indexLower
=
index
.
lowerVec
();
ivec4
indexUpper
=
index
.
upperVec
();
fvec4
t1
(
&
ewaldScaleTable
[
indexLower
[
0
]]);
fvec4
t2
(
&
ewaldScaleTable
[
indexLower
[
1
]]);
fvec4
t3
(
&
ewaldScaleTable
[
indexLower
[
2
]]);
fvec4
t4
(
&
ewaldScaleTable
[
indexLower
[
3
]]);
fvec4
t5
(
&
ewaldScaleTable
[
indexUpper
[
0
]]);
fvec4
t6
(
&
ewaldScaleTable
[
indexUpper
[
1
]]);
fvec4
t7
(
&
ewaldScaleTable
[
indexUpper
[
2
]]);
fvec4
t8
(
&
ewaldScaleTable
[
indexUpper
[
3
]]);
fvec8
s1
,
s2
,
s3
,
s4
;
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
}
platforms/cpu/tests/TestCpuNeighborList.cpp
View file @
e22808f3
...
@@ -51,6 +51,7 @@ void testNeighborList(bool periodic) {
...
@@ -51,6 +51,7 @@ void testNeighborList(bool periodic) {
const
int
numParticles
=
500
;
const
int
numParticles
=
500
;
const
float
cutoff
=
2.0
f
;
const
float
cutoff
=
2.0
f
;
const
float
boxSize
[
3
]
=
{
20.0
f
,
15.0
f
,
22.0
f
};
const
float
boxSize
[
3
]
=
{
20.0
f
,
15.0
f
,
22.0
f
};
const
int
blockSize
=
8
;
OpenMM_SFMT
::
SFMT
sfmt
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
init_gen_rand
(
0
,
sfmt
);
AlignedArray
<
float
>
positions
(
4
*
numParticles
);
AlignedArray
<
float
>
positions
(
4
*
numParticles
);
...
@@ -66,15 +67,15 @@ void testNeighborList(bool periodic) {
...
@@ -66,15 +67,15 @@ void testNeighborList(bool periodic) {
}
}
}
}
ThreadPool
threads
;
ThreadPool
threads
;
CpuNeighborList
neighborList
;
CpuNeighborList
neighborList
(
blockSize
)
;
neighborList
.
computeNeighborList
(
numParticles
,
positions
,
exclusions
,
boxSize
,
periodic
,
cutoff
,
threads
);
neighborList
.
computeNeighborList
(
numParticles
,
positions
,
exclusions
,
boxSize
,
periodic
,
cutoff
,
threads
);
// Convert the neighbor list to a set for faster lookup.
// Convert the neighbor list to a set for faster lookup.
set
<
pair
<
int
,
int
>
>
neighbors
;
set
<
pair
<
int
,
int
>
>
neighbors
;
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
.
getSortedAtoms
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
.
getSortedAtoms
().
size
();
i
++
)
{
int
blockIndex
=
i
/
CpuNeighborList
::
B
lockSize
;
int
blockIndex
=
i
/
b
lockSize
;
int
indexInBlock
=
i
-
blockIndex
*
CpuNeighborList
::
B
lockSize
;
int
indexInBlock
=
i
-
blockIndex
*
b
lockSize
;
char
mask
=
1
<<
indexInBlock
;
char
mask
=
1
<<
indexInBlock
;
for
(
int
j
=
0
;
j
<
(
int
)
neighborList
.
getBlockExclusions
(
blockIndex
).
size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
neighborList
.
getBlockExclusions
(
blockIndex
).
size
();
j
++
)
{
if
((
neighborList
.
getBlockExclusions
(
blockIndex
)[
j
]
&
mask
)
==
0
)
{
if
((
neighborList
.
getBlockExclusions
(
blockIndex
)[
j
]
&
mask
)
==
0
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment