Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d4af8747
Commit
d4af8747
authored
Dec 18, 2013
by
peastman
Browse files
Automatically select whether to use the SSE or AVX version of CpuNonbondedForce
parent
5cd23acb
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
486 additions
and
867 deletions
+486
-867
platforms/cpu/include/CpuKernels.h
platforms/cpu/include/CpuKernels.h
+3
-5
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+12
-17
platforms/cpu/include/CpuNonbondedForceVec4.h
platforms/cpu/include/CpuNonbondedForceVec4.h
+89
-0
platforms/cpu/include/CpuNonbondedForceVec8.h
platforms/cpu/include/CpuNonbondedForceVec8.h
+12
-176
platforms/cpu/sharedTarget/CMakeLists.txt
platforms/cpu/sharedTarget/CMakeLists.txt
+6
-2
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+28
-8
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+6
-246
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+301
-0
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+29
-413
No files found.
platforms/cpu/include/CpuKernels.h
View file @
d4af8747
...
...
@@ -88,9 +88,7 @@ private:
*/
class
CpuCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
CpuCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
bonded14IndexArray
(
NULL
),
bonded14ParamArray
(
NULL
),
hasInitializedPme
(
false
)
{
}
CpuCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
);
~
CpuCalcNonbondedForceKernel
();
/**
* Initialize the kernel.
...
...
@@ -130,8 +128,8 @@ private:
std
::
vector
<
std
::
pair
<
float
,
float
>
>
particleParams
;
std
::
vector
<
RealVec
>
lastPositions
;
NonbondedMethod
nonbondedMethod
;
CpuNeighborList
neighborList
;
CpuNonbondedForce
nonbonded
;
CpuNeighborList
*
neighborList
;
CpuNonbondedForce
*
nonbonded
;
Kernel
optimizedPme
;
};
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
d4af8747
...
...
@@ -49,6 +49,12 @@ class CpuNonbondedForce {
CpuNonbondedForce
();
/**
* Virtual destructor.
*/
virtual
~
CpuNonbondedForce
();
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
...
...
@@ -151,7 +157,7 @@ class CpuNonbondedForce {
*/
void
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
);
pr
ivate
:
pr
otected
:
bool
cutoff
;
bool
useSwitch
;
bool
periodic
;
...
...
@@ -204,7 +210,7 @@ private:
--------------------------------------------------------------------------------------- */
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
virtual
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
=
0
;
/**---------------------------------------------------------------------------------------
...
...
@@ -216,7 +222,7 @@ private:
--------------------------------------------------------------------------------------- */
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
virtual
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
=
0
;
/**
* Compute the displacement and squared distance between two points, optionally using
...
...
@@ -224,26 +230,15 @@ private:
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
float
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
*/
static
fvec4
erfcApprox
(
fvec4
x
);
/**
* Create a lookup table for the scale factor used with Ewald and PME.
*/
void
tabulateEwaldScaleFactor
();
/**
*
Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*
Compute a fast approximation to erfc(x).
*/
fvec4
ewaldScaleFunction
(
fvec4
x
);
static
float
erfcApprox
(
float
x
);
};
}
// namespace OpenMM
...
...
platforms/cpu/include/CpuNonbondedForceVec4.h
0 → 100644
View file @
d4af8747
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
#define OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
#include "CpuNonbondedForce.h"
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
class
CpuNonbondedForceVec4
:
public
CpuNonbondedForce
{
public:
/**---------------------------------------------------------------------------------------
Constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec4
();
protected:
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
float
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
*/
static
fvec4
erfcApprox
(
fvec4
x
);
/**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
fvec4
ewaldScaleFunction
(
fvec4
x
);
};
}
// namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
platforms/cpu/include/CpuNonbondedForceVec8.h
View file @
d4af8747
...
...
@@ -22,178 +22,23 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_H__
#define OPENMM_CPU_NONBONDED_FORCE_H__
#ifndef OPENMM_CPU_NONBONDED_FORCE_
VEC8_
H__
#define OPENMM_CPU_NONBONDED_FORCE_
VEC8_
H__
#include "AlignedArray.h"
#include "CpuNeighborList.h"
#include "ReferencePairIxn.h"
#include "openmm/internal/ThreadPool.h"
#ifdef __AVX__
#include "CpuNonbondedForce.h"
#include "openmm/internal/vectorize8.h"
#include <set>
#include <utility>
#include <vector>
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
class
CpuNonbondedForceVec8
{
public:
class
ComputeDirectTask
;
/**---------------------------------------------------------------------------------------
Constructor
--------------------------------------------------------------------------------------- */
class
CpuNonbondedForceVec8
:
public
CpuNonbondedForce
{
public:
CpuNonbondedForceVec8
();
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */
void
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
);
/**---------------------------------------------------------------------------------------
Set the force to use a switching function on the Lennard-Jones interaction.
@param distance the switching distance
--------------------------------------------------------------------------------------- */
void
setUseSwitchingFunction
(
float
distance
);
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
already been set, and the smallest side of the periodic box is at least twice the cutoff
distance.
@param boxSize the X, Y, and Z widths of the periodic box
--------------------------------------------------------------------------------------- */
void
setPeriodic
(
float
*
periodicBoxSize
);
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
@param alpha the Ewald separation parameter
@param kmaxx the largest wave vector in the x direction
@param kmaxy the largest wave vector in the y direction
@param kmaxz the largest wave vector in the z direction
--------------------------------------------------------------------------------------- */
void
setUseEwald
(
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void
setUsePME
(
float
alpha
,
int
meshSize
[
3
]);
/**---------------------------------------------------------------------------------------
Calculate Ewald ixn
@param numberOfAtoms number of atoms
@param posq atom coordinates and charges
@param atomCoordinates atom coordinates (in format needed by PME)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
RealVec
>&
atomCoordinates
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
RealVec
>&
forces
,
double
*
totalEnergy
)
const
;
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn
@param numberOfAtoms number of atoms
@param posq atom coordinates and charges
@param atomCoordinates atom coordinates (periodic boundary conditions not applied)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added)
@param totalEnergy total energy
@param threads the thread pool to use
--------------------------------------------------------------------------------------- */
void
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
RealVec
>&
atomCoordinates
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
);
/**
* This routine contains the code executed by each thread.
*/
void
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
);
private:
bool
cutoff
;
bool
useSwitch
;
bool
periodic
;
bool
ewald
;
bool
pme
;
bool
tableIsValid
;
const
CpuNeighborList
*
neighborList
;
float
periodicBoxSize
[
3
];
float
cutoffDistance
,
switchingDistance
;
float
krf
,
crf
;
float
alphaEwald
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
std
::
vector
<
float
>
ewaldScaleTable
;
float
ewaldDX
,
ewaldDXInv
;
std
::
vector
<
double
>
threadEnergy
;
// The following variables are used to make information accessible to the individual threads.
int
numberOfAtoms
;
float
*
posq
;
RealVec
const
*
atomCoordinates
;
std
::
pair
<
float
,
float
>
const
*
atomParameters
;
std
::
set
<
int
>
const
*
exclusions
;
std
::
vector
<
AlignedArray
<
float
>
>*
threadForce
;
bool
includeEnergy
;
void
*
atomicCounter
;
static
const
float
TWO_OVER_SQRT_PI
;
static
const
int
NUM_TABLE_POINTS
;
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn between two atoms
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateOneIxn
(
int
atom1
,
int
atom2
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
protected:
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
...
...
@@ -218,12 +63,6 @@ private:
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**
* Compute the displacement and squared distance between two points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
...
...
@@ -235,11 +74,6 @@ private:
*/
static
fvec8
erfcApprox
(
fvec8
x
);
/**
* Create a lookup table for the scale factor used with Ewald and PME.
*/
void
tabulateEwaldScaleFactor
();
/**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
...
...
@@ -250,4 +84,6 @@ private:
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_NONBONDED_FORCE_H__
#endif // __AVX__
#endif // OPENMM_CPU_NONBONDED_FORCE_VEC8_H__
platforms/cpu/sharedTarget/CMakeLists.txt
View file @
d4af8747
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
FOREACH
(
file
${
SOURCE_FILES
}
)
IF
(
file MATCHES
".*Vec8.*"
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
ENDIF
(
file MATCHES
".*Vec8.*"
)
ENDFOREACH
(
file
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
IF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
...
...
@@ -7,6 +11,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
PTHREADS_LIB
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_CPU_BUILDING_SHARED_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1
-DOPENMM_CPU_BUILDING_SHARED_LIBRARY"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
platforms/cpu/src/CpuKernels.cpp
View file @
d4af8747
...
...
@@ -145,6 +145,22 @@ private:
int
numParticles
;
};
bool
isVec8Supported
();
CpuNonbondedForce
*
createCpuNonbondedForceVec4
();
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
CpuCalcNonbondedForceKernel
::
CpuCalcNonbondedForceKernel
(
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
bonded14IndexArray
(
NULL
),
bonded14ParamArray
(
NULL
),
hasInitializedPme
(
false
),
neighborList
(
NULL
),
nonbonded
(
NULL
)
{
if
(
isVec8Supported
)
{
neighborList
=
new
CpuNeighborList
(
8
);
nonbonded
=
createCpuNonbondedForceVec8
();
}
else
{
neighborList
=
new
CpuNeighborList
(
4
);
nonbonded
=
createCpuNonbondedForceVec4
();
}
}
CpuCalcNonbondedForceKernel
::~
CpuCalcNonbondedForceKernel
()
{
if
(
bonded14ParamArray
!=
NULL
)
{
for
(
int
i
=
0
;
i
<
num14
;
i
++
)
{
...
...
@@ -154,6 +170,10 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
delete
bonded14IndexArray
;
delete
bonded14ParamArray
;
}
if
(
nonbonded
!=
NULL
)
delete
nonbonded
;
if
(
neighborList
!=
NULL
)
delete
neighborList
;
}
void
CpuCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
...
...
@@ -305,26 +325,26 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
}
}
if
(
needRecompute
)
{
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
data
.
isPeriodic
,
nonbondedCutoff
+
padding
,
data
.
threads
);
neighborList
->
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
data
.
isPeriodic
,
nonbondedCutoff
+
padding
,
data
.
threads
);
lastPositions
=
posData
;
}
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
,
rfDielectric
);
nonbonded
->
setUseCutoff
(
nonbondedCutoff
,
*
neighborList
,
rfDielectric
);
}
if
(
data
.
isPeriodic
)
{
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
if
(
boxSize
[
0
]
<
minAllowedSize
||
boxSize
[
1
]
<
minAllowedSize
||
boxSize
[
2
]
<
minAllowedSize
)
throw
OpenMMException
(
"The periodic box size has decreased to less than twice the nonbonded cutoff."
);
nonbonded
.
setPeriodic
(
floatBoxSize
);
nonbonded
->
setPeriodic
(
floatBoxSize
);
}
if
(
ewald
)
nonbonded
.
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
nonbonded
->
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
if
(
pme
)
nonbonded
.
setUsePME
(
ewaldAlpha
,
gridSize
);
nonbonded
->
setUsePME
(
ewaldAlpha
,
gridSize
);
if
(
useSwitchingFunction
)
nonbonded
.
setUseSwitchingFunction
(
switchingDistance
);
nonbonded
->
setUseSwitchingFunction
(
switchingDistance
);
double
nonbondedEnergy
=
0
;
if
(
includeDirect
)
nonbonded
.
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
data
.
threadForce
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
,
data
.
threads
);
nonbonded
->
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
data
.
threadForce
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
,
data
.
threads
);
if
(
includeReciprocal
)
{
if
(
useOptimizedPme
)
{
PmeIO
io
(
&
posq
[
0
],
&
data
.
threadForce
[
0
][
0
],
numParticles
);
...
...
@@ -333,7 +353,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy
+=
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
else
nonbonded
.
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
->
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
}
energy
+=
nonbondedEnergy
;
if
(
includeDirect
)
{
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
d4af8747
...
...
@@ -29,7 +29,6 @@
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "openmm/internal/vectorize.h"
#include "gmx_atomic.h"
// In case we're using some primitive version of Visual Studio this will
...
...
@@ -61,6 +60,9 @@ public:
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
),
tableIsValid
(
false
)
{
}
CpuNonbondedForce
::~
CpuNonbondedForce
()
{
}
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
...
...
@@ -356,7 +358,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
)
[
0
]
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
);
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
...
...
@@ -446,222 +448,6 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
}
void
CpuNonbondedForce
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForce
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
deltaR
=
posJ
-
posI
;
if
(
periodic
)
{
...
...
@@ -671,41 +457,15 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
r2
=
dot3
(
deltaR
,
deltaR
);
}
void
CpuNonbondedForce
::
getDeltaR
(
const
float
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec4
CpuNonbondedForce
::
erfcApprox
(
fvec4
x
)
{
float
CpuNonbondedForce
::
erfcApprox
(
float
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
f
vec4
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
f
loat
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
}
fvec4
CpuNonbondedForce
::
ewaldScaleFunction
(
fvec4
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
fvec4
x1
=
x
*
ewaldDXInv
;
ivec4
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
ewaldScaleTable
[
index
[
0
]]);
fvec4
t2
(
&
ewaldScaleTable
[
index
[
1
]]);
fvec4
t3
(
&
ewaldScaleTable
[
index
[
2
]]);
fvec4
t4
(
&
ewaldScaleTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuNonbondedForceVec4.cpp
0 → 100644
View file @
d4af8747
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
using
namespace
std
;
using
namespace
OpenMM
;
/**
* Factory method to create a CpuNonbondedForceVec4.
*/
CpuNonbondedForce
*
createCpuNonbondedForceVec4
()
{
return
new
CpuNonbondedForceVec4
();
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForceVec4 constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec4
::
CpuNonbondedForceVec4
()
{
}
void
CpuNonbondedForceVec4
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec4
::
getDeltaR
(
const
float
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec4
CpuNonbondedForceVec4
::
erfcApprox
(
fvec4
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
fvec4
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
}
fvec4
CpuNonbondedForceVec4
::
ewaldScaleFunction
(
fvec4
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
fvec4
x1
=
x
*
ewaldDXInv
;
ivec4
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
ewaldScaleTable
[
index
[
0
]]);
fvec4
t2
(
&
ewaldScaleTable
[
index
[
1
]]);
fvec4
t3
(
&
ewaldScaleTable
[
index
[
2
]]);
fvec4
t4
(
&
ewaldScaleTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
d4af8747
...
...
@@ -22,430 +22,54 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <complex>
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec8.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "openmm/internal/vectorize.h"
#include "gmx_atomic.h"
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
#include "openmm/internal/MSVC_erfc.h"
#include "openmm/internal/hardware.h"
using
namespace
std
;
using
namespace
OpenMM
;
const
float
CpuNonbondedForceVec8
::
TWO_OVER_SQRT_PI
=
(
float
)
(
2
/
sqrt
(
PI_M
));
const
int
CpuNonbondedForceVec8
::
NUM_TABLE_POINTS
=
2048
;
class
CpuNonbondedForceVec8
::
ComputeDirectTask
:
public
ThreadPool
::
Task
{
public:
ComputeDirectTask
(
CpuNonbondedForceVec8
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadComputeDirect
(
threads
,
threadIndex
);
}
CpuNonbondedForceVec8
&
owner
;
};
/**---------------------------------------------------------------------------------------
CpuNonbondedForceVec8 constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
),
tableIsValid
(
false
)
{
#ifndef __AVX__
bool
isVec8Supported
()
{
return
false
;
}
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForceVec8
::
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
)
{
if
(
distance
!=
cutoffDistance
)
tableIsValid
=
false
;
cutoff
=
true
;
cutoffDistance
=
distance
;
neighborList
=
&
neighbors
;
krf
=
pow
(
cutoffDistance
,
-
3.0
f
)
*
(
solventDielectric
-
1.0
)
/
(
2.0
*
solventDielectric
+
1.0
);
crf
=
(
1.0
/
cutoffDistance
)
*
(
3.0
*
solventDielectric
)
/
(
2.0
*
solventDielectric
+
1.0
);
}
/**---------------------------------------------------------------------------------------
Set the force to use a switching function on the Lennard-Jones interaction.
@param distance the switching distance
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForceVec8
::
setUseSwitchingFunction
(
float
distance
)
{
useSwitch
=
true
;
switchingDistance
=
distance
;
CpuNonbondedForce
*
createCpuNonbondedForceVec8
()
{
throw
OpenMMException
(
"Internal error: OpenMM was compiled without AVX support"
);
}
#else
/**
* Check whether 8 component vectors are supported with the current CPU.
*/
bool
isVec8Supported
()
{
// Make sure the CPU supports AVX.
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff
distance.
@param boxSize the X, Y, and Z widths of the periodic box
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForceVec8
::
setPeriodic
(
float
*
periodicBoxSize
)
{
assert
(
cutoff
);
assert
(
periodicBoxSize
[
0
]
>=
2
*
cutoffDistance
);
assert
(
periodicBoxSize
[
1
]
>=
2
*
cutoffDistance
);
assert
(
periodicBoxSize
[
2
]
>=
2
*
cutoffDistance
);
periodic
=
true
;
this
->
periodicBoxSize
[
0
]
=
periodicBoxSize
[
0
];
this
->
periodicBoxSize
[
1
]
=
periodicBoxSize
[
1
];
this
->
periodicBoxSize
[
2
]
=
periodicBoxSize
[
2
];
}
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
@param alpha the Ewald separation parameter
@param kmaxx the largest wave vector in the x direction
@param kmaxy the largest wave vector in the y direction
@param kmaxz the largest wave vector in the z direction
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForceVec8
::
setUseEwald
(
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
)
{
if
(
alpha
!=
alphaEwald
)
tableIsValid
=
false
;
alphaEwald
=
alpha
;
numRx
=
kmaxx
;
numRy
=
kmaxy
;
numRz
=
kmaxz
;
ewald
=
true
;
tabulateEwaldScaleFactor
();
}
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForceVec8
::
setUsePME
(
float
alpha
,
int
meshSize
[
3
])
{
if
(
alpha
!=
alphaEwald
)
tableIsValid
=
false
;
alphaEwald
=
alpha
;
meshDim
[
0
]
=
meshSize
[
0
];
meshDim
[
1
]
=
meshSize
[
1
];
meshDim
[
2
]
=
meshSize
[
2
];
pme
=
true
;
tabulateEwaldScaleFactor
();
}
void
CpuNonbondedForceVec8
::
tabulateEwaldScaleFactor
()
{
if
(
tableIsValid
)
return
;
tableIsValid
=
true
;
ewaldDX
=
cutoffDistance
/
NUM_TABLE_POINTS
;
ewaldDXInv
=
1.0
f
/
ewaldDX
;
ewaldScaleTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
r
=
i
*
ewaldDX
;
double
alphaR
=
alphaEwald
*
r
;
ewaldScaleTable
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
cpuid
(
cpuInfo
,
1
);
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
28
))
!=
0
);
}
return
false
;
}
void
CpuNonbondedForceVec8
::
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
RealVec
>&
forces
,
double
*
totalEnergy
)
const
{
typedef
std
::
complex
<
float
>
d_complex
;
static
const
float
epsilon
=
1.0
;
int
kmax
=
(
ewald
?
std
::
max
(
numRx
,
std
::
max
(
numRy
,
numRz
))
:
0
);
float
factorEwald
=
-
1
/
(
4
*
alphaEwald
*
alphaEwald
);
float
TWO_PI
=
2.0
*
PI_M
;
float
recipCoeff
=
(
float
)(
ONE_4PI_EPS0
*
4
*
PI_M
/
(
periodicBoxSize
[
0
]
*
periodicBoxSize
[
1
]
*
periodicBoxSize
[
2
])
/
epsilon
);
if
(
pme
)
{
pme_t
pmedata
;
RealOpenMM
virial
[
3
][
3
];
pme_init
(
&
pmedata
,
alphaEwald
,
numberOfAtoms
,
meshDim
,
5
,
1
);
vector
<
RealOpenMM
>
charges
(
numberOfAtoms
);
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
)
charges
[
i
]
=
posq
[
4
*
i
+
3
];
RealOpenMM
boxSize
[
3
]
=
{
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
]};
RealOpenMM
recipEnergy
=
0.0
;
pme_exec
(
pmedata
,
atomCoordinates
,
forces
,
charges
,
boxSize
,
&
recipEnergy
,
virial
);
if
(
totalEnergy
)
*
totalEnergy
+=
recipEnergy
;
pme_destroy
(
pmedata
);
}
// Ewald method
else
if
(
ewald
)
{
// setup reciprocal box
float
recipBoxSize
[
3
]
=
{
TWO_PI
/
periodicBoxSize
[
0
],
TWO_PI
/
periodicBoxSize
[
1
],
TWO_PI
/
periodicBoxSize
[
2
]};
// setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector
<
d_complex
>
eir
(
kmax
*
numberOfAtoms
*
3
);
vector
<
d_complex
>
tab_xy
(
numberOfAtoms
);
vector
<
d_complex
>
tab_qxyz
(
numberOfAtoms
);
for
(
int
i
=
0
;
(
i
<
numberOfAtoms
);
i
++
)
{
float
*
pos
=
posq
+
4
*
i
;
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
EIR
(
0
,
i
,
m
)
=
d_complex
(
1
,
0
);
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
EIR
(
1
,
i
,
m
)
=
d_complex
(
cos
(
pos
[
m
]
*
recipBoxSize
[
m
]),
sin
(
pos
[
m
]
*
recipBoxSize
[
m
]));
for
(
int
j
=
2
;
(
j
<
kmax
);
j
++
)
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
EIR
(
j
,
i
,
m
)
=
EIR
(
j
-
1
,
i
,
m
)
*
EIR
(
1
,
i
,
m
);
}
// calculate reciprocal space energy and forces
int
lowry
=
0
;
int
lowrz
=
1
;
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
float
kx
=
rx
*
recipBoxSize
[
0
];
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
float
ky
=
ry
*
recipBoxSize
[
1
];
if
(
ry
>=
0
)
{
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
tab_xy
[
n
]
=
EIR
(
rx
,
n
,
0
)
*
EIR
(
ry
,
n
,
1
);
}
else
{
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
tab_xy
[
n
]
=
EIR
(
rx
,
n
,
0
)
*
conj
(
EIR
(
-
ry
,
n
,
1
));
}
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
if
(
rz
>=
0
)
{
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
tab_qxyz
[
n
]
=
posq
[
4
*
n
+
3
]
*
(
tab_xy
[
n
]
*
EIR
(
rz
,
n
,
2
));
}
else
{
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
tab_qxyz
[
n
]
=
posq
[
4
*
n
+
3
]
*
(
tab_xy
[
n
]
*
conj
(
EIR
(
-
rz
,
n
,
2
)));
}
float
cs
=
0.0
f
;
float
ss
=
0.0
f
;
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
{
cs
+=
tab_qxyz
[
n
].
real
();
ss
+=
tab_qxyz
[
n
].
imag
();
}
float
kz
=
rz
*
recipBoxSize
[
2
];
float
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
float
ak
=
exp
(
k2
*
factorEwald
)
/
k2
;
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
{
float
force
=
ak
*
(
cs
*
tab_qxyz
[
n
].
imag
()
-
ss
*
tab_qxyz
[
n
].
real
());
forces
[
n
][
0
]
+=
2
*
recipCoeff
*
force
*
kx
;
forces
[
n
][
1
]
+=
2
*
recipCoeff
*
force
*
ky
;
forces
[
n
][
2
]
+=
2
*
recipCoeff
*
force
*
kz
;
}
if
(
totalEnergy
)
*
totalEnergy
+=
recipCoeff
*
ak
*
(
cs
*
cs
+
ss
*
ss
);
lowrz
=
1
-
numRz
;
}
lowry
=
1
-
numRy
;
}
}
}
/**
* Factory method to create a CpuNonbondedForceVec8.
*/
CpuNonbondedForce
*
createCpuNonbondedForceVec8
()
{
return
new
CpuNonbondedForceVec8
();
}
/**---------------------------------------------------------------------------------------
void
CpuNonbondedForceVec8
::
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
posq
=
posq
;
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
atomParameters
=
&
atomParameters
[
0
];
this
->
exclusions
=
&
exclusions
[
0
];
this
->
threadForce
=
&
threadForce
;
includeEnergy
=
(
totalEnergy
!=
NULL
);
threadEnergy
.
resize
(
threads
.
getNumThreads
());
gmx_atomic_t
counter
;
gmx_atomic_set
(
&
counter
,
0
);
this
->
atomicCounter
=
&
counter
;
// Signal the threads to start running and wait for them to finish.
ComputeDirectTask
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
// Combine the energies from all the threads.
if
(
totalEnergy
!=
NULL
)
{
double
directEnergy
=
0
;
int
numThreads
=
threads
.
getNumThreads
();
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
directEnergy
+=
threadEnergy
[
i
];
*
totalEnergy
+=
directEnergy
;
}
}
CpuNonbondedForceVec8 constructor
void
CpuNonbondedForceVec8
::
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Compute this thread's subset of interactions.
int
numThreads
=
threads
.
getNumThreads
();
threadEnergy
[
threadIndex
]
=
0
;
double
*
energyPtr
=
(
includeEnergy
?
&
threadEnergy
[
threadIndex
]
:
NULL
);
float
*
forces
=
&
(
*
threadForce
)[
threadIndex
][
0
];
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
while
(
true
)
{
int
nextBlock
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
nextBlock
>=
neighborList
->
getNumBlocks
())
break
;
calculateBlockEwaldIxn
(
nextBlock
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
--------------------------------------------------------------------------------------- */
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
for
(
int
i
=
threadIndex
;
i
<
numberOfAtoms
;
i
+=
numThreads
)
{
fvec4
posI
((
float
)
atomCoordinates
[
i
][
0
],
(
float
)
atomCoordinates
[
i
][
1
],
(
float
)
atomCoordinates
[
i
][
2
],
0.0
f
);
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
{
if
(
*
iter
>
i
)
{
int
j
=
*
iter
;
fvec4
deltaR
;
fvec4
posJ
((
float
)
atomCoordinates
[
j
][
0
],
(
float
)
atomCoordinates
[
j
][
1
],
(
float
)
atomCoordinates
[
j
][
2
],
0.0
f
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
false
,
boxSize
,
invBoxSize
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
).
lowerVec
()[
0
];
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
forces
+
4
*
i
)
-
result
).
store
(
forces
+
4
*
i
);
(
fvec4
(
forces
+
4
*
j
)
+
result
).
store
(
forces
+
4
*
j
);
if
(
includeEnergy
)
threadEnergy
[
threadIndex
]
-=
chargeProd
*
inverseR
*
(
1.0
f
-
erfcAlphaR
);
}
}
}
}
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
while
(
true
)
{
int
nextBlock
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
nextBlock
>=
neighborList
->
getNumBlocks
())
break
;
calculateBlockIxn
(
nextBlock
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
}
else
{
// Loop over all atom pairs
while
(
true
)
{
int
i
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
i
>=
numberOfAtoms
)
break
;
for
(
int
j
=
i
+
1
;
j
<
numberOfAtoms
;
j
++
)
if
(
exclusions
[
j
].
find
(
i
)
==
exclusions
[
j
].
end
())
calculateOneIxn
(
i
,
j
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
}
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
{
}
void
CpuNonbondedForceVec8
::
calculateOneIxn
(
int
ii
,
int
jj
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// get deltaR, R2, and R between 2 atoms
fvec4
deltaR
;
fvec4
posI
(
posq
+
4
*
ii
);
fvec4
posJ
(
posq
+
4
*
jj
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
if
(
cutoff
&&
r2
>=
cutoffDistance
*
cutoffDistance
)
return
;
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
switchValue
=
1
,
switchDeriv
=
0
;
if
(
useSwitch
&&
r
>
switchingDistance
)
{
float
t
=
(
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
);
switchValue
=
1
+
t
*
t
*
t
*
(
-
10
+
t
*
(
15
-
t
*
6
));
switchDeriv
=
t
*
t
*
(
-
30
+
t
*
(
60
-
t
*
30
))
/
(
cutoffDistance
-
switchingDistance
);
}
float
sig
=
atomParameters
[
ii
].
first
+
atomParameters
[
jj
].
first
;
float
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
atomParameters
[
ii
].
second
*
atomParameters
[
jj
].
second
;
float
dEdR
=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
ii
+
3
]
*
posq
[
4
*
jj
+
3
];
if
(
cutoff
)
dEdR
+=
(
float
)
(
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
));
else
dEdR
+=
(
float
)
(
chargeProd
*
inverseR
);
dEdR
*=
inverseR
*
inverseR
;
float
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
// accumulate energies
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
(
float
)
(
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
));
else
energy
+=
(
float
)
(
chargeProd
*
inverseR
);
*
totalEnergy
+=
energy
;
}
// accumulate forces
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
forces
+
4
*
ii
)
+
result
).
store
(
forces
+
4
*
ii
);
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
}
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
...
...
@@ -660,15 +284,6 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
deltaR
=
posJ
-
posI
;
if
(
periodic
)
{
fvec4
base
=
round
(
deltaR
*
invBoxSize
)
*
boxSize
;
deltaR
=
deltaR
-
base
;
}
r2
=
dot3
(
deltaR
,
deltaR
);
}
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
float
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
...
...
@@ -714,3 +329,4 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(fvec8 x) {
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
}
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment