Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
588a4c9d
Commit
588a4c9d
authored
Dec 10, 2013
by
peastman
Browse files
Merged changes from main branch
parents
bbf3de23
ff6af025
Changes
77
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2173 additions
and
212 deletions
+2173
-212
CMakeLists.txt
CMakeLists.txt
+14
-10
examples/CMakeLists.txt
examples/CMakeLists.txt
+17
-11
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+12
-0
platforms/cpu/CMakeLists.txt
platforms/cpu/CMakeLists.txt
+0
-4
platforms/cpu/include/AlignedArray.h
platforms/cpu/include/AlignedArray.h
+104
-0
platforms/cpu/include/CpuGBSAOBCForce.h
platforms/cpu/include/CpuGBSAOBCForce.h
+6
-2
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+2
-1
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+5
-2
platforms/cpu/include/CpuPlatform.h
platforms/cpu/include/CpuPlatform.h
+3
-2
platforms/cpu/include/CpuSETTLE.h
platforms/cpu/include/CpuSETTLE.h
+78
-0
platforms/cpu/sharedTarget/CMakeLists.txt
platforms/cpu/sharedTarget/CMakeLists.txt
+2
-3
platforms/cpu/src/CpuGBSAOBCForce.cpp
platforms/cpu/src/CpuGBSAOBCForce.cpp
+49
-45
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+6
-6
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+38
-13
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+125
-108
platforms/cpu/src/CpuPlatform.cpp
platforms/cpu/src/CpuPlatform.cpp
+9
-2
platforms/cpu/src/CpuSETTLE.cpp
platforms/cpu/src/CpuSETTLE.cpp
+104
-0
platforms/cpu/src/gmx_atomic.h
platforms/cpu/src/gmx_atomic.h
+1596
-0
platforms/cpu/tests/CMakeLists.txt
platforms/cpu/tests/CMakeLists.txt
+1
-2
platforms/cpu/tests/TestCpuNeighborList.cpp
platforms/cpu/tests/TestCpuNeighborList.cpp
+2
-1
No files found.
CMakeLists.txt
View file @
588a4c9d
...
...
@@ -105,16 +105,20 @@ ELSE( CMAKE_SIZEOF_VOID_P EQUAL 8 )
SET
(
LIB64
)
ENDIF
(
CMAKE_SIZEOF_VOID_P EQUAL 8
)
# Build universal binaries compatible with OS X 10.7
IF
(
APPLE AND NOT CMAKE_OSX_DEPLOYMENT_TARGET
)
SET
(
CMAKE_OSX_ARCHITECTURES
"i386;x86_64"
CACHE STRING
"The processor architectures to build for"
FORCE
)
SET
(
CMAKE_OSX_DEPLOYMENT_TARGET
"10.7"
CACHE STRING
"The minimum version of OS X to support"
FORCE
)
ENDIF
(
APPLE AND NOT CMAKE_OSX_DEPLOYMENT_TARGET
)
# Improve the linking behavior of Mac libraries
IF
(
APPLE
)
# Build universal binaries compatible with OS X 10.7
IF
(
NOT CMAKE_OSX_DEPLOYMENT_TARGET
)
SET
(
CMAKE_OSX_DEPLOYMENT_TARGET
"10.7"
CACHE STRING
"The minimum version of OS X to support"
FORCE
)
ENDIF
(
NOT CMAKE_OSX_DEPLOYMENT_TARGET
)
IF
(
NOT CMAKE_OSX_ARCHITECTURES
)
SET
(
CMAKE_OSX_ARCHITECTURES
"i386;x86_64"
CACHE STRING
"The processor architectures to build for"
FORCE
)
ENDIF
(
NOT CMAKE_OSX_ARCHITECTURES
)
# Improve the linking behavior of Mac libraries
SET
(
CMAKE_INSTALL_NAME_DIR
"@rpath"
)
SET_PROPERTY
(
GLOBAL PROPERTY COMPILE_FLAGS
"-stdlib=libc++ -mmacosx-version-min=10.7"
)
SET
(
EXTRA_COMPILE_FLAGS
"-stdlib=libc++"
)
ELSE
(
APPLE
)
SET
(
EXTRA_COMPILE_FLAGS
)
ENDIF
(
APPLE
)
IF
(
UNIX AND NOT CMAKE_BUILD_TYPE
)
...
...
@@ -266,7 +270,7 @@ ENDIF(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/src
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES
COMPILE_FLAGS
"
-DOPENMM_BUILDING_SHARED_LIBRARY -DLEPTON_BUILDING_SHARED_LIBRARY -DOPENMM_VALIDATE_BUILDING_SHARED_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES
LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_BUILDING_SHARED_LIBRARY -DLEPTON_BUILDING_SHARED_LIBRARY -DOPENMM_VALIDATE_BUILDING_SHARED_LIBRARY"
)
IF
(
WIN32
)
ADD_DEPENDENCIES
(
${
SHARED_TARGET
}
PthreadsLibraries
)
ENDIF
(
WIN32
)
...
...
@@ -274,7 +278,7 @@ ENDIF(WIN32)
SET
(
OPENMM_BUILD_STATIC_LIB OFF CACHE BOOL
"Whether to build static OpenMM libraries"
)
IF
(
OPENMM_BUILD_STATIC_LIB
)
ADD_LIBRARY
(
${
STATIC_TARGET
}
STATIC
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES
COMPILE_FLAGS
"
-DOPENMM_USE_STATIC_LIBRARIES -DOPENMM_BUILDING_STATIC_LIBRARY -DLEPTON_USE_STATIC_LIBRARIES -DLEPTON_BUILDING_STATIC_LIBRARY -DOPENMMM_VALIDATE_BUILDING_STATIC_LIBRARY -DOPENMM_VALIDATE_BUILDING_STATIC_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES
LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_USE_STATIC_LIBRARIES -DOPENMM_BUILDING_STATIC_LIBRARY -DLEPTON_USE_STATIC_LIBRARIES -DLEPTON_BUILDING_STATIC_LIBRARY -DOPENMMM_VALIDATE_BUILDING_STATIC_LIBRARY -DOPENMM_VALIDATE_BUILDING_STATIC_LIBRARY"
)
ENDIF
(
OPENMM_BUILD_STATIC_LIB
)
IF
(
OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS
)
...
...
examples/CMakeLists.txt
View file @
588a4c9d
...
...
@@ -26,14 +26,16 @@ SET(BUILD_TESTING_STATIC OFF)
IF
(
OPENMM_BUILD_STATIC_LIB
)
SET
(
BUILD_TESTING_STATIC ON
)
ENDIF
(
OPENMM_BUILD_STATIC_LIB
)
FOREACH
(
EX_ROOT
${
CPP_EXAMPLES
}
)
IF
(
BUILD_TESTING_SHARED
)
# Link with shared library
ADD_EXECUTABLE
(
${
EX_ROOT
}
${
EX_ROOT
}
.cpp
)
SET_TARGET_PROPERTIES
(
${
EX_ROOT
}
PROPERTIES
PROJECT_LABEL
"Example -
${
EX_ROOT
}
"
)
PROPERTIES
PROJECT_LABEL
"Example -
${
EX_ROOT
}
"
LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
TARGET_LINK_LIBRARIES
(
${
EX_ROOT
}
${
SHARED_TARGET
}
)
ENDIF
(
BUILD_TESTING_SHARED
)
...
...
@@ -42,9 +44,10 @@ FOREACH(EX_ROOT ${CPP_EXAMPLES})
SET
(
EX_STATIC
${
EX_ROOT
}
Static
)
ADD_EXECUTABLE
(
${
EX_STATIC
}
${
EX_ROOT
}
.cpp
)
SET_TARGET_PROPERTIES
(
${
EX_STATIC
}
PROPERTIES
COMPILE_FLAGS
"-DOPENMM_USE_STATIC_LIBRARIES"
PROJECT_LABEL
"Example -
${
EX_STATIC
}
"
)
PROPERTIES
PROJECT_LABEL
"Example -
${
EX_STATIC
}
"
LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_USE_STATIC_LIBRARIES"
)
TARGET_LINK_LIBRARIES
(
${
EX_STATIC
}
${
STATIC_TARGET
}
)
ENDIF
(
BUILD_TESTING_STATIC
)
...
...
@@ -63,8 +66,10 @@ IF(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS)
# C++ libraries on the link line.
ADD_EXECUTABLE
(
${
EX_ROOT
}
${
EX_ROOT
}
.c Empty.cpp
)
SET_TARGET_PROPERTIES
(
${
EX_ROOT
}
PROPERTIES
PROJECT_LABEL
"Example C -
${
EX_ROOT
}
"
)
PROPERTIES
PROJECT_LABEL
"Example C -
${
EX_ROOT
}
"
LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
TARGET_LINK_LIBRARIES
(
${
EX_ROOT
}
${
SHARED_TARGET
}
)
ADD_DEPENDENCIES
(
${
EX_ROOT
}
ApiWrappers
)
ENDIF
(
BUILD_TESTING_SHARED
)
...
...
@@ -76,9 +81,10 @@ IF(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS)
# C++ libraries on the static link line.
ADD_EXECUTABLE
(
${
EX_STATIC
}
${
EX_ROOT
}
.c Empty.cpp
)
SET_TARGET_PROPERTIES
(
${
EX_STATIC
}
PROPERTIES
COMPILE_FLAGS
"-DOPENMM_USE_STATIC_LIBRARIES"
PROJECT_LABEL
"Example C -
${
EX_STATIC
}
"
)
PROPERTIES
PROJECT_LABEL
"Example C -
${
EX_STATIC
}
"
LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_USE_STATIC_LIBRARIES"
)
TARGET_LINK_LIBRARIES
(
${
EX_STATIC
}
${
STATIC_TARGET
}
)
ADD_DEPENDENCIES
(
${
EX_STATIC
}
ApiWrappers
)
ENDIF
(
BUILD_TESTING_STATIC
)
...
...
openmmapi/include/openmm/internal/vectorize.h
View file @
588a4c9d
...
...
@@ -91,6 +91,9 @@ public:
fvec4
operator
&
(
fvec4
other
)
const
{
return
_mm_and_ps
(
val
,
other
);
}
fvec4
operator
|
(
fvec4
other
)
const
{
return
_mm_or_ps
(
val
,
other
);
}
fvec4
operator
==
(
fvec4
other
)
const
{
return
_mm_cmpeq_ps
(
val
,
other
);
}
...
...
@@ -157,6 +160,9 @@ public:
ivec4
operator
&
(
ivec4
other
)
const
{
return
_mm_and_si128
(
val
,
other
);
}
ivec4
operator
|
(
ivec4
other
)
const
{
return
_mm_or_si128
(
val
,
other
);
}
ivec4
operator
==
(
ivec4
other
)
const
{
return
_mm_cmpeq_epi32
(
val
,
other
);
}
...
...
@@ -267,5 +273,11 @@ static inline fvec4 operator/(float v1, fvec4 v2) {
return
fvec4
(
v1
)
/
v2
;
}
// Operations for blending fvec4s based on an ivec4.
static
inline
fvec4
blend
(
fvec4
v1
,
fvec4
v2
,
ivec4
mask
)
{
return
fvec4
(
_mm_blendv_ps
(
v1
.
val
,
v2
.
val
,
_mm_castsi128_ps
(
mask
.
val
)));
}
#endif
/*OPENMM_VECTORIZE_H_*/
platforms/cpu/CMakeLists.txt
View file @
588a4c9d
...
...
@@ -14,10 +14,6 @@
# libOpenMMCPU_static[_d].a
#----------------------------------------------------
IF
(
APPLE
)
SET
(
CMAKE_OSX_DEPLOYMENT_TARGET
"10.6"
)
ENDIF
(
APPLE
)
SUBDIRS
(
tests
)
# The source is organized into subdirectories, but we handle them all from
...
...
platforms/cpu/include/AlignedArray.h
0 → 100644
View file @
588a4c9d
#ifndef OPENMM_ALIGNEDARRAY_H_
#define OPENMM_ALIGNEDARRAY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
namespace
OpenMM
{
/**
* This class represents an array in memory whose starting point is guaranteed to
* be aligned with a 16 byte boundary. This can improve the performance of vectorized
* code, since loads and stores are more efficient.
*/
template
<
class
T
>
class
AlignedArray
{
public:
/**
* Default constructor, to allow AlignedArrays to be used inside collections.
*/
AlignedArray
()
:
dataSize
(
0
),
baseData
(
0
),
data
(
0
)
{
}
/**
* Create an Aligned array that contains a specified number of elements.
*/
AlignedArray
(
int
size
)
{
allocate
(
size
);
}
~
AlignedArray
()
{
if
(
baseData
!=
0
)
delete
[]
baseData
;
}
/**
* Get the number of elements in the array.
*/
int
size
()
const
{
return
dataSize
;
}
/**
* Change the size of the array. This may cause all contents to be lost.
*/
void
resize
(
int
size
)
{
if
(
dataSize
==
size
)
return
;
if
(
baseData
!=
0
)
delete
[]
baseData
;
allocate
(
size
);
}
/**
* Get a reference to an element of the array.
*/
T
&
operator
[](
int
i
)
{
return
data
[
i
];
}
/**
* Get a const reference to an element of the array.
*/
const
T
&
operator
[](
int
i
)
const
{
return
data
[
i
];
}
private:
void
allocate
(
int
size
)
{
dataSize
=
size
;
baseData
=
new
char
[
size
*
sizeof
(
T
)
+
16
];
char
*
offsetData
=
baseData
+
15
;
offsetData
-=
(
long
long
)
offsetData
&
0xF
;
data
=
(
T
*
)
offsetData
;
}
int
dataSize
;
char
*
baseData
;
T
*
data
;
};
}
// namespace OpenMM
#endif
/*OPENMM_ALIGNEDARRAY_H_*/
platforms/cpu/include/CpuGBSAOBCForce.h
View file @
588a4c9d
...
...
@@ -25,6 +25,7 @@
#ifndef OPENMM_CPU_GBSAOBC_FORCE_H__
#define OPENMM_CPU_GBSAOBC_FORCE_H__
#include "AlignedArray.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h"
#include <set>
...
...
@@ -84,7 +85,7 @@ public:
* @param totalEnergy total energy
* @param threads the thread pool to use
*/
void
computeForce
(
const
std
::
vector
<
float
>&
posq
,
std
::
vector
<
std
::
vector
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
);
void
computeForce
(
const
AlignedArray
<
float
>&
posq
,
std
::
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
);
/**
* This routine contains the code executed by each thread.
...
...
@@ -105,10 +106,13 @@ private:
float
logDX
,
logDXInv
;
// The following variables are used to make information accessible to the individual threads.
float
const
*
posq
;
std
::
vector
<
std
::
vector
<
float
>
>*
threadForce
;
std
::
vector
<
AlignedArray
<
float
>
>*
threadForce
;
bool
includeEnergy
;
void
*
atomicCounter
;
static
const
int
NUM_TABLE_POINTS
;
static
const
float
TABLE_MIN
;
static
const
float
TABLE_MAX
;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
...
...
platforms/cpu/include/CpuNeighborList.h
View file @
588a4c9d
...
...
@@ -32,6 +32,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "AlignedArray.h"
#include "windowsExportCpu.h"
#include "openmm/internal/ThreadPool.h"
#include <set>
...
...
@@ -46,7 +47,7 @@ public:
class
Voxels
;
static
const
int
BlockSize
;
CpuNeighborList
();
void
computeNeighborList
(
int
numAtoms
,
const
std
::
vector
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
void
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
int
getNumBlocks
()
const
;
const
std
::
vector
<
int
>&
getSortedAtoms
()
const
;
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
588a4c9d
...
...
@@ -25,6 +25,7 @@
#ifndef OPENMM_CPU_NONBONDED_FORCE_H__
#define OPENMM_CPU_NONBONDED_FORCE_H__
#include "AlignedArray.h"
#include "CpuNeighborList.h"
#include "ReferencePairIxn.h"
#include "openmm/internal/ThreadPool.h"
...
...
@@ -143,7 +144,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
RealVec
>&
atomCoordinates
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
std
::
vector
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
);
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
);
/**
* This routine contains the code executed by each thread.
...
...
@@ -156,6 +157,7 @@ private:
bool
periodic
;
bool
ewald
;
bool
pme
;
bool
tableIsValid
;
const
CpuNeighborList
*
neighborList
;
float
periodicBoxSize
[
3
];
float
cutoffDistance
,
switchingDistance
;
...
...
@@ -172,8 +174,9 @@ private:
RealVec
const
*
atomCoordinates
;
std
::
pair
<
float
,
float
>
const
*
atomParameters
;
std
::
set
<
int
>
const
*
exclusions
;
std
::
vector
<
std
::
vector
<
float
>
>*
threadForce
;
std
::
vector
<
AlignedArray
<
float
>
>*
threadForce
;
bool
includeEnergy
;
void
*
atomicCounter
;
static
const
float
TWO_OVER_SQRT_PI
;
static
const
int
NUM_TABLE_POINTS
;
...
...
platforms/cpu/include/CpuPlatform.h
View file @
588a4c9d
...
...
@@ -32,6 +32,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "AlignedArray.h"
#include "ReferencePlatform.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/ThreadPool.h"
...
...
@@ -69,8 +70,8 @@ private:
class
CpuPlatform
::
PlatformData
{
public:
PlatformData
(
int
numParticles
);
std
::
vector
<
float
>
posq
;
std
::
vector
<
std
::
vector
<
float
>
>
threadForce
;
AlignedArray
<
float
>
posq
;
std
::
vector
<
AlignedArray
<
float
>
>
threadForce
;
ThreadPool
threads
;
bool
isPeriodic
;
};
...
...
platforms/cpu/include/CpuSETTLE.h
0 → 100644
View file @
588a4c9d
#ifndef OPENMM_CPUSETTLE_H_
#define OPENMM_CPUSETTLE_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "ReferenceSETTLEAlgorithm.h"
#include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
#include <vector>
namespace
OpenMM
{
/**
* This class uses multiple ReferenceSETTLEAlgorithm objects to execute the algorithm in parallel.
*/
class
OPENMM_EXPORT
CpuSETTLE
:
public
ReferenceConstraintAlgorithm
{
public:
class
ApplyToPositionsTask
;
class
ApplyToVelocitiesTask
;
CpuSETTLE
(
const
System
&
system
,
const
ReferenceSETTLEAlgorithm
&
settle
,
ThreadPool
&
threads
);
~
CpuSETTLE
();
/**
* Apply the constraint algorithm.
*
* @param atomCoordinates the original atom coordinates
* @param atomCoordinatesP the new atom coordinates
* @param inverseMasses 1/mass
* @param tolerance the constraint tolerance
*/
void
apply
(
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinatesP
,
std
::
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
);
/**
* Apply the constraint algorithm to velocities.
*
* @param atomCoordinates the atom coordinates
* @param atomCoordinatesP the velocities to modify
* @param inverseMasses 1/mass
* @param tolerance the constraint tolerance
*/
void
applyToVelocities
(
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
std
::
vector
<
OpenMM
::
RealVec
>&
velocities
,
std
::
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
);
private:
std
::
vector
<
ReferenceSETTLEAlgorithm
*>
threadSettle
;
ThreadPool
&
threads
;
};
}
// namespace OpenMM
#endif
/*OPENMM_CPUSETTLE_H_*/
platforms/cpu/sharedTarget/CMakeLists.txt
View file @
588a4c9d
GET_PROPERTY
(
COMPILE_FLAGS GLOBAL PROPERTY COMPILE_FLAGS
)
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
"
${
COMPILE_FLAGS
}
-msse4.1"
)
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
IF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
...
...
@@ -8,6 +7,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
PTHREADS_LIB
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES
COMPILE_FLAGS
"
-DOPENMM_CPU_BUILDING_SHARED_LIBRARY"
LINK_FLAGS
"
${
COMPILE_FLAGS
}
"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES
LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_CPU_BUILDING_SHARED_LIBRARY"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
platforms/cpu/src/CpuGBSAOBCForce.cpp
View file @
588a4c9d
...
...
@@ -24,14 +24,16 @@
#include "CpuGBSAOBCForce.h"
#include "SimTKOpenMMRealType.h"
#include "openmm/internal/SplineFitter.h"
#include "openmm/internal/vectorize.h"
#include "gmx_atomic.h"
#include <cmath>
using
namespace
std
;
using
namespace
OpenMM
;
const
int
CpuGBSAOBCForce
::
NUM_TABLE_POINTS
=
1025
;
const
int
CpuGBSAOBCForce
::
NUM_TABLE_POINTS
=
4096
;
const
float
CpuGBSAOBCForce
::
TABLE_MIN
=
0.25
f
;
const
float
CpuGBSAOBCForce
::
TABLE_MAX
=
1.5
f
;
class
CpuGBSAOBCForce
::
ComputeTask
:
public
ThreadPool
::
Task
{
public:
...
...
@@ -44,22 +46,12 @@ public:
};
CpuGBSAOBCForce
::
CpuGBSAOBCForce
()
:
cutoff
(
false
),
periodic
(
false
)
{
logDX
=
0.5
/
NUM_TABLE_POINTS
;
logDX
=
(
TABLE_MAX
-
TABLE_MIN
)
/
NUM_TABLE_POINTS
;
logDXInv
=
1.0
f
/
logDX
;
vector
<
double
>
x
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
y
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
deriv
;
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
1
;
i
++
)
{
x
[
i
]
=
0.5
+
i
*
0.5
/
NUM_TABLE_POINTS
;
y
[
i
]
=
log
(
x
[
i
]);
}
SplineFitter
::
createNaturalSpline
(
x
,
y
,
deriv
);
logTable
.
resize
(
4
*
NUM_TABLE_POINTS
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
;
i
++
)
{
logTable
[
4
*
i
]
=
(
float
)
y
[
i
];
logTable
[
4
*
i
+
1
]
=
(
float
)
y
[
i
+
1
];
logTable
[
4
*
i
+
2
]
=
(
float
)
(
deriv
[
i
]
*
logDX
*
logDX
/
6
);
logTable
[
4
*
i
+
3
]
=
(
float
)
(
deriv
[
i
+
1
]
*
logDX
*
logDX
/
6
);
logTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
x
=
TABLE_MIN
+
i
*
logDX
;
logTable
[
i
]
=
log
(
x
);
}
}
...
...
@@ -93,7 +85,7 @@ void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, f
obcChain
.
resize
(
params
.
size
()
+
3
);
}
void
CpuGBSAOBCForce
::
computeForce
(
const
std
::
vector
<
float
>&
posq
,
vector
<
vector
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
void
CpuGBSAOBCForce
::
computeForce
(
const
AlignedArray
<
float
>&
posq
,
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
// Record the parameters for the threads.
this
->
posq
=
&
posq
[
0
];
...
...
@@ -104,16 +96,22 @@ void CpuGBSAOBCForce::computeForce(const std::vector<float>& posq, vector<vector
threadBornForces
.
resize
(
numThreads
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
threadBornForces
[
i
].
resize
(
particleParams
.
size
()
+
3
);
gmx_atomic_t
counter
;
this
->
atomicCounter
=
&
counter
;
// Signal the threads to start running and wait for them to finish.
ComputeTask
task
(
*
this
);
gmx_atomic_set
(
&
counter
,
0
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
// Compute Born radii
gmx_atomic_set
(
&
counter
,
0
);
threads
.
resumeThreads
();
threads
.
waitForThreads
();
// Compute surface area term
gmx_atomic_set
(
&
counter
,
0
);
threads
.
resumeThreads
();
threads
.
waitForThreads
();
// First loop
gmx_atomic_set
(
&
counter
,
0
);
threads
.
resumeThreads
();
threads
.
waitForThreads
();
// Second loop
...
...
@@ -141,8 +139,11 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
// Calculate Born radii
for
(
int
blockStart
=
start
;
blockStart
<
end
;
blockStart
+=
4
)
{
int
numInBlock
=
min
(
4
,
end
-
blockStart
);
while
(
true
)
{
int
blockStart
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
4
);
if
(
blockStart
>=
numParticles
)
break
;
int
numInBlock
=
min
(
4
,
numParticles
-
blockStart
);
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomRadius
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
...
...
@@ -213,7 +214,10 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
vector
<
float
>&
bornForces
=
threadBornForces
[
threadIndex
];
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
bornForces
[
i
]
=
0.0
f
;
for
(
int
atomI
=
start
;
atomI
<
end
;
atomI
++
)
{
while
(
true
)
{
int
atomI
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
atomI
>=
numParticles
)
break
;
if
(
bornRadii
[
atomI
]
>
0
)
{
float
radiusI
=
particleParams
[
atomI
].
first
+
dielectricOffset
;
float
r
=
radiusI
+
probeRadius
;
...
...
@@ -235,8 +239,11 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
preFactor
=
ONE_4PI_EPS0
*
((
1.0
f
/
solventDielectric
)
-
(
1.0
f
/
soluteDielectric
));
else
preFactor
=
0.0
f
;
for
(
int
blockStart
=
start
;
blockStart
<
end
;
blockStart
+=
4
)
{
int
numInBlock
=
min
(
4
,
end
-
blockStart
);
while
(
true
)
{
int
blockStart
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
4
);
if
(
blockStart
>=
numParticles
)
break
;
int
numInBlock
=
min
(
4
,
numParticles
-
blockStart
);
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomCharge
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
...
...
@@ -303,13 +310,16 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
// Second loop of Born energy computation.
for
(
int
blockStart
=
start
;
blockStart
<
end
;
blockStart
+=
4
)
{
while
(
true
)
{
int
blockStart
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
4
);
if
(
blockStart
>=
numParticles
)
break
;
fvec4
bornForce
(
0.0
f
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
bornForce
+=
fvec4
(
&
threadBornForces
[
i
][
blockStart
]);
fvec4
radii
(
&
bornRadii
[
blockStart
]);
bornForce
*=
radii
*
radii
*
fvec4
(
&
obcChain
[
blockStart
]);
int
numInBlock
=
min
(
4
,
end
-
blockStart
);
int
numInBlock
=
min
(
4
,
numParticles
-
blockStart
);
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomRadius
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
...
...
@@ -351,14 +361,13 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
fvec4
logRatio
=
fastLog
(
u_ij
/
l_ij
);
fvec4
t3
=
0.125
f
*
(
1.0
f
+
scaledRadiusJ2
*
r2Inverse
)
*
(
l_ij2
-
u_ij2
)
+
0.25
f
*
logRatio
*
r2Inverse
;
fvec4
de
=
bornForce
*
t3
*
rInverse
;
de
=
blend
(
0.0
f
,
de
,
include
);
fvec4
result
[
4
]
=
{
dx
*
de
,
dy
*
de
,
dz
*
de
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atomJ
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
atomForce
.
store
(
forces
+
4
*
atomJ
);
}
...
...
@@ -385,21 +394,16 @@ void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4&
fvec4
CpuGBSAOBCForce
::
fastLog
(
fvec4
x
)
{
// Evaluate log(x) using a lookup table for speed.
float
y
[
4
];
fvec4
x1
=
(
x
-
0.5
f
)
*
logDXInv
;
if
(
any
((
x
<
TABLE_MIN
)
|
(
x
>=
TABLE_MAX
)))
return
fvec4
(
logf
(
x
[
0
]),
logf
(
x
[
1
]),
logf
(
x
[
2
]),
logf
(
x
[
3
]));
fvec4
x1
=
(
x
-
TABLE_MIN
)
*
logDXInv
;
ivec4
index
=
floor
(
x1
);
fvec4
coeff
[
4
];
coeff
[
1
]
=
x1
-
index
;
coeff
[
0
]
=
1.0
f
-
coeff
[
1
];
coeff
[
2
]
=
coeff
[
0
]
*
coeff
[
0
]
*
coeff
[
0
]
-
coeff
[
0
];
coeff
[
3
]
=
coeff
[
1
]
*
coeff
[
1
]
*
coeff
[
1
]
-
coeff
[
1
];
transpose
(
coeff
[
0
],
coeff
[
1
],
coeff
[
2
],
coeff
[
3
]);
static
float
maxdiff
=
0.0
f
;
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
if
(
index
[
i
]
>=
0
&&
index
[
i
]
<
NUM_TABLE_POINTS
)
y
[
i
]
=
dot4
(
coeff
[
i
],
fvec4
(
&
logTable
[
4
*
index
[
i
]]));
else
y
[
i
]
=
logf
(
x
[
i
]);
}
return
fvec4
(
y
);
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
logTable
[
index
[
0
]]);
fvec4
t2
(
&
logTable
[
index
[
1
]]);
fvec4
t3
(
&
logTable
[
index
[
2
]]);
fvec4
t4
(
&
logTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuKernels.cpp
View file @
588a4c9d
...
...
@@ -81,16 +81,16 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
// Convert the positions to single precision and apply periodic boundary conditions
vector
<
float
>&
posq
=
data
.
posq
;
AlignedArray
<
float
>&
posq
=
data
.
posq
;
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
RealVec
boxSize
=
extractBoxSize
(
context
);
float
float
BoxSize
[
3
]
=
{
(
float
)
boxSize
[
0
],
(
float
)
boxSize
[
1
],
(
float
)
boxSize
[
2
]};
double
inv
BoxSize
[
3
]
=
{
1
/
boxSize
[
0
],
1
/
boxSize
[
1
],
1
/
boxSize
[
2
]};
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
if
(
data
.
isPeriodic
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
RealOpenMM
x
=
posData
[
i
][
j
];
double
base
=
floor
(
x
/
b
oxSize
[
j
])
*
boxSize
[
j
];
double
base
=
floor
(
x
*
invB
oxSize
[
j
])
*
boxSize
[
j
];
posq
[
4
*
i
+
j
]
=
(
float
)
(
x
-
base
);
}
else
...
...
@@ -255,12 +255,12 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
}
}
}
vector
<
float
>&
posq
=
data
.
posq
;
AlignedArray
<
float
>&
posq
=
data
.
posq
;
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
RealVec
boxSize
=
extractBoxSize
(
context
);
float
floatBoxSize
[
3
]
=
{(
float
)
boxSize
[
0
],
(
float
)
boxSize
[
1
],
(
float
)
boxSize
[
2
]};
double
energy
=
ewaldSelfEnergy
;
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
)
;
bool
ewald
=
(
nonbondedMethod
==
Ewald
);
bool
pme
=
(
nonbondedMethod
==
PME
);
if
(
nonbondedMethod
!=
NoCutoff
)
{
...
...
@@ -330,7 +330,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
PmeIO
io
(
&
posq
[
0
],
&
data
.
threadForce
[
0
][
0
],
numParticles
);
Vec3
periodicBoxSize
(
boxSize
[
0
],
boxSize
[
1
],
boxSize
[
2
]);
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
beginComputation
(
io
,
periodicBoxSize
,
includeEnergy
);
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
nonbondedEnergy
+=
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
else
nonbonded
.
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
588a4c9d
...
...
@@ -48,6 +48,8 @@ const int CpuNeighborList::BlockSize = 4;
class
VoxelIndex
{
public:
VoxelIndex
()
:
x
(
0
),
y
(
0
)
{
}
VoxelIndex
(
int
x
,
int
y
)
:
x
(
x
),
y
(
y
)
{
}
int
x
;
...
...
@@ -173,6 +175,9 @@ public:
float
centerPos
[
4
];
blockCenter
.
store
(
centerPos
);
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
centerPos
);
VoxelIndex
atomVoxelIndex
[
BlockSize
];
for
(
int
i
=
0
;
i
<
(
int
)
blockAtoms
.
size
();
i
++
)
atomVoxelIndex
[
i
]
=
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
i
]]);
int
startx
=
centerVoxelIndex
.
x
-
dIndexX
;
int
starty
=
centerVoxelIndex
.
y
-
dIndexY
;
int
endx
=
centerVoxelIndex
.
x
+
dIndexX
;
...
...
@@ -194,39 +199,59 @@ public:
voxelIndex
.
x
=
x
;
if
(
usePeriodic
)
voxelIndex
.
x
=
(
x
<
0
?
x
+
nx
:
(
x
>=
nx
?
x
-
nx
:
x
));
float
dx
=
max
(
0.0
f
,
voxelSizeX
*
max
(
0
,
abs
(
centerVoxelIndex
.
x
-
x
)
-
1
)
-
blockWidth
[
0
]);
for
(
int
y
=
starty
;
y
<=
endy
;
++
y
)
{
voxelIndex
.
y
=
y
;
if
(
usePeriodic
)
voxelIndex
.
y
=
(
y
<
0
?
y
+
ny
:
(
y
>=
ny
?
y
-
ny
:
y
));
float
dy
=
max
(
0.0
f
,
voxelSizeY
*
max
(
0
,
abs
(
centerVoxelIndex
.
y
-
y
)
-
1
)
-
blockWidth
[
1
]);
// Identify the range of atoms within this bin we need to search. When using periodic boundary
// conditions, there may be two separate ranges.
float
dz
=
maxDistance
+
blockWidth
[
2
];
dz
=
sqrtf
(
max
(
0.0
f
,
dz
*
dz
-
dx
*
dx
-
dy
*
dy
));
float
minz
=
centerPos
[
2
];
float
maxz
=
centerPos
[
2
];
fvec4
offset
(
voxelSizeX
*
x
+
(
usePeriodic
?
0.0
f
:
minx
),
voxelSizeY
*
y
+
(
usePeriodic
?
0.0
f
:
miny
),
0
,
0
);
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
const
float
*
atomPos
=
&
atomLocations
[
4
*
blockAtoms
[
k
]];
fvec4
posVec
(
atomPos
);
fvec4
delta1
=
offset
-
posVec
;
fvec4
delta2
=
delta1
+
fvec4
(
voxelSizeX
,
voxelSizeY
,
0
,
0
);
if
(
usePeriodic
)
{
delta1
-=
round
(
delta1
*
invBoxSize
)
*
boxSize
;
delta2
-=
round
(
delta2
*
invBoxSize
)
*
boxSize
;
}
fvec4
delta
=
min
(
abs
(
delta1
),
abs
(
delta2
));
float
dx
=
(
x
==
atomVoxelIndex
[
k
].
x
?
0.0
f
:
delta
[
0
]);
float
dy
=
(
y
==
atomVoxelIndex
[
k
].
y
?
0.0
f
:
delta
[
1
]);
float
dist2
=
maxDistanceSquared
-
dx
*
dx
-
dy
*
dy
;
if
(
dist2
>
0
)
{
float
dist
=
sqrtf
(
dist2
);
minz
=
min
(
minz
,
atomPos
[
2
]
-
dist
);
maxz
=
max
(
maxz
,
atomPos
[
2
]
+
dist
);
}
}
if
(
minz
==
maxz
)
continue
;
bool
needPeriodic
=
(
centerPos
[
0
]
-
blockWidth
[
0
]
<
maxDistance
||
centerPos
[
0
]
+
blockWidth
[
0
]
>
periodicBoxSize
[
0
]
-
maxDistance
||
centerPos
[
1
]
-
blockWidth
[
1
]
<
maxDistance
||
centerPos
[
1
]
+
blockWidth
[
1
]
>
periodicBoxSize
[
1
]
-
maxDistance
||
centerPos
[
2
]
-
dz
<
0.0
f
||
centerPos
[
2
]
+
d
z
>
periodicBoxSize
[
2
]);
minz
<
0.0
f
||
max
z
>
periodicBoxSize
[
2
]);
int
rangeStart
[
2
];
int
rangeEnd
[
2
];
rangeStart
[
0
]
=
findLowerBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
-
d
z
);
rangeStart
[
0
]
=
findLowerBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
min
z
);
if
(
needPeriodic
)
{
numRanges
=
2
;
rangeEnd
[
0
]
=
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
+
d
z
);
rangeEnd
[
0
]
=
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
max
z
);
if
(
rangeStart
[
0
]
>
0
)
{
rangeStart
[
1
]
=
0
;
rangeEnd
[
1
]
=
min
(
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
+
d
z
-
periodicBoxSize
[
2
]),
rangeStart
[
0
]);
rangeEnd
[
1
]
=
min
(
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
max
z
-
periodicBoxSize
[
2
]),
rangeStart
[
0
]);
}
else
{
rangeStart
[
1
]
=
max
(
findLowerBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
-
d
z
+
periodicBoxSize
[
2
]),
rangeEnd
[
0
]);
rangeStart
[
1
]
=
max
(
findLowerBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
min
z
+
periodicBoxSize
[
2
]),
rangeEnd
[
0
]);
rangeEnd
[
1
]
=
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
].
size
();
}
}
else
{
numRanges
=
1
;
rangeEnd
[
0
]
=
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
+
d
z
);
rangeEnd
[
0
]
=
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
max
z
);
}
// Loop over atoms and check to see if they are neighbors of this block.
...
...
@@ -307,7 +332,7 @@ public:
CpuNeighborList
::
CpuNeighborList
()
{
}
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
vector
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
)
{
int
numBlocks
=
(
numAtoms
+
BlockSize
-
1
)
/
BlockSize
;
blockNeighbors
.
resize
(
numBlocks
);
...
...
@@ -353,8 +378,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
if
(
!
usePeriodic
)
edgeSizeX
=
edgeSizeY
=
maxDistance
;
// TODO - adjust this as needed
else
{
edgeSizeX
=
0.
5
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeY
=
0.
5
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
edgeSizeX
=
0.
6
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeY
=
0.
6
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
}
Voxels
voxels
(
edgeSizeX
,
edgeSizeY
,
minx
,
maxx
,
miny
,
maxy
,
periodicBoxSize
,
usePeriodic
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
588a4c9d
...
...
@@ -29,8 +29,8 @@
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "openmm/internal/SplineFitter.h"
#include "openmm/internal/vectorize.h"
#include "gmx_atomic.h"
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
...
...
@@ -40,7 +40,7 @@ using namespace std;
using
namespace
OpenMM
;
const
float
CpuNonbondedForce
::
TWO_OVER_SQRT_PI
=
(
float
)
(
2
/
sqrt
(
PI_M
));
const
int
CpuNonbondedForce
::
NUM_TABLE_POINTS
=
1025
;
const
int
CpuNonbondedForce
::
NUM_TABLE_POINTS
=
2048
;
class
CpuNonbondedForce
::
ComputeDirectTask
:
public
ThreadPool
::
Task
{
public:
...
...
@@ -58,21 +58,22 @@ public:
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
)
{
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
)
,
tableIsValid
(
false
)
{
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
Set the force to use a cutoff.
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
)
{
void
CpuNonbondedForce
::
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
)
{
if
(
distance
!=
cutoffDistance
)
tableIsValid
=
false
;
cutoff
=
true
;
cutoffDistance
=
distance
;
neighborList
=
&
neighbors
;
...
...
@@ -127,6 +128,8 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUseEwald
(
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
)
{
if
(
alpha
!=
alphaEwald
)
tableIsValid
=
false
;
alphaEwald
=
alpha
;
numRx
=
kmaxx
;
numRy
=
kmaxy
;
...
...
@@ -145,6 +148,8 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUsePME
(
float
alpha
,
int
meshSize
[
3
])
{
if
(
alpha
!=
alphaEwald
)
tableIsValid
=
false
;
alphaEwald
=
alpha
;
meshDim
[
0
]
=
meshSize
[
0
];
meshDim
[
1
]
=
meshSize
[
1
];
...
...
@@ -155,24 +160,16 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
void
CpuNonbondedForce
::
tabulateEwaldScaleFactor
()
{
ewaldDX
=
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
);
if
(
tableIsValid
)
return
;
tableIsValid
=
true
;
ewaldDX
=
cutoffDistance
/
NUM_TABLE_POINTS
;
ewaldDXInv
=
1.0
f
/
ewaldDX
;
vector
<
double
>
x
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
y
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
deriv
;
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
1
;
i
++
)
{
double
r
=
i
*
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
);
ewaldScaleTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
r
=
i
*
ewaldDX
;
double
alphaR
=
alphaEwald
*
r
;
x
[
i
]
=
r
;
y
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
}
SplineFitter
::
createNaturalSpline
(
x
,
y
,
deriv
);
ewaldScaleTable
.
resize
(
4
*
NUM_TABLE_POINTS
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
;
i
++
)
{
ewaldScaleTable
[
4
*
i
]
=
(
float
)
y
[
i
];
ewaldScaleTable
[
4
*
i
+
1
]
=
(
float
)
y
[
i
+
1
];
ewaldScaleTable
[
4
*
i
+
2
]
=
(
float
)
(
deriv
[
i
]
*
ewaldDX
*
ewaldDX
/
6
);
ewaldScaleTable
[
4
*
i
+
3
]
=
(
float
)
(
deriv
[
i
+
1
]
*
ewaldDX
*
ewaldDX
/
6
);
ewaldScaleTable
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
}
}
...
...
@@ -291,7 +288,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
void
CpuNonbondedForce
::
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
vector
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
...
...
@@ -302,6 +299,9 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
this
->
threadForce
=
&
threadForce
;
includeEnergy
=
(
totalEnergy
!=
NULL
);
threadEnergy
.
resize
(
threads
.
getNumThreads
());
gmx_atomic_t
counter
;
gmx_atomic_set
(
&
counter
,
0
);
this
->
atomicCounter
=
&
counter
;
// Signal the threads to start running and wait for them to finish.
...
...
@@ -332,8 +332,12 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
threadIndex
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
calculateBlockEwaldIxn
(
i
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
while
(
true
)
{
int
nextBlock
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
nextBlock
>=
neighborList
->
getNumBlocks
())
break
;
calculateBlockEwaldIxn
(
nextBlock
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
...
...
@@ -367,13 +371,20 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
threadIndex
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
calculateBlockIxn
(
i
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
while
(
true
)
{
int
nextBlock
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
nextBlock
>=
neighborList
->
getNumBlocks
())
break
;
calculateBlockIxn
(
nextBlock
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
}
else
{
// Loop over all atom pairs
for
(
int
i
=
threadIndex
;
i
<
numberOfAtoms
;
i
+=
numThreads
){
while
(
true
)
{
int
i
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
i
>=
numberOfAtoms
)
break
;
for
(
int
j
=
i
+
1
;
j
<
numberOfAtoms
;
j
++
)
if
(
exclusions
[
j
].
find
(
i
)
==
exclusions
[
j
].
end
())
calculateOneIxn
(
i
,
j
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
...
...
@@ -461,12 +472,12 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
break
;
}
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
bool
include
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
...
...
@@ -475,43 +486,50 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Compute the distances to the block atoms.
bool
any
=
false
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
(
!
cutoff
||
r2
[
j
]
<
cutoffDistance
*
cutoffDistance
));
any
|=
include
[
j
];
}
if
(
!
any
)
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
eps
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
;
fvec4
dEdR
=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
// Accumulate energies.
...
...
@@ -520,27 +538,25 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
if
(
include
[
j
])
*
totalEnergy
+=
energy
[
j
];
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
...
...
@@ -569,12 +585,12 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
needPeriodic
=
true
;
break
;
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
bool
include
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
...
...
@@ -583,60 +599,65 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Compute the distances to the block atoms.
bool
any
=
false
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
r2
[
j
]
<
cutoffDistance
*
cutoffDistance
);
any
|=
include
[
j
];
}
if
(
!
any
)
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec4
dEdR
=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
eps
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
;
dEdR
+=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
if
(
include
[
j
])
*
totalEnergy
+=
energy
[
j
];
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
...
...
@@ -683,18 +704,14 @@ fvec4 CpuNonbondedForce::erfcApprox(fvec4 x) {
fvec4
CpuNonbondedForce
::
ewaldScaleFunction
(
fvec4
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
float
y
[
4
];
fvec4
x1
=
x
*
ewaldDXInv
;
ivec4
index
=
floor
(
x1
);
fvec4
coeff
[
4
];
coeff
[
1
]
=
x1
-
index
;
coeff
[
0
]
=
1.0
f
-
coeff
[
1
];
coeff
[
2
]
=
coeff
[
0
]
*
coeff
[
0
]
*
coeff
[
0
]
-
coeff
[
0
];
coeff
[
3
]
=
coeff
[
1
]
*
coeff
[
1
]
*
coeff
[
1
]
-
coeff
[
1
];
transpose
(
coeff
[
0
],
coeff
[
1
],
coeff
[
2
],
coeff
[
3
]);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
if
(
index
[
i
]
<
NUM_TABLE_POINTS
)
y
[
i
]
=
dot4
(
coeff
[
i
],
fvec4
(
&
ewaldScaleTable
[
4
*
index
[
i
]]));
}
return
fvec4
(
y
);
ivec4
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
ewaldScaleTable
[
index
[
0
]]);
fvec4
t2
(
&
ewaldScaleTable
[
index
[
1
]]);
fvec4
t3
(
&
ewaldScaleTable
[
index
[
2
]]);
fvec4
t4
(
&
ewaldScaleTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuPlatform.cpp
View file @
588a4c9d
...
...
@@ -32,6 +32,8 @@
#include "CpuPlatform.h"
#include "CpuKernelFactory.h"
#include "CpuKernels.h"
#include "CpuSETTLE.h"
#include "ReferenceConstraints.h"
#include "openmm/internal/hardware.h"
using
namespace
OpenMM
;
...
...
@@ -77,6 +79,12 @@ void CpuPlatform::contextCreated(ContextImpl& context, const map<string, string>
ReferencePlatform
::
contextCreated
(
context
,
properties
);
PlatformData
*
data
=
new
PlatformData
(
context
.
getSystem
().
getNumParticles
());
contextData
[
&
context
]
=
data
;
ReferenceConstraints
&
constraints
=
*
(
ReferenceConstraints
*
)
reinterpret_cast
<
ReferencePlatform
::
PlatformData
*>
(
context
.
getPlatformData
())
->
constraints
;
if
(
constraints
.
settle
!=
NULL
)
{
CpuSETTLE
*
parallelSettle
=
new
CpuSETTLE
(
context
.
getSystem
(),
*
(
ReferenceSETTLEAlgorithm
*
)
constraints
.
settle
,
data
->
threads
);
delete
constraints
.
settle
;
constraints
.
settle
=
parallelSettle
;
}
}
void
CpuPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -89,8 +97,7 @@ CpuPlatform::PlatformData& CpuPlatform::getPlatformData(ContextImpl& context) {
return
*
contextData
[
&
context
];
}
CpuPlatform
::
PlatformData
::
PlatformData
(
int
numParticles
)
{
posq
.
resize
(
4
*
numParticles
);
CpuPlatform
::
PlatformData
::
PlatformData
(
int
numParticles
)
:
posq
(
4
*
numParticles
)
{
int
numThreads
=
threads
.
getNumThreads
();
threadForce
.
resize
(
numThreads
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
...
...
platforms/cpu/src/CpuSETTLE.cpp
0 → 100644
View file @
588a4c9d
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CpuSETTLE.h"
using
namespace
OpenMM
;
using
namespace
std
;
class
CpuSETTLE
::
ApplyToPositionsTask
:
public
ThreadPool
::
Task
{
public:
ApplyToPositionsTask
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
atomCoordinatesP
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
,
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
)
:
atomCoordinates
(
atomCoordinates
),
atomCoordinatesP
(
atomCoordinatesP
),
inverseMasses
(
inverseMasses
),
tolerance
(
tolerance
),
threadSettle
(
threadSettle
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
threadSettle
[
threadIndex
]
->
apply
(
atomCoordinates
,
atomCoordinatesP
,
inverseMasses
,
tolerance
);
}
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
;
vector
<
OpenMM
::
RealVec
>&
atomCoordinatesP
;
vector
<
RealOpenMM
>&
inverseMasses
;
RealOpenMM
tolerance
;
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
;
};
class
CpuSETTLE
::
ApplyToVelocitiesTask
:
public
ThreadPool
::
Task
{
public:
ApplyToVelocitiesTask
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
velocities
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
,
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
)
:
atomCoordinates
(
atomCoordinates
),
velocities
(
velocities
),
inverseMasses
(
inverseMasses
),
tolerance
(
tolerance
),
threadSettle
(
threadSettle
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
threadSettle
[
threadIndex
]
->
applyToVelocities
(
atomCoordinates
,
velocities
,
inverseMasses
,
tolerance
);
}
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
;
vector
<
OpenMM
::
RealVec
>&
velocities
;
vector
<
RealOpenMM
>&
inverseMasses
;
RealOpenMM
tolerance
;
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
;
};
CpuSETTLE
::
CpuSETTLE
(
const
System
&
system
,
const
ReferenceSETTLEAlgorithm
&
settle
,
ThreadPool
&
threads
)
:
threads
(
threads
)
{
int
numThreads
=
threads
.
getNumThreads
();
int
numClusters
=
settle
.
getNumClusters
();
vector
<
RealOpenMM
>
mass
(
system
.
getNumParticles
());
for
(
int
i
=
0
;
i
<
system
.
getNumParticles
();
i
++
)
mass
[
i
]
=
system
.
getParticleMass
(
i
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
int
start
=
i
*
numClusters
/
numThreads
;
int
end
=
(
i
+
1
)
*
numClusters
/
numThreads
;
if
(
start
!=
end
)
{
int
numThreadClusters
=
end
-
start
;
vector
<
int
>
atom1
(
numThreadClusters
),
atom2
(
numThreadClusters
),
atom3
(
numThreadClusters
);
vector
<
RealOpenMM
>
distance1
(
numThreadClusters
),
distance2
(
numThreadClusters
);
for
(
int
j
=
0
;
j
<
numThreadClusters
;
j
++
)
settle
.
getClusterParameters
(
start
+
j
,
atom1
[
j
],
atom2
[
j
],
atom3
[
j
],
distance1
[
j
],
distance2
[
j
]);
threadSettle
.
push_back
(
new
ReferenceSETTLEAlgorithm
(
atom1
,
atom2
,
atom3
,
distance1
,
distance2
,
mass
));
}
}
}
CpuSETTLE
::~
CpuSETTLE
()
{
for
(
int
i
=
0
;
i
<
(
int
)
threadSettle
.
size
();
i
++
)
delete
threadSettle
[
i
];
}
void
CpuSETTLE
::
apply
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
atomCoordinatesP
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
)
{
ApplyToPositionsTask
task
(
atomCoordinates
,
atomCoordinatesP
,
inverseMasses
,
tolerance
,
threadSettle
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
}
void
CpuSETTLE
::
applyToVelocities
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
velocities
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
)
{
ApplyToVelocitiesTask
task
(
atomCoordinates
,
velocities
,
inverseMasses
,
tolerance
,
threadSettle
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
}
platforms/cpu/src/gmx_atomic.h
0 → 100644
View file @
588a4c9d
This diff is collapsed.
Click to expand it.
platforms/cpu/tests/CMakeLists.txt
View file @
588a4c9d
...
...
@@ -11,7 +11,6 @@ IF( INCLUDE_SERIALIZATION )
INCLUDE_DIRECTORIES
(
${
OPENMM_DIR
}
/serialization/include
)
SET
(
SHARED_OPENMM_SERIALIZATION
"OpenMMSerialization"
)
ENDIF
(
INCLUDE_SERIALIZATION
)
GET_PROPERTY
(
COMPILE_FLAGS GLOBAL PROPERTY COMPILE_FLAGS
)
# Automatically create tests using files named "Test*.cpp"
FILE
(
GLOB TEST_PROGS
"*Test*.cpp"
)
...
...
@@ -21,7 +20,7 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
SHARED_TARGET
}
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES
COMPILE
_FLAGS
"
${
COMPILE_FLAGS
}
"
LINK
_FLAGS
"
${
COMPILE_FLAGS
}
"
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES
LINK
_FLAGS
"
${
EXTRA_
COMPILE_FLAGS
}
"
COMPILE
_FLAGS
"
${
EXTRA_
COMPILE_FLAGS
}
"
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
single
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
platforms/cpu/tests/TestCpuNeighborList.cpp
View file @
588a4c9d
...
...
@@ -35,6 +35,7 @@
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/ThreadPool.h"
#include "AlignedArray.h"
#include "CpuNeighborList.h"
#include "CpuPlatform.h"
#include "sfmt/SFMT.h"
...
...
@@ -52,7 +53,7 @@ void testNeighborList(bool periodic) {
const
float
boxSize
[
3
]
=
{
20.0
f
,
15.0
f
,
22.0
f
};
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
float
>
positions
(
4
*
numParticles
);
AlignedArray
<
float
>
positions
(
4
*
numParticles
);
for
(
int
i
=
0
;
i
<
4
*
numParticles
;
i
++
)
if
(
i
%
4
<
3
)
positions
[
i
]
=
boxSize
[
i
%
4
]
*
genrand_real2
(
sfmt
);
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment