Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3862202e
Commit
3862202e
authored
Jul 12, 2013
by
Justin MacCallum
Browse files
Merge branch 'upstream' into fork
parents
e1a4e015
73882ac5
Changes
249
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1044 additions
and
15 deletions
+1044
-15
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+11
-0
plugins/amoeba/platforms/reference/src/AmoebaReferenceKernels.h
...s/amoeba/platforms/reference/src/AmoebaReferenceKernels.h
+2
-2
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceAngleForce.h
.../reference/src/SimTKReference/AmoebaReferenceAngleForce.h
+1
-1
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceBondForce.h
...s/reference/src/SimTKReference/AmoebaReferenceBondForce.h
+1
-1
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceForce.h
...forms/reference/src/SimTKReference/AmoebaReferenceForce.h
+1
-1
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceGeneralizedKirkwoodForce.h
.../SimTKReference/AmoebaReferenceGeneralizedKirkwoodForce.h
+1
-1
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceInPlaneAngleForce.h
...nce/src/SimTKReference/AmoebaReferenceInPlaneAngleForce.h
+1
-1
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.h
...erence/src/SimTKReference/AmoebaReferenceMultipoleForce.h
+1
-1
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceOutOfPlaneBendForce.h
...e/src/SimTKReference/AmoebaReferenceOutOfPlaneBendForce.h
+1
-1
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferencePiTorsionForce.h
...erence/src/SimTKReference/AmoebaReferencePiTorsionForce.h
+1
-1
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceStretchBendForce.h
...ence/src/SimTKReference/AmoebaReferenceStretchBendForce.h
+1
-1
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceTorsionTorsionForce.h
...e/src/SimTKReference/AmoebaReferenceTorsionTorsionForce.h
+1
-1
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceVdwForce.h
...ms/reference/src/SimTKReference/AmoebaReferenceVdwForce.h
+2
-2
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceWcaDispersionForce.h
...ce/src/SimTKReference/AmoebaReferenceWcaDispersionForce.h
+1
-1
plugins/cpupme/CMakeLists.txt
plugins/cpupme/CMakeLists.txt
+93
-0
plugins/cpupme/include/internal/windowsExportPme.h
plugins/cpupme/include/internal/windowsExportPme.h
+41
-0
plugins/cpupme/src/CpuPmeKernelFactory.cpp
plugins/cpupme/src/CpuPmeKernelFactory.cpp
+54
-0
plugins/cpupme/src/CpuPmeKernelFactory.h
plugins/cpupme/src/CpuPmeKernelFactory.h
+50
-0
plugins/cpupme/src/CpuPmeKernels.cpp
plugins/cpupme/src/CpuPmeKernels.cpp
+649
-0
plugins/cpupme/src/CpuPmeKernels.h
plugins/cpupme/src/CpuPmeKernels.h
+131
-0
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
3862202e
...
...
@@ -778,6 +778,17 @@ public:
}
return
true
;
}
int
getNumParticleGroups
()
{
return
7
*
force
.
getNumMultipoles
();
}
void
getParticlesInGroup
(
int
index
,
vector
<
int
>&
particles
)
{
int
particle
=
index
/
7
;
int
type
=
index
-
7
*
particle
;
force
.
getCovalentMap
(
particle
,
AmoebaMultipoleForce
::
CovalentType
(
type
),
particles
);
}
bool
areGroupsIdentical
(
int
group1
,
int
group2
)
{
return
((
group1
%
7
)
==
(
group2
%
7
));
}
private:
const
AmoebaMultipoleForce
&
force
;
};
...
...
plugins/amoeba/platforms/reference/src/AmoebaReferenceKernels.h
View file @
3862202e
...
...
@@ -31,8 +31,8 @@
#include "openmm/amoebaKernels.h"
#include "openmm/AmoebaMultipoleForce.h"
#include "AmoebaReferenceMultipoleForce.h"
#include "
SimTKReference/
ReferenceNeighborList.h"
#include "
SimTKUtilities/
SimTKOpenMMRealType.h"
#include "ReferenceNeighborList.h"
#include "SimTKOpenMMRealType.h"
namespace
OpenMM
{
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceAngleForce.h
View file @
3862202e
...
...
@@ -25,7 +25,7 @@
#ifndef __AmoebaReferenceAngleForce_H__
#define __AmoebaReferenceAngleForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include <vector>
// ---------------------------------------------------------------------------------------
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceBondForce.h
View file @
3862202e
...
...
@@ -25,7 +25,7 @@
#ifndef __AmoebaReferenceBondForce_H__
#define __AmoebaReferenceBondForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include <vector>
// ---------------------------------------------------------------------------------------
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceForce.h
View file @
3862202e
...
...
@@ -25,7 +25,7 @@
#ifndef __AmoebaReferenceForce_H__
#define __AmoebaReferenceForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include "openmm/Vec3.h"
#include <vector>
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceGeneralizedKirkwoodForce.h
View file @
3862202e
...
...
@@ -25,7 +25,7 @@
#ifndef __AmoebaReferenceGeneralizedKirkwoodForce_H__
#define __AmoebaReferenceGeneralizedKirkwoodForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include <vector>
using
namespace
OpenMM
;
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceInPlaneAngleForce.h
View file @
3862202e
...
...
@@ -25,7 +25,7 @@
#ifndef __AmoebaReferenceInPlaneAngleForce_H__
#define __AmoebaReferenceInPlaneAngleForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include <vector>
// ---------------------------------------------------------------------------------------
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.h
View file @
3862202e
...
...
@@ -24,7 +24,7 @@
#ifndef __AmoebaReferenceMultipoleForce_H__
#define __AmoebaReferenceMultipoleForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include "openmm/AmoebaMultipoleForce.h"
#include "AmoebaReferenceGeneralizedKirkwoodForce.h"
#include <map>
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceOutOfPlaneBendForce.h
View file @
3862202e
...
...
@@ -25,7 +25,7 @@
#ifndef __AmoebaReferenceOutOfPlaneBendForce_H__
#define __AmoebaReferenceOutOfPlaneBendForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include <vector>
// ---------------------------------------------------------------------------------------
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferencePiTorsionForce.h
View file @
3862202e
...
...
@@ -25,7 +25,7 @@
#ifndef __AmoebaReferencePiTorsionForce_H__
#define __AmoebaReferencePiTorsionForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include <vector>
// ---------------------------------------------------------------------------------------
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceStretchBendForce.h
View file @
3862202e
...
...
@@ -25,7 +25,7 @@
#ifndef __AmoebaReferenceStretchBendForce_H__
#define __AmoebaReferenceStretchBendForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include <vector>
// ---------------------------------------------------------------------------------------
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceTorsionTorsionForce.h
View file @
3862202e
...
...
@@ -25,7 +25,7 @@
#ifndef __AmoebaReferenceTorsionTorsionForce_H__
#define __AmoebaReferenceTorsionTorsionForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include <vector>
// ---------------------------------------------------------------------------------------
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceVdwForce.h
View file @
3862202e
...
...
@@ -25,9 +25,9 @@
#ifndef __AmoebaReferenceVdwForce_H__
#define __AmoebaReferenceVdwForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include "openmm/Vec3.h"
#include "
SimTKReference/
ReferenceNeighborList.h"
#include "ReferenceNeighborList.h"
#include <string>
#include <vector>
...
...
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceWcaDispersionForce.h
View file @
3862202e
...
...
@@ -25,7 +25,7 @@
#ifndef __AmoebaReferenceWcaDispersionForce_H__
#define __AmoebaReferenceWcaDispersionForce_H__
#include "
SimTKUtilities/
RealVec.h"
#include "RealVec.h"
#include "openmm/Vec3.h"
#include <string>
#include <vector>
...
...
plugins/cpupme/CMakeLists.txt
0 → 100644
View file @
3862202e
#---------------------------------------------------
# OpenMM CPU PME Plugin
#
# Creates plugin library, base name=OpenMMPME.
# Default libraries are shared & optimized.
#
# Windows:
# OpenMMPME[_d].dll
# OpenMMPME[_d].lib
# Unix:
# libOpenMMPME[_d].so
#----------------------------------------------------
IF
(
APPLE
)
SET
(
CMAKE_OSX_DEPLOYMENT_TARGET
"10.6"
)
ENDIF
(
APPLE
)
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET
(
OPENMM_SOURCE_SUBDIRS .
)
# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
SET
(
OPENMMPME_LIBRARY_NAME OpenMMPME
)
SET
(
SHARED_TARGET
${
OPENMMPME_LIBRARY_NAME
}
)
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF
(
${
CMAKE_GENERATOR
}
MATCHES
"Visual Studio"
)
SET
(
CMAKE_DEBUG_POSTFIX
"_d"
CACHE INTERNAL
""
FORCE
)
ENDIF
(
${
CMAKE_GENERATOR
}
MATCHES
"Visual Studio"
)
# But on Unix or Cygwin we have to add the suffix manually
IF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
SET
(
SHARED_TARGET
${
SHARED_TARGET
}
_d
)
SET
(
STATIC_TARGET
${
STATIC_TARGET
}
_d
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
# These are all the places to search for header files which are
# to be part of the API.
SET
(
API_INCLUDE_DIRS
)
# start empty
FOREACH
(
subdir
${
OPENMM_SOURCE_SUBDIRS
}
)
# append
SET
(
API_INCLUDE_DIRS
${
API_INCLUDE_DIRS
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/include
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/include/internal
)
ENDFOREACH
(
subdir
)
# Find the include files.
SET
(
API_INCLUDE_FILES
)
FOREACH
(
dir
${
API_INCLUDE_DIRS
}
)
FILE
(
GLOB fullpaths
${
dir
}
/*.h
)
# returns full pathnames
SET
(
API_INCLUDE_FILES
${
API_INCLUDE_FILES
}
${
fullpaths
}
)
ENDFOREACH
(
dir
)
# collect up source files
SET
(
SOURCE_FILES
)
# empty
SET
(
SOURCE_INCLUDE_FILES
)
FOREACH
(
subdir
${
OPENMM_SOURCE_SUBDIRS
}
)
FILE
(
GLOB_RECURSE src_files
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/src/*.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/src/*.c
)
FILE
(
GLOB incl_files
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/src/*.h
)
SET
(
SOURCE_FILES
${
SOURCE_FILES
}
${
src_files
}
)
#append
SET
(
SOURCE_INCLUDE_FILES
${
SOURCE_INCLUDE_FILES
}
${
incl_files
}
)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/include
)
ENDFOREACH
(
subdir
)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/src
)
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
"-msse4.1"
)
# Include FFTW related files.
INCLUDE_DIRECTORIES
(
${
FFTW_INCLUDES
}
)
# Build the plugin library.
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_INCLUDE_FILES
}
)
IF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
_d
)
ELSE
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
PTHREADS_LIB
}
${
FFTW_LIBRARY
}
${
FFTW_THREADS_LIBRARY
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"-DOPENMM_PME_BUILDING_SHARED_LIBRARY"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
SUBDIRS
(
tests
)
plugins/cpupme/include/internal/windowsExportPme.h
0 → 100644
View file @
3862202e
#ifndef OPENMM_WINDOWSEXPORTPME_H_
#define OPENMM_WINDOWSEXPORTPME_H_
/*
* Shared libraries are messy in Visual Studio. We have to distinguish three
* cases:
* (1) this header is being used to build the OpenMM shared library
* (dllexport)
* (2) this header is being used by a *client* of the OpenMM shared
* library (dllimport)
* (3) we are building the OpenMM static library, or the client is
* being compiled with the expectation of linking with the
* OpenMM static library (nothing special needed)
* In the CMake script for building this library, we define one of the symbols
* OPENMM_PME_BUILDING_{SHARED|STATIC}_LIBRARY
* Client code normally has no special symbol defined, in which case we'll
* assume it wants to use the shared library. However, if the client defines
* the symbol OPENMM_USE_STATIC_LIBRARIES we'll suppress the dllimport so
* that the client code can be linked with static libraries. Note that
* the client symbol is not library dependent, while the library symbols
* affect only the OpenMM library, meaning that other libraries can
* be clients of this one. However, we are assuming all-static or all-shared.
*/
#ifdef _MSC_VER
// We don't want to hear about how sprintf is "unsafe".
#pragma warning(disable:4996)
// Keep MS VC++ quiet about lack of dll export of private members.
#pragma warning(disable:4251)
#if defined(OPENMM_PME_BUILDING_SHARED_LIBRARY)
#define OPENMM_EXPORT_PME __declspec(dllexport)
#elif defined(OPENMM_PME_BUILDING_STATIC_LIBRARY) || defined(OPENMM_PME_USE_STATIC_LIBRARIES)
#define OPENMM_EXPORT_PME
#else
#define OPENMM_EXPORT_PME __declspec(dllimport) // i.e., a client of a shared library
#endif
#else
#define OPENMM_EXPORT_PME // Linux, Mac
#endif
#endif // OPENMM_WINDOWSEXPORTPME_H_
plugins/cpupme/src/CpuPmeKernelFactory.cpp
0 → 100644
View file @
3862202e
/* -------------------------------------------------------------------------- *
* OpenMMCpuPme *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CpuPmeKernelFactory.h"
#include "CpuPmeKernels.h"
#include "internal/windowsExportPme.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
using
namespace
OpenMM
;
extern
"C"
void
registerPlatforms
()
{
}
extern
"C"
void
registerKernelFactories
()
{
if
(
CpuCalcPmeReciprocalForceKernel
::
isProcessorSupported
())
{
CpuPmeKernelFactory
*
factory
=
new
CpuPmeKernelFactory
();
for
(
int
i
=
0
;
i
<
Platform
::
getNumPlatforms
();
i
++
)
Platform
::
getPlatform
(
i
).
registerKernelFactory
(
CalcPmeReciprocalForceKernel
::
Name
(),
factory
);
}
}
extern
"C"
OPENMM_EXPORT_PME
void
registerCpuPmeKernelFactories
()
{
registerKernelFactories
();
}
KernelImpl
*
CpuPmeKernelFactory
::
createKernelImpl
(
std
::
string
name
,
const
Platform
&
platform
,
ContextImpl
&
context
)
const
{
if
(
name
==
CalcPmeReciprocalForceKernel
::
Name
())
return
new
CpuCalcPmeReciprocalForceKernel
(
name
,
platform
);
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
}
plugins/cpupme/src/CpuPmeKernelFactory.h
0 → 100644
View file @
3862202e
#ifndef OPENMM_CPUPMEKERNELFACTORY_H_
#define OPENMM_CPUPMEKERNELFACTORY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/KernelFactory.h"
namespace
OpenMM
{
/**
* This KernelFactory creates kernels for the CPU implementation of PME.
*/
class
CpuPmeKernelFactory
:
public
KernelFactory
{
public:
KernelImpl
*
createKernelImpl
(
std
::
string
name
,
const
Platform
&
platform
,
ContextImpl
&
context
)
const
;
};
}
// namespace OpenMM
#endif
/*OPENMM_CPUPMEKERNELFACTORY_H_*/
plugins/cpupme/src/CpuPmeKernels.cpp
0 → 100644
View file @
3862202e
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifdef WIN32
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include "CpuPmeKernels.h"
#include "SimTKOpenMMRealType.h"
#include <cmath>
#include <cstring>
#include <smmintrin.h>
using
namespace
OpenMM
;
using
namespace
std
;
static
const
int
PME_ORDER
=
5
;
bool
CpuCalcPmeReciprocalForceKernel
::
hasInitializedThreads
=
false
;
int
CpuCalcPmeReciprocalForceKernel
::
numThreads
=
0
;
#define EXTRACT_FLOAT(v, element) _mm_cvtss_f32(_mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, element)))
// Define function to get the number of processors.
#ifdef __APPLE__
#include <sys/sysctl.h>
#include <dlfcn.h>
#else
#ifdef WIN32
#include <windows.h>
#else
#include <dlfcn.h>
#include <unistd.h>
#endif
#endif
static
int
getNumProcessors
()
{
#ifdef __APPLE__
int
ncpu
;
size_t
len
=
4
;
if
(
sysctlbyname
(
"hw.logicalcpu"
,
&
ncpu
,
&
len
,
NULL
,
0
)
==
0
)
return
ncpu
;
else
return
1
;
#else
#ifdef WIN32
SYSTEM_INFO
siSysInfo
;
int
ncpu
;
GetSystemInfo
(
&
siSysInfo
);
ncpu
=
siSysInfo
.
dwNumberOfProcessors
;
if
(
ncpu
<
1
)
ncpu
=
1
;
return
ncpu
;
#else
long
nProcessorsOnline
=
sysconf
(
_SC_NPROCESSORS_ONLN
);
if
(
nProcessorsOnline
==
-
1
)
return
1
;
else
return
(
int
)
nProcessorsOnline
;
#endif
#endif
}
// Define a function to check the CPU's capabilities.
#ifdef _WIN32
#define cpuid __cpuid
#else
static
void
cpuid
(
int
cpuInfo
[
4
],
int
infoType
){
__asm__
__volatile__
(
"cpuid"
:
"=a"
(
cpuInfo
[
0
]),
"=b"
(
cpuInfo
[
1
]),
"=c"
(
cpuInfo
[
2
]),
"=d"
(
cpuInfo
[
3
])
:
"a"
(
infoType
)
);
}
#endif
static
void
spreadCharge
(
int
start
,
int
end
,
float
*
posq
,
float
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
int
numParticles
,
Vec3
periodicBoxSize
)
{
float
temp
[
4
];
__m128
boxSize
=
_mm_set_ps
(
0
,
(
float
)
periodicBoxSize
[
2
],
(
float
)
periodicBoxSize
[
1
],
(
float
)
periodicBoxSize
[
0
]);
__m128
invBoxSize
=
_mm_set_ps
(
0
,
(
float
)
(
1
/
periodicBoxSize
[
2
]),
(
float
)
(
1
/
periodicBoxSize
[
1
]),
(
float
)
(
1
/
periodicBoxSize
[
0
]));
__m128
gridSize
=
_mm_set_ps
(
0
,
gridz
,
gridy
,
gridx
);
__m128i
gridSizeInt
=
_mm_set_epi32
(
0
,
gridz
,
gridy
,
gridx
);
__m128
one
=
_mm_set1_ps
(
1
);
__m128
scale
=
_mm_set1_ps
(
1.0
f
/
(
PME_ORDER
-
1
));
const
float
epsilonFactor
=
sqrt
(
ONE_4PI_EPS0
);
memset
(
grid
,
0
,
sizeof
(
float
)
*
gridx
*
gridy
*
gridz
);
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
// Find the position relative to the nearest grid point.
__m128
pos
=
_mm_load_ps
(
&
posq
[
4
*
i
]);
__m128
posInBox
=
_mm_sub_ps
(
pos
,
_mm_mul_ps
(
boxSize
,
_mm_floor_ps
(
_mm_mul_ps
(
pos
,
invBoxSize
))));
__m128
t
=
_mm_mul_ps
(
_mm_mul_ps
(
posInBox
,
invBoxSize
),
gridSize
);
__m128i
ti
=
_mm_cvttps_epi32
(
t
);
__m128
dr
=
_mm_sub_ps
(
t
,
_mm_cvtepi32_ps
(
ti
));
__m128i
gridIndex
=
_mm_sub_epi32
(
ti
,
_mm_and_si128
(
gridSizeInt
,
_mm_cmpeq_epi32
(
ti
,
gridSizeInt
)));
// Compute the B-spline coefficients.
__m128
data
[
PME_ORDER
];
data
[
PME_ORDER
-
1
]
=
_mm_setzero_ps
();
data
[
1
]
=
dr
;
data
[
0
]
=
_mm_sub_ps
(
one
,
dr
);
for
(
int
j
=
3
;
j
<
PME_ORDER
;
j
++
)
{
__m128
div
=
_mm_set1_ps
(
1.0
f
/
(
j
-
1
));
data
[
j
-
1
]
=
_mm_mul_ps
(
_mm_mul_ps
(
div
,
dr
),
data
[
j
-
2
]);
for
(
int
k
=
1
;
k
<
j
-
1
;
k
++
)
data
[
j
-
k
-
1
]
=
_mm_mul_ps
(
div
,
_mm_add_ps
(
_mm_mul_ps
(
_mm_add_ps
(
dr
,
_mm_set1_ps
(
k
)),
data
[
j
-
k
-
2
]),
_mm_mul_ps
(
_mm_sub_ps
(
_mm_set1_ps
(
j
-
k
),
dr
),
data
[
j
-
k
-
1
])));
data
[
0
]
=
_mm_mul_ps
(
_mm_mul_ps
(
div
,
_mm_sub_ps
(
one
,
dr
)),
data
[
0
]);
}
data
[
PME_ORDER
-
1
]
=
_mm_mul_ps
(
_mm_mul_ps
(
scale
,
dr
),
data
[
PME_ORDER
-
2
]);
for
(
int
j
=
1
;
j
<
(
PME_ORDER
-
1
);
j
++
)
data
[
PME_ORDER
-
j
-
1
]
=
_mm_mul_ps
(
scale
,
_mm_add_ps
(
_mm_mul_ps
(
_mm_add_ps
(
dr
,
_mm_set1_ps
(
j
)),
data
[
PME_ORDER
-
j
-
2
]),
_mm_mul_ps
(
_mm_sub_ps
(
_mm_set1_ps
(
PME_ORDER
-
j
),
dr
),
data
[
PME_ORDER
-
j
-
1
])));
data
[
0
]
=
_mm_mul_ps
(
_mm_mul_ps
(
scale
,
_mm_sub_ps
(
one
,
dr
)),
data
[
0
]);
// Spread the charges.
int
gridIndexX
=
_mm_extract_epi32
(
gridIndex
,
0
);
int
gridIndexY
=
_mm_extract_epi32
(
gridIndex
,
1
);
int
gridIndexZ
=
_mm_extract_epi32
(
gridIndex
,
2
);
int
zindex
[
PME_ORDER
];
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
{
zindex
[
j
]
=
gridIndexZ
+
j
;
zindex
[
j
]
-=
(
zindex
[
j
]
>=
gridz
?
gridz
:
0
);
}
float
charge
=
epsilonFactor
*
posq
[
4
*
i
+
3
];
__m128
zdata0to3
=
_mm_set_ps
(
EXTRACT_FLOAT
(
data
[
3
],
2
),
EXTRACT_FLOAT
(
data
[
2
],
2
),
EXTRACT_FLOAT
(
data
[
1
],
2
),
EXTRACT_FLOAT
(
data
[
0
],
2
));
float
zdata4
=
EXTRACT_FLOAT
(
data
[
4
],
2
);
if
(
gridIndexZ
+
4
<
gridz
)
{
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndexX
+
ix
;
xbase
-=
(
xbase
>=
gridx
?
gridx
:
0
);
xbase
=
xbase
*
gridy
*
gridz
;
float
xdata
=
charge
*
EXTRACT_FLOAT
(
data
[
ix
],
0
);
for
(
int
iy
=
0
;
iy
<
PME_ORDER
;
iy
++
)
{
int
ybase
=
gridIndexY
+
iy
;
ybase
-=
(
ybase
>=
gridy
?
gridy
:
0
);
ybase
=
xbase
+
ybase
*
gridz
;
float
multiplier
=
xdata
*
EXTRACT_FLOAT
(
data
[
iy
],
1
);
__m128
add0to3
=
_mm_mul_ps
(
zdata0to3
,
_mm_set1_ps
(
multiplier
));
_mm_storeu_ps
(
&
grid
[
ybase
+
gridIndexZ
],
_mm_add_ps
(
_mm_loadu_ps
(
&
grid
[
ybase
+
gridIndexZ
]),
add0to3
));
grid
[
ybase
+
zindex
[
4
]]
+=
multiplier
*
zdata4
;
}
}
}
else
{
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndexX
+
ix
;
xbase
-=
(
xbase
>=
gridx
?
gridx
:
0
);
xbase
=
xbase
*
gridy
*
gridz
;
float
xdata
=
charge
*
EXTRACT_FLOAT
(
data
[
ix
],
0
);
for
(
int
iy
=
0
;
iy
<
PME_ORDER
;
iy
++
)
{
int
ybase
=
gridIndexY
+
iy
;
ybase
-=
(
ybase
>=
gridy
?
gridy
:
0
);
ybase
=
xbase
+
ybase
*
gridz
;
float
multiplier
=
xdata
*
EXTRACT_FLOAT
(
data
[
iy
],
1
);
__m128
add0to3
=
_mm_mul_ps
(
zdata0to3
,
_mm_set1_ps
(
multiplier
));
_mm_store_ps
(
temp
,
add0to3
);
grid
[
ybase
+
zindex
[
0
]]
+=
temp
[
0
];
grid
[
ybase
+
zindex
[
1
]]
+=
temp
[
1
];
grid
[
ybase
+
zindex
[
2
]]
+=
temp
[
2
];
grid
[
ybase
+
zindex
[
3
]]
+=
temp
[
3
];
grid
[
ybase
+
zindex
[
4
]]
+=
multiplier
*
zdata4
;
}
}
}
}
}
static
void
computeReciprocalEterm
(
int
start
,
int
end
,
int
gridx
,
int
gridy
,
int
gridz
,
vector
<
float
>&
recipEterm
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
periodicBoxSize
)
{
const
unsigned
int
zsize
=
gridz
/
2
+
1
;
const
unsigned
int
yzsize
=
gridy
*
zsize
;
const
float
scaleFactor
=
(
float
)
(
M_PI
*
periodicBoxSize
[
0
]
*
periodicBoxSize
[
1
]
*
periodicBoxSize
[
2
]);
const
float
recipExpFactor
=
(
float
)
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
const
float
invPeriodicBoxSizeX
=
(
float
)
(
1.0
/
periodicBoxSize
[
0
]);
const
float
invPeriodicBoxSizeY
=
(
float
)
(
1.0
/
periodicBoxSize
[
1
]);
const
float
invPeriodicBoxSizeZ
=
(
float
)
(
1.0
/
periodicBoxSize
[
2
]);
int
firstz
=
(
start
==
0
?
1
:
0
);
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
int
mx
=
(
kx
<
(
gridx
+
1
)
/
2
)
?
kx
:
kx
-
gridx
;
float
mhx
=
mx
*
invPeriodicBoxSizeX
;
float
bx
=
scaleFactor
*
bsplineModuli
[
0
][
kx
];
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
int
my
=
(
ky
<
(
gridy
+
1
)
/
2
)
?
ky
:
ky
-
gridy
;
float
mhy
=
my
*
invPeriodicBoxSizeY
;
float
mhx2y2
=
mhx
*
mhx
+
mhy
*
mhy
;
float
bxby
=
bx
*
bsplineModuli
[
1
][
ky
];
for
(
int
kz
=
firstz
;
kz
<
zsize
;
kz
++
)
{
int
index
=
kx
*
yzsize
+
ky
*
zsize
+
kz
;
int
mz
=
(
kz
<
(
gridz
+
1
)
/
2
)
?
kz
:
kz
-
gridz
;
float
mhz
=
mz
*
invPeriodicBoxSizeZ
;
float
bz
=
bsplineModuli
[
2
][
kz
];
float
m2
=
mhx2y2
+
mhz
*
mhz
;
float
denom
=
m2
*
bxby
*
bz
;
recipEterm
[
index
]
=
exp
(
-
recipExpFactor
*
m2
)
/
denom
;
}
firstz
=
0
;
}
}
}
static
float
reciprocalEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
periodicBoxSize
)
{
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
float
scaleFactor
=
(
float
)
(
M_PI
*
periodicBoxSize
[
0
]
*
periodicBoxSize
[
1
]
*
periodicBoxSize
[
2
]);
const
float
recipExpFactor
=
(
float
)
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
const
float
invPeriodicBoxSizeX
=
(
float
)
(
1.0
/
periodicBoxSize
[
0
]);
const
float
invPeriodicBoxSizeY
=
(
float
)
(
1.0
/
periodicBoxSize
[
1
]);
const
float
invPeriodicBoxSizeZ
=
(
float
)
(
1.0
/
periodicBoxSize
[
2
]);
float
energy
=
0.0
f
;
int
firstz
=
(
start
==
0
?
1
:
0
);
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
int
mx
=
(
kx
<
(
gridx
+
1
)
/
2
)
?
kx
:
kx
-
gridx
;
float
mhx
=
mx
*
invPeriodicBoxSizeX
;
float
bx
=
scaleFactor
*
bsplineModuli
[
0
][
kx
];
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
int
my
=
(
ky
<
(
gridy
+
1
)
/
2
)
?
ky
:
ky
-
gridy
;
float
mhy
=
my
*
invPeriodicBoxSizeY
;
float
mhx2y2
=
mhx
*
mhx
+
mhy
*
mhy
;
float
bxby
=
bx
*
bsplineModuli
[
1
][
ky
];
for
(
int
kz
=
firstz
;
kz
<
gridz
;
kz
++
)
{
int
mz
=
(
kz
<
(
gridz
+
1
)
/
2
)
?
kz
:
kz
-
gridz
;
float
mhz
=
mz
*
invPeriodicBoxSizeZ
;
float
bz
=
bsplineModuli
[
2
][
kz
];
float
m2
=
mhx2y2
+
mhz
*
mhz
;
float
denom
=
m2
*
bxby
*
bz
;
float
eterm
=
exp
(
-
recipExpFactor
*
m2
)
/
denom
;
int
kx1
,
ky1
,
kz1
;
if
(
kz
>=
gridz
/
2
+
1
)
{
kx1
=
(
kx
==
0
?
kx
:
gridx
-
kx
);
ky1
=
(
ky
==
0
?
ky
:
gridy
-
ky
);
kz1
=
gridz
-
kz
;
}
else
{
kx1
=
kx
;
ky1
=
ky
;
kz1
=
kz
;
}
int
index
=
kx1
*
yzsizeHalf
+
ky1
*
zsizeHalf
+
kz1
;
float
gridReal
=
grid
[
index
][
0
];
float
gridImag
=
grid
[
index
][
1
];
energy
+=
eterm
*
(
gridReal
*
gridReal
+
gridImag
*
gridImag
);
}
firstz
=
0
;
}
}
return
0.5
f
*
energy
;
}
static
void
reciprocalConvolution
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
vector
<
float
>&
recipEterm
)
{
const
unsigned
int
zsize
=
gridz
/
2
+
1
;
const
unsigned
int
yzsize
=
gridy
*
zsize
;
int
firstz
=
(
start
==
0
?
1
:
0
);
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
for
(
int
ky
=
0
;
ky
<
gridy
;
ky
++
)
{
for
(
int
kz
=
firstz
;
kz
<
zsize
;
kz
++
)
{
int
index
=
kx
*
yzsize
+
ky
*
zsize
+
kz
;
float
eterm
=
recipEterm
[
index
];
grid
[
index
][
0
]
*=
eterm
;
grid
[
index
][
1
]
*=
eterm
;
}
firstz
=
0
;
}
}
}
static
void
interpolateForces
(
int
start
,
int
end
,
float
*
posq
,
float
*
force
,
float
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
int
numParticles
,
Vec3
periodicBoxSize
)
{
__m128
boxSize
=
_mm_set_ps
(
0
,
(
float
)
periodicBoxSize
[
2
],
(
float
)
periodicBoxSize
[
1
],
(
float
)
periodicBoxSize
[
0
]);
__m128
invBoxSize
=
_mm_set_ps
(
0
,
(
float
)
(
1
/
periodicBoxSize
[
2
]),
(
float
)
(
1
/
periodicBoxSize
[
1
]),
(
float
)
(
1
/
periodicBoxSize
[
0
]));
__m128
gridSize
=
_mm_set_ps
(
0
,
gridz
,
gridy
,
gridx
);
__m128i
gridSizeInt
=
_mm_set_epi32
(
0
,
gridz
,
gridy
,
gridx
);
__m128
one
=
_mm_set1_ps
(
1
);
__m128
scale
=
_mm_set1_ps
(
1.0
f
/
(
PME_ORDER
-
1
));
const
float
epsilonFactor
=
sqrt
(
ONE_4PI_EPS0
);
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
// Find the position relative to the nearest grid point.
__m128
pos
=
_mm_load_ps
(
&
posq
[
4
*
i
]);
__m128
posInBox
=
_mm_sub_ps
(
pos
,
_mm_mul_ps
(
boxSize
,
_mm_floor_ps
(
_mm_mul_ps
(
pos
,
invBoxSize
))));
__m128
t
=
_mm_mul_ps
(
_mm_mul_ps
(
posInBox
,
invBoxSize
),
gridSize
);
__m128i
ti
=
_mm_cvttps_epi32
(
t
);
__m128
dr
=
_mm_sub_ps
(
t
,
_mm_cvtepi32_ps
(
ti
));
__m128i
gridIndex
=
_mm_sub_epi32
(
ti
,
_mm_and_si128
(
gridSizeInt
,
_mm_cmpeq_epi32
(
ti
,
gridSizeInt
)));
// Compute the B-spline coefficients.
__m128
data
[
PME_ORDER
];
__m128
ddata
[
PME_ORDER
];
data
[
PME_ORDER
-
1
]
=
_mm_setzero_ps
();
data
[
1
]
=
dr
;
data
[
0
]
=
_mm_sub_ps
(
one
,
dr
);
for
(
int
j
=
3
;
j
<
PME_ORDER
;
j
++
)
{
__m128
div
=
_mm_set1_ps
(
1.0
f
/
(
j
-
1
));
data
[
j
-
1
]
=
_mm_mul_ps
(
_mm_mul_ps
(
div
,
dr
),
data
[
j
-
2
]);
for
(
int
k
=
1
;
k
<
j
-
1
;
k
++
)
data
[
j
-
k
-
1
]
=
_mm_mul_ps
(
div
,
_mm_add_ps
(
_mm_mul_ps
(
_mm_add_ps
(
dr
,
_mm_set1_ps
(
k
)),
data
[
j
-
k
-
2
]),
_mm_mul_ps
(
_mm_sub_ps
(
_mm_set1_ps
(
j
-
k
),
dr
),
data
[
j
-
k
-
1
])));
data
[
0
]
=
_mm_mul_ps
(
_mm_mul_ps
(
div
,
_mm_sub_ps
(
one
,
dr
)),
data
[
0
]);
}
ddata
[
0
]
=
_mm_sub_ps
(
_mm_set1_ps
(
0
),
data
[
0
]);
for
(
int
j
=
1
;
j
<
PME_ORDER
;
j
++
)
ddata
[
j
]
=
_mm_sub_ps
(
data
[
j
-
1
],
data
[
j
]);
data
[
PME_ORDER
-
1
]
=
_mm_mul_ps
(
_mm_mul_ps
(
scale
,
dr
),
data
[
PME_ORDER
-
2
]);
for
(
int
j
=
1
;
j
<
(
PME_ORDER
-
1
);
j
++
)
data
[
PME_ORDER
-
j
-
1
]
=
_mm_mul_ps
(
scale
,
_mm_add_ps
(
_mm_mul_ps
(
_mm_add_ps
(
dr
,
_mm_set1_ps
(
j
)),
data
[
PME_ORDER
-
j
-
2
]),
_mm_mul_ps
(
_mm_sub_ps
(
_mm_set1_ps
(
PME_ORDER
-
j
),
dr
),
data
[
PME_ORDER
-
j
-
1
])));
data
[
0
]
=
_mm_mul_ps
(
_mm_mul_ps
(
scale
,
_mm_sub_ps
(
one
,
dr
)),
data
[
0
]);
// Compute the force on this atom.
int
gridIndexX
=
_mm_extract_epi32
(
gridIndex
,
0
);
int
gridIndexY
=
_mm_extract_epi32
(
gridIndex
,
1
);
int
gridIndexZ
=
_mm_extract_epi32
(
gridIndex
,
2
);
int
zindex
[
PME_ORDER
];
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
{
zindex
[
j
]
=
gridIndexZ
+
j
;
zindex
[
j
]
-=
(
zindex
[
j
]
>=
gridz
?
gridz
:
0
);
}
__m128
zdata
[
PME_ORDER
];
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
zdata
[
j
]
=
_mm_set_ps
(
0
,
EXTRACT_FLOAT
(
ddata
[
j
],
2
),
EXTRACT_FLOAT
(
data
[
j
],
2
),
EXTRACT_FLOAT
(
data
[
j
],
2
));
__m128
f
=
_mm_set1_ps
(
0
);
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndexX
+
ix
;
xbase
-=
(
xbase
>=
gridx
?
gridx
:
0
);
xbase
=
xbase
*
gridy
*
gridz
;
float
dx
=
EXTRACT_FLOAT
(
data
[
ix
],
0
);
float
ddx
=
EXTRACT_FLOAT
(
ddata
[
ix
],
0
);
__m128
xdata
=
_mm_set_ps
(
0
,
dx
,
dx
,
ddx
);
for
(
int
iy
=
0
;
iy
<
PME_ORDER
;
iy
++
)
{
int
ybase
=
gridIndexY
+
iy
;
ybase
-=
(
ybase
>=
gridy
?
gridy
:
0
);
ybase
=
xbase
+
ybase
*
gridz
;
float
dy
=
EXTRACT_FLOAT
(
data
[
iy
],
1
);
float
ddy
=
EXTRACT_FLOAT
(
ddata
[
iy
],
1
);
__m128
xydata
=
_mm_mul_ps
(
xdata
,
_mm_set_ps
(
0
,
dy
,
ddy
,
dy
));
for
(
int
iz
=
0
;
iz
<
PME_ORDER
;
iz
++
)
{
__m128
gridValue
=
_mm_set1_ps
(
grid
[
ybase
+
zindex
[
iz
]]);
f
=
_mm_add_ps
(
f
,
_mm_mul_ps
(
xydata
,
_mm_mul_ps
(
zdata
[
iz
],
gridValue
)));
}
}
}
f
=
_mm_mul_ps
(
invBoxSize
,
_mm_mul_ps
(
gridSize
,
_mm_mul_ps
(
f
,
_mm_set1_ps
(
-
epsilonFactor
*
posq
[
4
*
i
+
3
]))));
_mm_store_ps
(
&
force
[
4
*
i
],
f
);
}
}
class
CpuCalcPmeReciprocalForceKernel
::
ThreadData
{
public:
CpuCalcPmeReciprocalForceKernel
&
owner
;
int
index
;
float
*
tempGrid
;
ThreadData
(
CpuCalcPmeReciprocalForceKernel
&
owner
,
int
index
)
:
owner
(
owner
),
index
(
index
),
tempGrid
(
NULL
)
{
}
};
static
void
*
threadBody
(
void
*
args
)
{
CpuCalcPmeReciprocalForceKernel
::
ThreadData
&
data
=
*
reinterpret_cast
<
CpuCalcPmeReciprocalForceKernel
::
ThreadData
*>
(
args
);
data
.
owner
.
runThread
(
data
.
index
);
if
(
data
.
tempGrid
!=
NULL
)
fftwf_free
(
data
.
tempGrid
);
delete
&
data
;
return
0
;
}
void
CpuCalcPmeReciprocalForceKernel
::
initialize
(
int
xsize
,
int
ysize
,
int
zsize
,
int
numParticles
,
double
alpha
)
{
if
(
!
hasInitializedThreads
)
{
numThreads
=
getNumProcessors
();
fftwf_init_threads
();
hasInitializedThreads
=
true
;
}
gridx
=
findFFTDimension
(
xsize
);
gridy
=
findFFTDimension
(
ysize
);
gridz
=
findFFTDimension
(
zsize
);
this
->
numParticles
=
numParticles
;
this
->
alpha
=
alpha
;
force
.
resize
(
4
*
numParticles
);
recipEterm
.
resize
(
gridx
*
gridy
*
gridz
);
// Initialize threads.
pthread_cond_init
(
&
startCondition
,
NULL
);
pthread_cond_init
(
&
endCondition
,
NULL
);
pthread_cond_init
(
&
mainThreadStartCondition
,
NULL
);
pthread_cond_init
(
&
mainThreadEndCondition
,
NULL
);
pthread_mutex_init
(
&
lock
,
NULL
);
thread
.
resize
(
numThreads
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
ThreadData
*
data
=
new
ThreadData
(
*
this
,
i
);
threadData
.
push_back
(
data
);
pthread_create
(
&
thread
[
i
],
NULL
,
threadBody
,
data
);
data
->
tempGrid
=
(
float
*
)
fftwf_malloc
(
sizeof
(
float
)
*
(
gridx
*
gridy
*
gridz
+
3
));
}
pthread_create
(
&
mainThread
,
NULL
,
threadBody
,
new
ThreadData
(
*
this
,
-
1
));
// Initialize FFTW.
realGrid
=
threadData
[
0
]
->
tempGrid
;
complexGrid
=
(
fftwf_complex
*
)
fftwf_malloc
(
sizeof
(
fftwf_complex
)
*
gridx
*
gridy
*
(
gridz
/
2
+
1
));
fftwf_plan_with_nthreads
(
numThreads
);
forwardFFT
=
fftwf_plan_dft_r2c_3d
(
gridx
,
gridy
,
gridz
,
realGrid
,
complexGrid
,
FFTW_MEASURE
);
backwardFFT
=
fftwf_plan_dft_c2r_3d
(
gridx
,
gridy
,
gridz
,
complexGrid
,
realGrid
,
FFTW_MEASURE
);
hasCreatedPlan
=
true
;
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridx
,
gridy
),
gridz
);
vector
<
double
>
data
(
PME_ORDER
);
vector
<
double
>
ddata
(
PME_ORDER
);
vector
<
double
>
bsplinesData
(
maxSize
);
data
[
PME_ORDER
-
1
]
=
0.0
;
data
[
1
]
=
0.0
;
data
[
0
]
=
1.0
;
for
(
int
i
=
3
;
i
<
PME_ORDER
;
i
++
)
{
double
div
=
1.0
/
(
i
-
1.0
);
data
[
i
-
1
]
=
0.0
;
for
(
int
j
=
1
;
j
<
(
i
-
1
);
j
++
)
data
[
i
-
j
-
1
]
=
div
*
(
j
*
data
[
i
-
j
-
2
]
+
(
i
-
j
)
*
data
[
i
-
j
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
}
// Differentiate.
ddata
[
0
]
=
-
data
[
0
];
for
(
int
i
=
1
;
i
<
PME_ORDER
;
i
++
)
ddata
[
i
]
=
data
[
i
-
1
]
-
data
[
i
];
double
div
=
1.0
/
(
PME_ORDER
-
1
);
data
[
PME_ORDER
-
1
]
=
0.0
;
for
(
int
i
=
1
;
i
<
(
PME_ORDER
-
1
);
i
++
)
data
[
PME_ORDER
-
i
-
1
]
=
div
*
(
i
*
data
[
PME_ORDER
-
i
-
2
]
+
(
PME_ORDER
-
i
)
*
data
[
PME_ORDER
-
i
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
for
(
int
i
=
0
;
i
<
maxSize
;
i
++
)
bsplinesData
[
i
]
=
0.0
;
for
(
int
i
=
1
;
i
<=
PME_ORDER
;
i
++
)
bsplinesData
[
i
]
=
data
[
i
-
1
];
// Evaluate the actual bspline moduli for X/Y/Z.
bsplineModuli
[
0
].
resize
(
gridx
);
bsplineModuli
[
1
].
resize
(
gridy
);
bsplineModuli
[
2
].
resize
(
gridz
);
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
bsplineModuli
[
dim
].
size
();
vector
<
float
>&
moduli
=
bsplineModuli
[
dim
];
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
sc
=
0.0
;
double
ss
=
0.0
;
for
(
int
j
=
0
;
j
<
ndata
;
j
++
)
{
double
arg
=
(
2.0
*
M_PI
*
i
*
j
)
/
ndata
;
sc
+=
bsplinesData
[
j
]
*
cos
(
arg
);
ss
+=
bsplinesData
[
j
]
*
sin
(
arg
);
}
moduli
[
i
]
=
(
float
)
(
sc
*
sc
+
ss
*
ss
);
}
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
if
(
moduli
[
i
]
<
1.0e-7
f
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
f
;
}
}
CpuCalcPmeReciprocalForceKernel
::~
CpuCalcPmeReciprocalForceKernel
()
{
isDeleted
=
true
;
pthread_mutex_lock
(
&
lock
);
pthread_cond_broadcast
(
&
startCondition
);
pthread_cond_broadcast
(
&
mainThreadStartCondition
);
pthread_mutex_unlock
(
&
lock
);
for
(
int
i
=
0
;
i
<
(
int
)
thread
.
size
();
i
++
)
pthread_join
(
thread
[
i
],
NULL
);
pthread_join
(
mainThread
,
NULL
);
pthread_mutex_destroy
(
&
lock
);
pthread_cond_destroy
(
&
startCondition
);
pthread_cond_destroy
(
&
endCondition
);
pthread_cond_destroy
(
&
mainThreadStartCondition
);
pthread_cond_destroy
(
&
mainThreadEndCondition
);
if
(
complexGrid
!=
NULL
)
fftwf_free
(
complexGrid
);
if
(
hasCreatedPlan
)
{
fftwf_destroy_plan
(
forwardFFT
);
fftwf_destroy_plan
(
backwardFFT
);
}
}
void
CpuCalcPmeReciprocalForceKernel
::
runThread
(
int
index
)
{
if
(
index
==
-
1
)
{
// This is the main thread that coordinates all the other ones.
pthread_mutex_lock
(
&
lock
);
while
(
true
)
{
// Wait for the signal to start.
pthread_cond_wait
(
&
mainThreadStartCondition
,
&
lock
);
if
(
isDeleted
)
break
;
posq
=
io
->
getPosq
();
advanceThreads
();
// Signal threads to perform charge spreading.
advanceThreads
();
// Signal threads to sum the charge grids.
fftwf_execute_dft_r2c
(
forwardFFT
,
realGrid
,
complexGrid
);
if
(
lastBoxSize
!=
periodicBoxSize
)
advanceThreads
();
// Signal threads to compute the reciprocal scale factors.
if
(
includeEnergy
)
advanceThreads
();
// Signal threads to compute energy.
advanceThreads
();
// Signal threads to perform reciprocal convolution.
fftwf_execute_dft_c2r
(
backwardFFT
,
complexGrid
,
realGrid
);
advanceThreads
();
// Signal threads to interpolate forces.
isFinished
=
true
;
lastBoxSize
=
periodicBoxSize
;
pthread_cond_signal
(
&
mainThreadEndCondition
);
}
pthread_mutex_unlock
(
&
lock
);
}
else
{
// This is a worker thread.
int
particleStart
=
(
index
*
numParticles
)
/
numThreads
;
int
particleEnd
=
((
index
+
1
)
*
numParticles
)
/
numThreads
;
int
gridxStart
=
(
index
*
gridx
)
/
numThreads
;
int
gridxEnd
=
((
index
+
1
)
*
gridx
)
/
numThreads
;
int
gridSize
=
(
gridx
*
gridy
*
gridz
+
3
)
/
4
;
int
gridStart
=
4
*
((
index
*
gridSize
)
/
numThreads
);
int
gridEnd
=
4
*
(((
index
+
1
)
*
gridSize
)
/
numThreads
);
while
(
true
)
{
threadWait
();
if
(
isDeleted
)
break
;
spreadCharge
(
particleStart
,
particleEnd
,
posq
,
threadData
[
index
]
->
tempGrid
,
gridx
,
gridy
,
gridz
,
numParticles
,
periodicBoxSize
);
threadWait
();
int
numGrids
=
threadData
.
size
();
for
(
int
i
=
gridStart
;
i
<
gridEnd
;
i
+=
4
)
{
__m128
sum
=
_mm_load_ps
(
&
realGrid
[
i
]);
for
(
int
j
=
1
;
j
<
numGrids
;
j
++
)
sum
=
_mm_add_ps
(
sum
,
_mm_load_ps
(
&
threadData
[
j
]
->
tempGrid
[
i
]));
_mm_store_ps
(
&
realGrid
[
i
],
sum
);
}
threadWait
();
if
(
lastBoxSize
!=
periodicBoxSize
)
{
computeReciprocalEterm
(
gridxStart
,
gridxEnd
,
gridx
,
gridy
,
gridz
,
recipEterm
,
alpha
,
bsplineModuli
,
periodicBoxSize
);
threadWait
();
}
if
(
includeEnergy
)
{
double
threadEnergy
=
reciprocalEnergy
(
gridxStart
,
gridxEnd
,
complexGrid
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxSize
);
pthread_mutex_lock
(
&
lock
);
energy
+=
threadEnergy
;
pthread_mutex_unlock
(
&
lock
);
threadWait
();
}
reciprocalConvolution
(
gridxStart
,
gridxEnd
,
complexGrid
,
gridx
,
gridy
,
gridz
,
recipEterm
);
threadWait
();
interpolateForces
(
particleStart
,
particleEnd
,
posq
,
&
force
[
0
],
realGrid
,
gridx
,
gridy
,
gridz
,
numParticles
,
periodicBoxSize
);
}
}
}
void
CpuCalcPmeReciprocalForceKernel
::
threadWait
()
{
pthread_mutex_lock
(
&
lock
);
waitCount
++
;
pthread_cond_signal
(
&
endCondition
);
pthread_cond_wait
(
&
startCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
}
void
CpuCalcPmeReciprocalForceKernel
::
advanceThreads
()
{
waitCount
=
0
;
pthread_cond_broadcast
(
&
startCondition
);
while
(
waitCount
<
numThreads
)
{
pthread_cond_wait
(
&
endCondition
,
&
lock
);
}
}
void
CpuCalcPmeReciprocalForceKernel
::
beginComputation
(
IO
&
io
,
Vec3
periodicBoxSize
,
bool
includeEnergy
)
{
this
->
io
=
&
io
;
this
->
periodicBoxSize
=
periodicBoxSize
;
this
->
includeEnergy
=
includeEnergy
;
energy
=
0.0
;
pthread_mutex_lock
(
&
lock
);
isFinished
=
false
;
pthread_cond_signal
(
&
mainThreadStartCondition
);
pthread_mutex_unlock
(
&
lock
);
}
double
CpuCalcPmeReciprocalForceKernel
::
finishComputation
(
IO
&
io
)
{
pthread_mutex_lock
(
&
lock
);
while
(
!
isFinished
)
{
pthread_cond_wait
(
&
mainThreadEndCondition
,
&
lock
);
}
pthread_mutex_unlock
(
&
lock
);
io
.
setForce
(
&
force
[
0
]);
return
energy
;
}
bool
CpuCalcPmeReciprocalForceKernel
::
isProcessorSupported
()
{
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
cpuid
(
cpuInfo
,
1
);
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
19
))
!=
0
);
// Require SSE 4.1
}
return
false
;
}
int
CpuCalcPmeReciprocalForceKernel
::
findFFTDimension
(
int
minimum
)
{
if
(
minimum
<
1
)
return
1
;
while
(
true
)
{
// Attempt to factor the current value.
int
unfactored
=
minimum
;
for
(
int
factor
=
2
;
factor
<
12
;
factor
++
)
{
while
(
unfactored
>
1
&&
unfactored
%
factor
==
0
)
unfactored
/=
factor
;
}
if
(
unfactored
==
1
)
return
minimum
;
minimum
++
;
}
}
plugins/cpupme/src/CpuPmeKernels.h
0 → 100644
View file @
3862202e
#ifndef OPENMM_CPU_PME_KERNELS_H_
#define OPENMM_CPU_PME_KERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "internal/windowsExportPme.h"
#include "openmm/kernels.h"
#include "openmm/Vec3.h"
#include <fftw3.h>
#include <pthread.h>
#include <vector>
namespace
OpenMM
{
/**
* This is an optimized CPU implementation of CalcPmeReciprocalForceKernel. It is both
* vectorized (requiring SSE 4.1) and multithreaded. It uses FFTW to perform the FFTs.
*/
class
OPENMM_EXPORT_PME
CpuCalcPmeReciprocalForceKernel
:
public
CalcPmeReciprocalForceKernel
{
public:
class
ThreadData
;
CpuCalcPmeReciprocalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
)
:
CalcPmeReciprocalForceKernel
(
name
,
platform
),
hasCreatedPlan
(
false
),
isDeleted
(
false
),
realGrid
(
NULL
),
complexGrid
(
NULL
)
{
}
/**
* Initialize the kernel.
*
* @param gridx the x size of the PME grid
* @param gridy the y size of the PME grid
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
*/
void
initialize
(
int
xsize
,
int
ysize
,
int
zsize
,
int
numParticles
,
double
alpha
);
~
CpuCalcPmeReciprocalForceKernel
();
/**
* Begin computing the force and energy.
*
* @param io an object that coordinates data transfer
* @param periodicBoxSize the size of the periodic box (measured in nm)
* @param includeEnergy true if potential energy should be computed
*/
void
beginComputation
(
IO
&
io
,
Vec3
periodicBoxSize
,
bool
includeEnergy
);
/**
* Finish computing the force and energy.
*
* @param io an object that coordinates data transfer
* @return the potential energy due to the PME reciprocal space interactions
*/
double
finishComputation
(
IO
&
io
);
/**
* This routine contains the code executed by each thread.
*/
void
runThread
(
int
index
);
/**
* Get whether the current CPU supports all features needed by this kernel.
*/
static
bool
isProcessorSupported
();
private:
/**
* This is called by the worker threads to wait until the master thread instructs them to advance.
*/
void
threadWait
();
/**
* This is called by the master thread to instruct all the worker threads to advance.
*/
void
advanceThreads
();
/**
* Select a size for one grid dimension that FFTW can handle efficiently.
*/
int
findFFTDimension
(
int
minimum
);
static
bool
hasInitializedThreads
;
static
int
numThreads
;
int
gridx
,
gridy
,
gridz
,
numParticles
;
double
alpha
;
bool
hasCreatedPlan
,
isFinished
,
isDeleted
;
std
::
vector
<
float
>
force
;
std
::
vector
<
float
>
bsplineModuli
[
3
];
std
::
vector
<
float
>
recipEterm
;
Vec3
lastBoxSize
;
float
*
realGrid
;
fftwf_complex
*
complexGrid
;
fftwf_plan
forwardFFT
,
backwardFFT
;
int
waitCount
;
pthread_cond_t
startCondition
,
endCondition
;
pthread_cond_t
mainThreadStartCondition
,
mainThreadEndCondition
;
pthread_mutex_t
lock
;
pthread_t
mainThread
;
std
::
vector
<
pthread_t
>
thread
;
std
::
vector
<
ThreadData
*>
threadData
;
// The following variables are used to store information about the calculation currently being performed.
IO
*
io
;
float
energy
;
float
*
posq
;
Vec3
periodicBoxSize
;
bool
includeEnergy
;
};
}
// namespace OpenMM
#endif
/*OPENMM_CPU_PME_KERNELS_H_*/
Prev
1
…
7
8
9
10
11
12
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment