Commit b1d621b3 authored by Yutong Zhao's avatar Yutong Zhao
Browse files

Merge pull request #32 from peastman/pme

Created CPU implementation of PME
parents cce1ca81 794ab9a7
......@@ -54,7 +54,7 @@ void testGaussian() {
System system;
for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"));
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false");
OpenCLContext& context = *platformData.contexts[0];
context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
......@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System system;
system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"));
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false");
OpenCLContext& context = *platformData.contexts[0];
context.initialize();
OpenCLArray data(context, array.size(), sizeof(float), "sortData");
......
#---------------------------------------------------
# OpenMM CPU PME Plugin
#
# Creates plugin library, base name=OpenMMPME.
# Default libraries are shared & optimized.
#
# Windows:
# OpenMMPME[_d].dll
# OpenMMPME[_d].lib
# Unix:
# libOpenMMPME[_d].so
#----------------------------------------------------
IF (APPLE)
SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
ENDIF (APPLE)
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS .)
# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
SET(OPENMMPME_LIBRARY_NAME OpenMMPME)
SET(SHARED_TARGET ${OPENMMPME_LIBRARY_NAME})
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
# But on Unix or Cygwin we have to add the suffix manually
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_TARGET ${SHARED_TARGET}_d)
SET(STATIC_TARGET ${STATIC_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# These are all the places to search for header files which are
# to be part of the API.
SET(API_INCLUDE_DIRS) # start empty
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
# append
SET(API_INCLUDE_DIRS ${API_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
ENDFOREACH(subdir)
# Find the include files.
SET(API_INCLUDE_FILES)
FOREACH(dir ${API_INCLUDE_DIRS})
FILE(GLOB fullpaths ${dir}/*.h) # returns full pathnames
SET(API_INCLUDE_FILES ${API_INCLUDE_FILES} ${fullpaths})
ENDFOREACH(dir)
# collect up source files
SET(SOURCE_FILES) # empty
SET(SOURCE_INCLUDE_FILES)
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
FILE(GLOB_RECURSE src_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${src_files}) #append
SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET_SOURCE_FILES_PROPERTIES(${SOURCE_FILES} PROPERTIES COMPILE_FLAGS "-msse4.1")
# Include FFTW related files.
INCLUDE_DIRECTORIES(${FFTW_INCLUDES})
# Build the plugin library.
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${PTHREADS_LIB} ${FFTW_LIBRARY} ${FFTW_THREADS_LIBRARY})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_PME_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
SUBDIRS(tests)
#ifndef OPENMM_WINDOWSEXPORTPME_H_
#define OPENMM_WINDOWSEXPORTPME_H_
/*
* Shared libraries are messy in Visual Studio. We have to distinguish three
* cases:
* (1) this header is being used to build the OpenMM shared library
* (dllexport)
* (2) this header is being used by a *client* of the OpenMM shared
* library (dllimport)
* (3) we are building the OpenMM static library, or the client is
* being compiled with the expectation of linking with the
* OpenMM static library (nothing special needed)
* In the CMake script for building this library, we define one of the symbols
* OPENMM_PME_BUILDING_{SHARED|STATIC}_LIBRARY
* Client code normally has no special symbol defined, in which case we'll
* assume it wants to use the shared library. However, if the client defines
* the symbol OPENMM_USE_STATIC_LIBRARIES we'll suppress the dllimport so
* that the client code can be linked with static libraries. Note that
* the client symbol is not library dependent, while the library symbols
* affect only the OpenMM library, meaning that other libraries can
* be clients of this one. However, we are assuming all-static or all-shared.
*/
#ifdef _MSC_VER
// We don't want to hear about how sprintf is "unsafe".
#pragma warning(disable:4996)
// Keep MS VC++ quiet about lack of dll export of private members.
#pragma warning(disable:4251)
#if defined(OPENMM_PME_BUILDING_SHARED_LIBRARY)
#define OPENMM_EXPORT_PME __declspec(dllexport)
#elif defined(OPENMM_PME_BUILDING_STATIC_LIBRARY) || defined(OPENMM_PME_USE_STATIC_LIBRARIES)
#define OPENMM_EXPORT_PME
#else
#define OPENMM_EXPORT_PME __declspec(dllimport) // i.e., a client of a shared library
#endif
#else
#define OPENMM_EXPORT_PME // Linux, Mac
#endif
#endif // OPENMM_WINDOWSEXPORTPME_H_
/* -------------------------------------------------------------------------- *
* OpenMMCpuPme *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CpuPmeKernelFactory.h"
#include "CpuPmeKernels.h"
#include "internal/windowsExportPme.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
using namespace OpenMM;
extern "C" void registerPlatforms() {
}
extern "C" void registerKernelFactories() {
if (CpuCalcPmeReciprocalForceKernel::isProcessorSupported()) {
CpuPmeKernelFactory* factory = new CpuPmeKernelFactory();
for (int i = 0; i < Platform::getNumPlatforms(); i++)
Platform::getPlatform(i).registerKernelFactory(CalcPmeReciprocalForceKernel::Name(), factory);
}
}
extern "C" OPENMM_EXPORT_PME void registerCpuPmeKernelFactories() {
registerKernelFactories();
}
KernelImpl* CpuPmeKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
if (name == CalcPmeReciprocalForceKernel::Name())
return new CpuCalcPmeReciprocalForceKernel(name, platform);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
#ifndef OPENMM_CPUPMEKERNELFACTORY_H_
#define OPENMM_CPUPMEKERNELFACTORY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/KernelFactory.h"
namespace OpenMM {
/**
* This KernelFactory creates kernels for the CPU implementation of PME.
*/
class CpuPmeKernelFactory : public KernelFactory {
public:
KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
};
} // namespace OpenMM
#endif /*OPENMM_CPUPMEKERNELFACTORY_H_*/
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifdef WIN32
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include "CpuPmeKernels.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include <cmath>
#include <cstring>
#include <smmintrin.h>
using namespace OpenMM;
using namespace std;
static const int PME_ORDER = 5;
bool CpuCalcPmeReciprocalForceKernel::hasInitializedThreads = false;
int CpuCalcPmeReciprocalForceKernel::numThreads = 0;
#define EXTRACT_FLOAT(v, element) _mm_cvtss_f32(_mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, element)))
// Define function to get the number of processors.
#ifdef __APPLE__
#include <sys/sysctl.h>
#include <dlfcn.h>
#else
#ifdef WIN32
#include <windows.h>
#else
#include <dlfcn.h>
#include <unistd.h>
#endif
#endif
static int getNumProcessors() {
#ifdef __APPLE__
int ncpu;
size_t len = 4;
if (sysctlbyname("hw.logicalcpu", &ncpu, &len, NULL, 0) == 0)
return ncpu;
else
return 1;
#else
#ifdef WIN32
SYSTEM_INFO siSysInfo;
int ncpu;
GetSystemInfo(&siSysInfo);
ncpu = siSysInfo.dwNumberOfProcessors;
if (ncpu < 1)
ncpu = 1;
return ncpu;
#else
long nProcessorsOnline = sysconf(_SC_NPROCESSORS_ONLN);
if (nProcessorsOnline == -1)
return 1;
else
return (int) nProcessorsOnline;
#endif
#endif
}
// Define a function to check the CPU's capabilities.
#ifdef _WIN32
#define cpuid __cpuid
#else
static void cpuid(int cpuInfo[4], int infoType){
__asm__ __volatile__ (
"cpuid":
"=a" (cpuInfo[0]),
"=b" (cpuInfo[1]),
"=c" (cpuInfo[2]),
"=d" (cpuInfo[3]) :
"a" (infoType)
);
}
#endif
static void spreadCharge(int start, int end, float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3 periodicBoxSize) {
float temp[4];
__m128 boxSize = _mm_set_ps(0, (float) periodicBoxSize[2], (float) periodicBoxSize[1], (float) periodicBoxSize[0]);
__m128 invBoxSize = _mm_set_ps(0, (float) (1/periodicBoxSize[2]), (float) (1/periodicBoxSize[1]), (float) (1/periodicBoxSize[0]));
__m128 gridSize = _mm_set_ps(0, gridz, gridy, gridx);
__m128i gridSizeInt = _mm_set_epi32(0, gridz, gridy, gridx);
__m128 one = _mm_set1_ps(1);
__m128 scale = _mm_set1_ps(1.0f/(PME_ORDER-1));
const float epsilonFactor = sqrt(ONE_4PI_EPS0);
memset(grid, 0, sizeof(float)*gridx*gridy*gridz);
for (int i = start; i < end; i++) {
// Find the position relative to the nearest grid point.
__m128 pos = _mm_load_ps(&posq[4*i]);
__m128 posInBox = _mm_sub_ps(pos, _mm_mul_ps(boxSize, _mm_floor_ps(_mm_mul_ps(pos, invBoxSize))));
__m128 t = _mm_mul_ps(_mm_mul_ps(posInBox, invBoxSize), gridSize);
__m128i ti = _mm_cvttps_epi32(t);
__m128 dr = _mm_sub_ps(t, _mm_cvtepi32_ps(ti));
__m128i gridIndex = _mm_sub_epi32(ti, _mm_and_si128(gridSizeInt, _mm_cmpeq_epi32(ti, gridSizeInt)));
// Compute the B-spline coefficients.
__m128 data[PME_ORDER];
data[PME_ORDER-1] = _mm_setzero_ps();
data[1] = dr;
data[0] = _mm_sub_ps(one, dr);
for (int j = 3; j < PME_ORDER; j++) {
__m128 div = _mm_set1_ps(1.0f/(j-1));
data[j-1] = _mm_mul_ps(_mm_mul_ps(div, dr), data[j-2]);
for (int k = 1; k < j-1; k++)
data[j-k-1] = _mm_mul_ps(div, _mm_add_ps(_mm_mul_ps(_mm_add_ps(dr, _mm_set1_ps(k)), data[j-k-2]), _mm_mul_ps(_mm_sub_ps(_mm_set1_ps(j-k), dr), data[j-k-1])));
data[0] = _mm_mul_ps(_mm_mul_ps(div, _mm_sub_ps(one, dr)), data[0]);
}
data[PME_ORDER-1] = _mm_mul_ps(_mm_mul_ps(scale, dr), data[PME_ORDER-2]);
for (int j = 1; j < (PME_ORDER-1); j++)
data[PME_ORDER-j-1] = _mm_mul_ps(scale, _mm_add_ps(_mm_mul_ps(_mm_add_ps(dr, _mm_set1_ps(j)), data[PME_ORDER-j-2]), _mm_mul_ps(_mm_sub_ps(_mm_set1_ps(PME_ORDER-j), dr), data[PME_ORDER-j-1])));
data[0] = _mm_mul_ps(_mm_mul_ps(scale, _mm_sub_ps(one, dr)), data[0]);
// Spread the charges.
int gridIndexX = _mm_extract_epi32(gridIndex, 0);
int gridIndexY = _mm_extract_epi32(gridIndex, 1);
int gridIndexZ = _mm_extract_epi32(gridIndex, 2);
int zindex[PME_ORDER];
for (int j = 0; j < PME_ORDER; j++) {
zindex[j] = gridIndexZ+j;
zindex[j] -= (zindex[j] >= gridz ? gridz : 0);
}
float charge = epsilonFactor*posq[4*i+3];
__m128 zdata0to3 = _mm_set_ps(EXTRACT_FLOAT(data[3], 2), EXTRACT_FLOAT(data[2], 2), EXTRACT_FLOAT(data[1], 2), EXTRACT_FLOAT(data[0], 2));
float zdata4 = EXTRACT_FLOAT(data[4], 2);
if (gridIndexZ+4 < gridz) {
for (int ix = 0; ix < PME_ORDER; ix++) {
int xbase = gridIndexX+ix;
xbase -= (xbase >= gridx ? gridx : 0);
xbase = xbase*gridy*gridz;
float xdata = charge*EXTRACT_FLOAT(data[ix], 0);
for (int iy = 0; iy < PME_ORDER; iy++) {
int ybase = gridIndexY+iy;
ybase -= (ybase >= gridy ? gridy : 0);
ybase = xbase + ybase*gridz;
float multiplier = xdata*EXTRACT_FLOAT(data[iy], 1);
__m128 add0to3 = _mm_mul_ps(zdata0to3, _mm_set1_ps(multiplier));
_mm_storeu_ps(&grid[ybase+gridIndexZ], _mm_add_ps(_mm_loadu_ps(&grid[ybase+gridIndexZ]), add0to3));
grid[ybase+zindex[4]] += multiplier*zdata4;
}
}
}
else {
for (int ix = 0; ix < PME_ORDER; ix++) {
int xbase = gridIndexX+ix;
xbase -= (xbase >= gridx ? gridx : 0);
xbase = xbase*gridy*gridz;
float xdata = charge*EXTRACT_FLOAT(data[ix], 0);
for (int iy = 0; iy < PME_ORDER; iy++) {
int ybase = gridIndexY+iy;
ybase -= (ybase >= gridy ? gridy : 0);
ybase = xbase + ybase*gridz;
float multiplier = xdata*EXTRACT_FLOAT(data[iy], 1);
__m128 add0to3 = _mm_mul_ps(zdata0to3, _mm_set1_ps(multiplier));
_mm_store_ps(temp, add0to3);
grid[ybase+zindex[0]] += temp[0];
grid[ybase+zindex[1]] += temp[1];
grid[ybase+zindex[2]] += temp[2];
grid[ybase+zindex[3]] += temp[3];
grid[ybase+zindex[4]] += multiplier*zdata4;
}
}
}
}
}
static void computeReciprocalEterm(int start, int end, int gridx, int gridy, int gridz, vector<float>& recipEterm, double alpha, vector<float>* bsplineModuli, Vec3 periodicBoxSize) {
const unsigned int zsize = gridz/2+1;
const unsigned int yzsize = gridy*zsize;
const float scaleFactor = (float) (M_PI*periodicBoxSize[0]*periodicBoxSize[1]*periodicBoxSize[2]);
const float recipExpFactor = (float) (M_PI*M_PI/(alpha*alpha));
const float invPeriodicBoxSizeX = (float) (1.0/periodicBoxSize[0]);
const float invPeriodicBoxSizeY = (float) (1.0/periodicBoxSize[1]);
const float invPeriodicBoxSizeZ = (float) (1.0/periodicBoxSize[2]);
int firstz = (start == 0 ? 1 : 0);
for (int kx = start; kx < end; kx++) {
int mx = (kx < (gridx+1)/2) ? kx : kx-gridx;
float mhx = mx*invPeriodicBoxSizeX;
float bx = scaleFactor*bsplineModuli[0][kx];
for (int ky = 0; ky < gridy; ky++) {
int my = (ky < (gridy+1)/2) ? ky : ky-gridy;
float mhy = my*invPeriodicBoxSizeY;
float mhx2y2 = mhx*mhx + mhy*mhy;
float bxby = bx*bsplineModuli[1][ky];
for (int kz = firstz; kz < zsize; kz++) {
int index = kx*yzsize + ky*zsize + kz;
int mz = (kz < (gridz+1)/2) ? kz : kz-gridz;
float mhz = mz*invPeriodicBoxSizeZ;
float bz = bsplineModuli[2][kz];
float m2 = mhx2y2 + mhz*mhz;
float denom = m2*bxby*bz;
recipEterm[index] = exp(-recipExpFactor*m2)/denom;
}
firstz = 0;
}
}
}
static float reciprocalEnergy(int start, int end, fftwf_complex* grid, int gridx, int gridy, int gridz, double alpha, vector<float>* bsplineModuli, Vec3 periodicBoxSize) {
const unsigned int zsizeHalf = gridz/2+1;
const unsigned int yzsizeHalf = gridy*zsizeHalf;
const float scaleFactor = (float) (M_PI*periodicBoxSize[0]*periodicBoxSize[1]*periodicBoxSize[2]);
const float recipExpFactor = (float) (M_PI*M_PI/(alpha*alpha));
const float invPeriodicBoxSizeX = (float) (1.0/periodicBoxSize[0]);
const float invPeriodicBoxSizeY = (float) (1.0/periodicBoxSize[1]);
const float invPeriodicBoxSizeZ = (float) (1.0/periodicBoxSize[2]);
float energy = 0.0f;
int firstz = (start == 0 ? 1 : 0);
for (int kx = start; kx < end; kx++) {
int mx = (kx < (gridx+1)/2) ? kx : kx-gridx;
float mhx = mx*invPeriodicBoxSizeX;
float bx = scaleFactor*bsplineModuli[0][kx];
for (int ky = 0; ky < gridy; ky++) {
int my = (ky < (gridy+1)/2) ? ky : ky-gridy;
float mhy = my*invPeriodicBoxSizeY;
float mhx2y2 = mhx*mhx + mhy*mhy;
float bxby = bx*bsplineModuli[1][ky];
for (int kz = firstz; kz < gridz; kz++) {
int mz = (kz < (gridz+1)/2) ? kz : kz-gridz;
float mhz = mz*invPeriodicBoxSizeZ;
float bz = bsplineModuli[2][kz];
float m2 = mhx2y2 + mhz*mhz;
float denom = m2*bxby*bz;
float eterm = exp(-recipExpFactor*m2)/denom;
int kx1, ky1, kz1;
if (kz >= gridz/2+1) {
kx1 = (kx == 0 ? kx : gridx-kx);
ky1 = (ky == 0 ? ky : gridy-ky);
kz1 = gridz-kz;
}
else {
kx1 = kx;
ky1 = ky;
kz1 = kz;
}
int index = kx1*yzsizeHalf + ky1*zsizeHalf + kz1;
float gridReal = grid[index][0];
float gridImag = grid[index][1];
energy += eterm*(gridReal*gridReal+gridImag*gridImag);
}
firstz = 0;
}
}
return 0.5f*energy;
}
static void reciprocalConvolution(int start, int end, fftwf_complex* grid, int gridx, int gridy, int gridz, vector<float>& recipEterm) {
const unsigned int zsize = gridz/2+1;
const unsigned int yzsize = gridy*zsize;
int firstz = (start == 0 ? 1 : 0);
for (int kx = start; kx < end; kx++) {
for (int ky = 0; ky < gridy; ky++) {
for (int kz = firstz; kz < zsize; kz++) {
int index = kx*yzsize + ky*zsize + kz;
float eterm = recipEterm[index];
grid[index][0] *= eterm;
grid[index][1] *= eterm;
}
firstz = 0;
}
}
}
static void interpolateForces(int start, int end, float* posq, float* force, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3 periodicBoxSize) {
__m128 boxSize = _mm_set_ps(0, (float) periodicBoxSize[2], (float) periodicBoxSize[1], (float) periodicBoxSize[0]);
__m128 invBoxSize = _mm_set_ps(0, (float) (1/periodicBoxSize[2]), (float) (1/periodicBoxSize[1]), (float) (1/periodicBoxSize[0]));
__m128 gridSize = _mm_set_ps(0, gridz, gridy, gridx);
__m128i gridSizeInt = _mm_set_epi32(0, gridz, gridy, gridx);
__m128 one = _mm_set1_ps(1);
__m128 scale = _mm_set1_ps(1.0f/(PME_ORDER-1));
const float epsilonFactor = sqrt(ONE_4PI_EPS0);
for (int i = start; i < end; i++) {
// Find the position relative to the nearest grid point.
__m128 pos = _mm_load_ps(&posq[4*i]);
__m128 posInBox = _mm_sub_ps(pos, _mm_mul_ps(boxSize, _mm_floor_ps(_mm_mul_ps(pos, invBoxSize))));
__m128 t = _mm_mul_ps(_mm_mul_ps(posInBox, invBoxSize), gridSize);
__m128i ti = _mm_cvttps_epi32(t);
__m128 dr = _mm_sub_ps(t, _mm_cvtepi32_ps(ti));
__m128i gridIndex = _mm_sub_epi32(ti, _mm_and_si128(gridSizeInt, _mm_cmpeq_epi32(ti, gridSizeInt)));
// Compute the B-spline coefficients.
__m128 data[PME_ORDER];
__m128 ddata[PME_ORDER];
data[PME_ORDER-1] = _mm_setzero_ps();
data[1] = dr;
data[0] = _mm_sub_ps(one, dr);
for (int j = 3; j < PME_ORDER; j++) {
__m128 div = _mm_set1_ps(1.0f/(j-1));
data[j-1] = _mm_mul_ps(_mm_mul_ps(div, dr), data[j-2]);
for (int k = 1; k < j-1; k++)
data[j-k-1] = _mm_mul_ps(div, _mm_add_ps(_mm_mul_ps(_mm_add_ps(dr, _mm_set1_ps(k)), data[j-k-2]), _mm_mul_ps(_mm_sub_ps(_mm_set1_ps(j-k), dr), data[j-k-1])));
data[0] = _mm_mul_ps(_mm_mul_ps(div, _mm_sub_ps(one, dr)), data[0]);
}
ddata[0] = _mm_sub_ps(_mm_set1_ps(0), data[0]);
for (int j = 1; j < PME_ORDER; j++)
ddata[j] = _mm_sub_ps(data[j-1], data[j]);
data[PME_ORDER-1] = _mm_mul_ps(_mm_mul_ps(scale, dr), data[PME_ORDER-2]);
for (int j = 1; j < (PME_ORDER-1); j++)
data[PME_ORDER-j-1] = _mm_mul_ps(scale, _mm_add_ps(_mm_mul_ps(_mm_add_ps(dr, _mm_set1_ps(j)), data[PME_ORDER-j-2]), _mm_mul_ps(_mm_sub_ps(_mm_set1_ps(PME_ORDER-j), dr), data[PME_ORDER-j-1])));
data[0] = _mm_mul_ps(_mm_mul_ps(scale, _mm_sub_ps(one, dr)), data[0]);
// Compute the force on this atom.
int gridIndexX = _mm_extract_epi32(gridIndex, 0);
int gridIndexY = _mm_extract_epi32(gridIndex, 1);
int gridIndexZ = _mm_extract_epi32(gridIndex, 2);
int zindex[PME_ORDER];
for (int j = 0; j < PME_ORDER; j++) {
zindex[j] = gridIndexZ+j;
zindex[j] -= (zindex[j] >= gridz ? gridz : 0);
}
__m128 zdata[PME_ORDER];
for (int j = 0; j < PME_ORDER; j++)
zdata[j] = _mm_set_ps(0, EXTRACT_FLOAT(ddata[j], 2), EXTRACT_FLOAT(data[j], 2), EXTRACT_FLOAT(data[j], 2));
__m128 f = _mm_set1_ps(0);
for (int ix = 0; ix < PME_ORDER; ix++) {
int xbase = gridIndexX+ix;
xbase -= (xbase >= gridx ? gridx : 0);
xbase = xbase*gridy*gridz;
float dx = EXTRACT_FLOAT(data[ix], 0);
float ddx = EXTRACT_FLOAT(ddata[ix], 0);
__m128 xdata = _mm_set_ps(0, dx, dx, ddx);
for (int iy = 0; iy < PME_ORDER; iy++) {
int ybase = gridIndexY+iy;
ybase -= (ybase >= gridy ? gridy : 0);
ybase = xbase + ybase*gridz;
float dy = EXTRACT_FLOAT(data[iy], 1);
float ddy = EXTRACT_FLOAT(ddata[iy], 1);
__m128 xydata = _mm_mul_ps(xdata, _mm_set_ps(0, dy, ddy, dy));
for (int iz = 0; iz < PME_ORDER; iz++) {
__m128 gridValue = _mm_set1_ps(grid[ybase+zindex[iz]]);
f = _mm_add_ps(f, _mm_mul_ps(xydata, _mm_mul_ps(zdata[iz], gridValue)));
}
}
}
f = _mm_mul_ps(invBoxSize, _mm_mul_ps(gridSize, _mm_mul_ps(f, _mm_set1_ps(-epsilonFactor*posq[4*i+3]))));
_mm_store_ps(&force[4*i], f);
}
}
class CpuCalcPmeReciprocalForceKernel::ThreadData {
public:
CpuCalcPmeReciprocalForceKernel& owner;
int index;
float* tempGrid;
ThreadData(CpuCalcPmeReciprocalForceKernel& owner, int index) : owner(owner), index(index), tempGrid(NULL) {
}
};
static void* threadBody(void* args) {
CpuCalcPmeReciprocalForceKernel::ThreadData& data = *reinterpret_cast<CpuCalcPmeReciprocalForceKernel::ThreadData*>(args);
data.owner.runThread(data.index);
if (data.tempGrid != NULL)
fftwf_free(data.tempGrid);
delete &data;
return 0;
}
void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha) {
if (!hasInitializedThreads) {
numThreads = getNumProcessors();
fftwf_init_threads();
hasInitializedThreads = true;
}
gridx = findFFTDimension(xsize);
gridy = findFFTDimension(ysize);
gridz = findFFTDimension(zsize);
this->numParticles = numParticles;
this->alpha = alpha;
force.resize(4*numParticles);
recipEterm.resize(gridx*gridy*gridz);
// Initialize threads.
pthread_cond_init(&startCondition, NULL);
pthread_cond_init(&endCondition, NULL);
pthread_cond_init(&mainThreadStartCondition, NULL);
pthread_cond_init(&mainThreadEndCondition, NULL);
pthread_mutex_init(&lock, NULL);
thread.resize(numThreads);
for (int i = 0; i < numThreads; i++) {
ThreadData* data = new ThreadData(*this, i);
threadData.push_back(data);
pthread_create(&thread[i], NULL, threadBody, data);
data->tempGrid = (float*) fftwf_malloc(sizeof(float)*(gridx*gridy*gridz+3));
}
pthread_create(&mainThread, NULL, threadBody, new ThreadData(*this, -1));
// Initialize FFTW.
realGrid = threadData[0]->tempGrid;
complexGrid = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*gridx*gridy*(gridz/2+1));
fftwf_plan_with_nthreads(numThreads);
forwardFFT = fftwf_plan_dft_r2c_3d(gridx, gridy, gridz, realGrid, complexGrid, FFTW_MEASURE);
backwardFFT = fftwf_plan_dft_c2r_3d(gridx, gridy, gridz, complexGrid, realGrid, FFTW_MEASURE);
hasCreatedPlan = true;
// Initialize the b-spline moduli.
int maxSize = max(max(gridx, gridy), gridz);
vector<double> data(PME_ORDER);
vector<double> ddata(PME_ORDER);
vector<double> bsplinesData(maxSize);
data[PME_ORDER-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PME_ORDER; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PME_ORDER; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PME_ORDER-1);
data[PME_ORDER-1] = 0.0;
for (int i = 1; i < (PME_ORDER-1); i++)
data[PME_ORDER-i-1] = div*(i*data[PME_ORDER-i-2]+(PME_ORDER-i)*data[PME_ORDER-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplinesData[i] = 0.0;
for (int i = 1; i <= PME_ORDER; i++)
bsplinesData[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
bsplineModuli[0].resize(gridx);
bsplineModuli[1].resize(gridy);
bsplineModuli[2].resize(gridz);
for (int dim = 0; dim < 3; dim++) {
int ndata = bsplineModuli[dim].size();
vector<float>& moduli = bsplineModuli[dim];
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplinesData[j]*cos(arg);
ss += bsplinesData[j]*sin(arg);
}
moduli[i] = (float) (sc*sc+ss*ss);
}
for (int i = 0; i < ndata; i++)
if (moduli[i] < 1.0e-7f)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
}
}
CpuCalcPmeReciprocalForceKernel::~CpuCalcPmeReciprocalForceKernel() {
isDeleted = true;
pthread_mutex_lock(&lock);
pthread_cond_broadcast(&startCondition);
pthread_cond_broadcast(&mainThreadStartCondition);
pthread_mutex_unlock(&lock);
for (int i = 0; i < (int) thread.size(); i++)
pthread_join(thread[i], NULL);
pthread_join(mainThread, NULL);
pthread_mutex_destroy(&lock);
pthread_cond_destroy(&startCondition);
pthread_cond_destroy(&endCondition);
pthread_cond_destroy(&mainThreadStartCondition);
pthread_cond_destroy(&mainThreadEndCondition);
if (complexGrid != NULL)
fftwf_free(complexGrid);
if (hasCreatedPlan) {
fftwf_destroy_plan(forwardFFT);
fftwf_destroy_plan(backwardFFT);
}
}
void CpuCalcPmeReciprocalForceKernel::runThread(int index) {
if (index == -1) {
// This is the main thread that coordinates all the other ones.
pthread_mutex_lock(&lock);
while (true) {
// Wait for the signal to start.
pthread_cond_wait(&mainThreadStartCondition, &lock);
if (isDeleted)
break;
posq = io->getPosq();
advanceThreads(); // Signal threads to perform charge spreading.
advanceThreads(); // Signal threads to sum the charge grids.
fftwf_execute_dft_r2c(forwardFFT, realGrid, complexGrid);
if (lastBoxSize != periodicBoxSize)
advanceThreads(); // Signal threads to compute the reciprocal scale factors.
if (includeEnergy)
advanceThreads(); // Signal threads to compute energy.
advanceThreads(); // Signal threads to perform reciprocal convolution.
fftwf_execute_dft_c2r(backwardFFT, complexGrid, realGrid);
advanceThreads(); // Signal threads to interpolate forces.
isFinished = true;
lastBoxSize = periodicBoxSize;
pthread_cond_signal(&mainThreadEndCondition);
}
pthread_mutex_unlock(&lock);
}
else {
// This is a worker thread.
int particleStart = (index*numParticles)/numThreads;
int particleEnd = ((index+1)*numParticles)/numThreads;
int gridxStart = (index*gridx)/numThreads;
int gridxEnd = ((index+1)*gridx)/numThreads;
int gridSize = (gridx*gridy*gridz+3)/4;
int gridStart = 4*((index*gridSize)/numThreads);
int gridEnd = 4*(((index+1)*gridSize)/numThreads);
while (true) {
threadWait();
if (isDeleted)
break;
spreadCharge(particleStart, particleEnd, posq, threadData[index]->tempGrid, gridx, gridy, gridz, numParticles, periodicBoxSize);
threadWait();
int numGrids = threadData.size();
for (int i = gridStart; i < gridEnd; i += 4) {
__m128 sum = _mm_load_ps(&realGrid[i]);
for (int j = 1; j < numGrids; j++)
sum = _mm_add_ps(sum, _mm_load_ps(&threadData[j]->tempGrid[i]));
_mm_store_ps(&realGrid[i], sum);
}
threadWait();
if (lastBoxSize != periodicBoxSize) {
computeReciprocalEterm(gridxStart, gridxEnd, gridx, gridy, gridz, recipEterm, alpha, bsplineModuli, periodicBoxSize);
threadWait();
}
if (includeEnergy) {
double threadEnergy = reciprocalEnergy(gridxStart, gridxEnd, complexGrid, gridx, gridy, gridz, alpha, bsplineModuli, periodicBoxSize);
pthread_mutex_lock(&lock);
energy += threadEnergy;
pthread_mutex_unlock(&lock);
threadWait();
}
reciprocalConvolution(gridxStart, gridxEnd, complexGrid, gridx, gridy, gridz, recipEterm);
threadWait();
interpolateForces(particleStart, particleEnd, posq, &force[0], realGrid, gridx, gridy, gridz, numParticles, periodicBoxSize);
}
}
}
void CpuCalcPmeReciprocalForceKernel::threadWait() {
pthread_mutex_lock(&lock);
waitCount++;
pthread_cond_signal(&endCondition);
pthread_cond_wait(&startCondition, &lock);
pthread_mutex_unlock(&lock);
}
void CpuCalcPmeReciprocalForceKernel::advanceThreads() {
waitCount = 0;
pthread_cond_broadcast(&startCondition);
while (waitCount < numThreads) {
pthread_cond_wait(&endCondition, &lock);
}
}
void CpuCalcPmeReciprocalForceKernel::beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy) {
this->io = &io;
this->periodicBoxSize = periodicBoxSize;
this->includeEnergy = includeEnergy;
energy = 0.0;
pthread_mutex_lock(&lock);
isFinished = false;
pthread_cond_signal(&mainThreadStartCondition);
pthread_mutex_unlock(&lock);
}
double CpuCalcPmeReciprocalForceKernel::finishComputation(IO& io) {
pthread_mutex_lock(&lock);
while (!isFinished) {
pthread_cond_wait(&mainThreadEndCondition, &lock);
}
pthread_mutex_unlock(&lock);
io.setForce(&force[0]);
return energy;
}
bool CpuCalcPmeReciprocalForceKernel::isProcessorSupported() {
int cpuInfo[4];
cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
cpuid(cpuInfo, 1);
return ((cpuInfo[2] & ((int) 1 << 19)) != 0); // Require SSE 4.1
}
return false;
}
int CpuCalcPmeReciprocalForceKernel::findFFTDimension(int minimum) {
if (minimum < 1)
return 1;
while (true) {
// Attempt to factor the current value.
int unfactored = minimum;
for (int factor = 2; factor < 12; factor++) {
while (unfactored > 1 && unfactored%factor == 0)
unfactored /= factor;
}
if (unfactored == 1)
return minimum;
minimum++;
}
}
#ifndef OPENMM_CPU_PME_KERNELS_H_
#define OPENMM_CPU_PME_KERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "internal/windowsExportPme.h"
#include "openmm/kernels.h"
#include "openmm/Vec3.h"
#include <fftw3.h>
#include <pthread.h>
#include <vector>
namespace OpenMM {
/**
* This is an optimized CPU implementation of CalcPmeReciprocalForceKernel. It is both
* vectorized (requiring SSE 4.1) and multithreaded. It uses FFTW to perform the FFTs.
*/
class OPENMM_EXPORT_PME CpuCalcPmeReciprocalForceKernel : public CalcPmeReciprocalForceKernel {
public:
class ThreadData;
CpuCalcPmeReciprocalForceKernel(std::string name, const Platform& platform) : CalcPmeReciprocalForceKernel(name, platform),
hasCreatedPlan(false), isDeleted(false), realGrid(NULL), complexGrid(NULL) {
}
/**
* Initialize the kernel.
*
* @param gridx the x size of the PME grid
* @param gridy the y size of the PME grid
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
*/
void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha);
~CpuCalcPmeReciprocalForceKernel();
/**
* Begin computing the force and energy.
*
* @param io an object that coordinates data transfer
* @param periodicBoxSize the size of the periodic box (measured in nm)
* @param includeEnergy true if potential energy should be computed
*/
void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy);
/**
* Finish computing the force and energy.
*
* @param io an object that coordinates data transfer
* @return the potential energy due to the PME reciprocal space interactions
*/
double finishComputation(IO& io);
/**
* This routine contains the code executed by each thread.
*/
void runThread(int index);
/**
* Get whether the current CPU supports all features needed by this kernel.
*/
static bool isProcessorSupported();
private:
/**
* This is called by the worker threads to wait until the master thread instructs them to advance.
*/
void threadWait();
/**
* This is called by the master thread to instruct all the worker threads to advance.
*/
void advanceThreads();
/**
* Select a size for one grid dimension that FFTW can handle efficiently.
*/
int findFFTDimension(int minimum);
static bool hasInitializedThreads;
static int numThreads;
int gridx, gridy, gridz, numParticles;
double alpha;
bool hasCreatedPlan, isFinished, isDeleted;
std::vector<float> force;
std::vector<float> bsplineModuli[3];
std::vector<float> recipEterm;
Vec3 lastBoxSize;
float* realGrid;
fftwf_complex* complexGrid;
fftwf_plan forwardFFT, backwardFFT;
int waitCount;
pthread_cond_t startCondition, endCondition;
pthread_cond_t mainThreadStartCondition, mainThreadEndCondition;
pthread_mutex_t lock;
pthread_t mainThread;
std::vector<pthread_t> thread;
std::vector<ThreadData*> threadData;
// The following variables are used to store information about the calculation currently being performed.
IO* io;
float energy;
float* posq;
Vec3 periodicBoxSize;
bool includeEnergy;
};
} // namespace OpenMM
#endif /*OPENMM_CPU_PME_KERNELS_H_*/
#
# Testing
#
ENABLE_TESTING()
SET(SHARED_OPENMM_PME_TARGET OpenMMPME)
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_CUDA_TARGET ${SHARED_CUDA_TARGET}_d)
SET(SHARED_OPENMM_PME_TARGET ${SHARED_OPENMM_PME_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
#LINK_DIRECTORIES
# Automatically create tests using files named "Test*.cpp"
FILE(GLOB TEST_PROGS "*Test*.cpp")
FOREACH(TEST_PROG ${TEST_PROGS})
GET_FILENAME_COMPONENT(TEST_ROOT ${TEST_PROG} NAME_WE)
# Link with shared library
ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET} ${SHARED_OPENMM_TARGET} ${SHARED_OPENMM_PME_TARGET})
ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
ENDFOREACH(TEST_PROG ${TEST_PROGS})
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CPU implementation of PME.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "openmm/NonbondedForce.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "openmm/internal/ContextImpl.h"
#include "../src/CpuPmeKernels.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
class IO : public CalcPmeReciprocalForceKernel::IO {
public:
vector<float> posq;
float* force;
float* getPosq() {
return &posq[0];
}
void setForce(float* force) {
this->force = force;
}
};
void testPME() {
// Create a cloud of random point charges.
const int numParticles = 51;
const double boxWidth = 5.0;
const double cutoff = 1.0;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(boxWidth, 0, 0), Vec3(0, boxWidth, 0), Vec3(0, 0, boxWidth));
NonbondedForce* force = new NonbondedForce();
system.addForce(force);
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
force->addParticle(-1.0+i*2.0/(numParticles-1), 1.0, 0.0);
positions[i] = Vec3(boxWidth*genrand_real2(sfmt), boxWidth*genrand_real2(sfmt), boxWidth*genrand_real2(sfmt));
}
force->setNonbondedMethod(NonbondedForce::PME);
force->setCutoffDistance(cutoff);
force->setReciprocalSpaceForceGroup(1);
force->setEwaldErrorTolerance(1e-4);
// Compute the reciprocal space forces with the reference platform.
Platform& platform = Platform::getPlatformByName("Reference");
VerletIntegrator integrator(0.01);
Context context(system, integrator, platform);
context.setPositions(positions);
State refState = context.getState(State::Forces | State::Energy, false, 1<<1);
// Now compute them with the optimized kernel.
double alpha;
int gridx, gridy, gridz;
NonbondedForceImpl::calcPMEParameters(system, *force, alpha, gridx, gridy, gridz);
CpuCalcPmeReciprocalForceKernel pme(CalcPmeReciprocalForceKernel::Name(), platform);
IO io;
double sumSquaredCharges = 0;
for (int i = 0; i < numParticles; i++) {
io.posq.push_back(positions[i][0]);
io.posq.push_back(positions[i][1]);
io.posq.push_back(positions[i][2]);
double charge, sigma, epsilon;
force->getParticleParameters(i, charge, sigma, epsilon);
io.posq.push_back(charge);
sumSquaredCharges += charge*charge;
}
double ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
pme.initialize(gridx, gridy, gridz, numParticles, alpha);
pme.beginComputation(io, Vec3(boxWidth, boxWidth, boxWidth), true);
double energy = pme.finishComputation(io);
// See if they match.
ASSERT_EQUAL_TOL(refState.getPotentialEnergy(), energy+ewaldSelfEnergy, 1e-3);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(refState.getForces()[i], Vec3(io.force[4*i], io.force[4*i+1], io.force[4*i+2]), 1e-3);
}
int main(int argc, char* argv[]) {
try {
testPME();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment