Commit 5e0b0e3a authored by peastman's avatar peastman
Browse files

Moved CPU based PME to a separate plugin

parent 932a61b6
......@@ -1152,6 +1152,70 @@ public:
virtual void execute(ContextImpl& context) = 0;
};
/**
* This kernel performs the reciprocal space calculation for PME. In most cases, this
* calculation is done directly by CalcNonbondedForceKernel so this kernel is unneeded.
* In some cases it may want to outsource the work to a different kernel. In particular,
* GPU based platforms sometimes use a CPU based implementation provided by a separate
* plugin.
*/
class CalcPmeReciprocalForceKernel : public KernelImpl {
public:
class IO;
static std::string Name() {
return "CalcPmeReciprocalForce";
}
CalcPmeReciprocalForceKernel(std::string name, const Platform& platform) : KernelImpl(name, platform) {
}
/**
* Initialize the kernel.
*
* @param gridx the x size of the PME grid
* @param gridy the y size of the PME grid
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
*/
virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) = 0;
/**
* Begin computing the force and energy.
*
* @param io an object that coordinates data transfer
* @param periodicBoxSize the size of the periodic box (measured in nm)
* @param includeEnergy true if potential energy should be computed
*/
virtual void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy) = 0;
/**
* Finish computing the force and energy.
*
* @param io an object that coordinates data transfer
* @return the potential energy due to the PME reciprocal space interactions
*/
virtual double finishComputation(IO& io) = 0;
};
/**
* Any class that uses CalcPmeReciprocalForceKernel should create an implementation of this
* class, then pass it to the kernel to manage communication with it.
*/
class CalcPmeReciprocalForceKernel::IO {
public:
/**
* Get a pointer to the atom charges and positions. This array should contain four
* elements for each atom: x, y, z, and q in that order.
*/
virtual float* getPosq() = 0;
/**
* Record the forces calculated by the kernel.
*
* @param force an array containing four elements for each atom. The first three
* are the x, y, and z components of the force, while the fourth element
* should be ignored.
*/
virtual void setForce(float* force) = 0;
};
} // namespace OpenMM
#endif /*OPENMM_KERNELS_H_*/
......@@ -99,11 +99,12 @@ public:
class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public:
PlatformData(const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& compilerProperty, const std::string& tempProperty);
~PlatformData();
void initializeContexts(const System& system);
void syncContexts();
ContextImpl* context;
std::vector<CudaContext*> contexts;
std::vector<double> contextEnergy;
bool removeCM, peerAccessSupported;
......
......@@ -2,8 +2,7 @@
# Include CUDA related files.
#
INCLUDE(FindCUDA)
INCLUDE(FindFFTW)
INCLUDE_DIRECTORIES(${CUDA_TOOLKIT_INCLUDE} ${FFTW_INCLUDES})
INCLUDE_DIRECTORIES(${CUDA_TOOLKIT_INCLUDE})
FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
......@@ -19,7 +18,7 @@ IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUDA_CUDA_LIBRARY} ${CUDA_cufft_LIBRARY} ${PTHREADS_LIB} ${FFTW_LIBRARY} ${FFTW_THREADS_LIBRARY})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUDA_CUDA_LIBRARY} ${CUDA_cufft_LIBRARY} ${PTHREADS_LIB})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_CUDA_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
......@@ -1334,7 +1334,7 @@ private:
const NonbondedForce& force;
};
class CudaCalcNonbondedForceKernel::PmeIO : public CpuPme::IO {
class CudaCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public:
PmeIO(CudaContext& cu, CUfunction addForcesKernel) : cu(cu), addForcesKernel(addForcesKernel), forceTemp(NULL) {
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
......@@ -1363,28 +1363,28 @@ private:
class CudaCalcNonbondedForceKernel::PmePreComputation : public CudaContext::ForcePreComputation {
public:
PmePreComputation(CudaContext& cu, CpuPme& pme, CpuPme::IO& io) : cu(cu), pme(pme), io(io) {
PmePreComputation(CudaContext& cu, Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : cu(cu), pme(pme), io(io) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
Vec3 boxSize(cu.getPeriodicBoxSize().x, cu.getPeriodicBoxSize().y, cu.getPeriodicBoxSize().z);
pme.beginComputation(io, boxSize, includeEnergy);
pme.getAs<CalcPmeReciprocalForceKernel>().beginComputation(io, boxSize, includeEnergy);
}
private:
CudaContext& cu;
CpuPme& pme;
CpuPme::IO& io;
Kernel pme;
CalcPmeReciprocalForceKernel::IO& io;
};
class CudaCalcNonbondedForceKernel::PmePostComputation : public CudaContext::ForcePostComputation {
public:
PmePostComputation(CpuPme& pme, CpuPme::IO& io) : pme(pme), io(io) {
PmePostComputation(Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : pme(pme), io(io) {
}
double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
return pme.finishComputation(io);
return pme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
}
private:
CpuPme& pme;
CpuPme::IO& io;
Kernel pme;
CalcPmeReciprocalForceKernel::IO& io;
};
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
......@@ -1411,8 +1411,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete pmeAtomGridIndex;
if (sort != NULL)
delete sort;
if (cpuPme != NULL)
delete cpuPme;
if (pmeio != NULL)
delete pmeio;
if (hasInitializedFFT) {
......@@ -1571,15 +1569,21 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (cu.getUseDoublePrecision())
pmeDefines["USE_DOUBLE_PRECISION"] = "1";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
bool useCpuPme = false;
bool useCpuPme = true;
if (useCpuPme) {
cpuPme = new CpuPme(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
CUfunction addForcesKernel = cu.getKernel(module, "addForces");
pmeio = new PmeIO(cu, addForcesKernel);
cu.addPreComputation(new PmePreComputation(cu, *cpuPme, *pmeio));
cu.addPostComputation(new PmePostComputation(*cpuPme, *pmeio));
try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
CUfunction addForcesKernel = cu.getKernel(module, "addForces");
pmeio = new PmeIO(cu, addForcesKernel);
cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
}
catch (OpenMMException& ex) {
// The CPU PME plugin isn't available.
}
}
else {
if (pmeio == NULL) {
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
......
......@@ -34,7 +34,6 @@
#include "CudaSort.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
#include "CpuPme.h"
#include <cufft.h>
namespace OpenMM {
......@@ -558,7 +557,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), cpuPme(NULL), pmeio(NULL) {
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), pmeio(NULL) {
}
~CudaCalcNonbondedForceKernel();
/**
......@@ -613,7 +612,7 @@ private:
CudaArray* pmeAtomRange;
CudaArray* pmeAtomGridIndex;
CudaSort* sort;
CpuPme* cpuPme;
Kernel cpuPme;
PmeIO* pmeio;
cufftHandle fftForward;
cufftHandle fftBackward;
......
......@@ -147,7 +147,7 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second);
transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
context.setPlatformData(new PlatformData(context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, compilerPropValue, tempPropValue));
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, compilerPropValue, tempPropValue));
}
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
......@@ -155,8 +155,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
delete data;
}
CudaPlatform::PlatformData::PlatformData(const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& compilerProperty, const string& tempProperty) : removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& compilerProperty, const string& tempProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
bool blocking = (blockingProperty == "true");
vector<string> devices;
size_t searchPos = 0, nextPos;
......
......@@ -54,7 +54,7 @@ void testGaussian() {
System system;
for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0);
CudaPlatform::PlatformData platformData(system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"),
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"),
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()));
CudaContext& context = *platformData.contexts[0];
context.initialize();
......
......@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System system;
system.addParticle(0.0);
CudaPlatform::PlatformData platformData(system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"),
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"),
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()));
CudaContext& context = *platformData.contexts[0];
context.initialize();
......
#---------------------------------------------------
# OpenMM CPU PME Plugin
#
# Creates plugin library, base name=OpenMMPME.
# Default libraries are shared & optimized.
#
# Windows:
# OpenMMPME[_d].dll
# OpenMMPME[_d].lib
# Unix:
# libOpenMMPME[_d].so
#----------------------------------------------------
IF (APPLE)
SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
ENDIF (APPLE)
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS .)
# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
SET(OPENMMPME_LIBRARY_NAME OpenMMPME)
SET(SHARED_TARGET ${OPENMMPME_LIBRARY_NAME})
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
# But on Unix or Cygwin we have to add the suffix manually
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_TARGET ${SHARED_TARGET}_d)
SET(STATIC_TARGET ${STATIC_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# These are all the places to search for header files which are
# to be part of the API.
SET(API_INCLUDE_DIRS) # start empty
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
# append
SET(API_INCLUDE_DIRS ${API_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
ENDFOREACH(subdir)
# Find the include files.
SET(API_INCLUDE_FILES)
FOREACH(dir ${API_INCLUDE_DIRS})
FILE(GLOB fullpaths ${dir}/*.h) # returns full pathnames
SET(API_INCLUDE_FILES ${API_INCLUDE_FILES} ${fullpaths})
ENDFOREACH(dir)
# collect up source files
SET(SOURCE_FILES) # empty
SET(SOURCE_INCLUDE_FILES)
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
FILE(GLOB_RECURSE src_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${src_files}) #append
SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
# Include FFTW related files.
INCLUDE(FindFFTW)
INCLUDE_DIRECTORIES(${FFTW_INCLUDES})
# Build the plugin library.
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${PTHREADS_LIB} ${FFTW_LIBRARY} ${FFTW_THREADS_LIBRARY})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_PME_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
#ifndef OPENMM_WINDOWSEXPORTPME_H_
#define OPENMM_WINDOWSEXPORTPME_H_
/*
* Shared libraries are messy in Visual Studio. We have to distinguish three
* cases:
* (1) this header is being used to build the OpenMM shared library
* (dllexport)
* (2) this header is being used by a *client* of the OpenMM shared
* library (dllimport)
* (3) we are building the OpenMM static library, or the client is
* being compiled with the expectation of linking with the
* OpenMM static library (nothing special needed)
* In the CMake script for building this library, we define one of the symbols
* OPENMM_PME_BUILDING_{SHARED|STATIC}_LIBRARY
* Client code normally has no special symbol defined, in which case we'll
* assume it wants to use the shared library. However, if the client defines
* the symbol OPENMM_USE_STATIC_LIBRARIES we'll suppress the dllimport so
* that the client code can be linked with static libraries. Note that
* the client symbol is not library dependent, while the library symbols
* affect only the OpenMM library, meaning that other libraries can
* be clients of this one. However, we are assuming all-static or all-shared.
*/
#ifdef _MSC_VER
// We don't want to hear about how sprintf is "unsafe".
#pragma warning(disable:4996)
// Keep MS VC++ quiet about lack of dll export of private members.
#pragma warning(disable:4251)
#if defined(OPENMM_PME_BUILDING_SHARED_LIBRARY)
#define OPENMM_EXPORT_PME __declspec(dllexport)
#elif defined(OPENMM_PME_BUILDING_STATIC_LIBRARY) || defined(OPENMM_PME_USE_STATIC_LIBRARIES)
#define OPENMM_EXPORT_PME
#else
#define OPENMM_EXPORT_PME __declspec(dllimport) // i.e., a client of a shared library
#endif
#else
#define OPENMM_EXPORT_PME // Linux, Mac
#endif
#endif // OPENMM_WINDOWSEXPORTPME_H_
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CpuPmeKernelFactory.h"
#include "CpuPmeKernels.h"
#include "internal/windowsExportPme.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
using namespace OpenMM;
extern "C" void registerPlatforms() {
}
extern "C" void registerKernelFactories() {
if (CpuCalcPmeReciprocalForceKernel::isProcessorSupported()) {
CpuPmeKernelFactory* factory = new CpuPmeKernelFactory();
for (int i = 0; i < Platform::getNumPlatforms(); i++)
Platform::getPlatform(i).registerKernelFactory(CalcPmeReciprocalForceKernel::Name(), factory);
}
}
extern "C" OPENMM_EXPORT_PME void registerCpuPmeKernelFactories() {
registerKernelFactories();
}
KernelImpl* CpuPmeKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
if (name == CalcPmeReciprocalForceKernel::Name())
return new CpuCalcPmeReciprocalForceKernel(name, platform);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
#ifndef OPENMM_CPUPMEKERNELFACTORY_H_
#define OPENMM_CPUPMEKERNELFACTORY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/KernelFactory.h"
namespace OpenMM {
/**
* This KernelFactory creates kernels for the CPU implementation of PME.
*/
class CpuPmeKernelFactory : public KernelFactory {
public:
KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
};
} // namespace OpenMM
#endif /*OPENMM_CPUPMEKERNELFACTORY_H_*/
......@@ -32,7 +32,7 @@
#ifdef WIN32
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include "CpuPme.h"
#include "CpuPmeKernels.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include <cmath>
#include <smmintrin.h>
......@@ -42,8 +42,8 @@ using namespace std;
static const int PME_ORDER = 5;
bool CpuPme::hasInitializedThreads = false;
int CpuPme::numThreads = 0;
bool CpuCalcPmeReciprocalForceKernel::hasInitializedThreads = false;
int CpuCalcPmeReciprocalForceKernel::numThreads = 0;
static float extractFloat(__m128 v, unsigned int element) {
float f[4];
......@@ -51,6 +51,8 @@ static float extractFloat(__m128 v, unsigned int element) {
return f[element];
}
// Define function to get the number of processors.
#ifdef __APPLE__
#include <sys/sysctl.h>
#include <dlfcn.h>
......@@ -90,6 +92,23 @@ static int getNumProcessors() {
#endif
}
// Define a function to check the CPU's capabilities.
#ifdef _WIN32
#define cpuid __cpuid
#else
static void cpuid(int cpuInfo[4], int infoType){
__asm__ __volatile__ (
"cpuid":
"=a" (cpuInfo[0]),
"=b" (cpuInfo[1]),
"=c" (cpuInfo[2]),
"=d" (cpuInfo[3]) :
"a" (infoType)
);
}
#endif
static void spreadCharge(int start, int end, float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3 periodicBoxSize) {
float temp[4];
__m128 boxSize = _mm_set_ps(0, (float) periodicBoxSize[2], (float) periodicBoxSize[1], (float) periodicBoxSize[0]);
......@@ -336,17 +355,17 @@ static void interpolateForces(int start, int end, float* posq, float* force, flo
}
}
class CpuPme::ThreadData {
class CpuCalcPmeReciprocalForceKernel::ThreadData {
public:
CpuPme& owner;
CpuCalcPmeReciprocalForceKernel& owner;
int index;
float* tempGrid;
ThreadData(CpuPme& owner, int index) : owner(owner), index(index), tempGrid(NULL) {
ThreadData(CpuCalcPmeReciprocalForceKernel& owner, int index) : owner(owner), index(index), tempGrid(NULL) {
}
};
static void* threadBody(void* args) {
CpuPme::ThreadData& data = *reinterpret_cast<CpuPme::ThreadData*>(args);
CpuCalcPmeReciprocalForceKernel::ThreadData& data = *reinterpret_cast<CpuCalcPmeReciprocalForceKernel::ThreadData*>(args);
data.owner.runThread(data.index);
if (data.tempGrid != NULL)
fftwf_free(data.tempGrid);
......@@ -354,13 +373,19 @@ static void* threadBody(void* args) {
return 0;
}
CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha) :
gridx(gridx), gridy(gridy), gridz(gridz), numParticles(numParticles), alpha(alpha), hasCreatedPlan(false), isDeleted(false), force(4*numParticles), realGrid(NULL), complexGrid(NULL) {
void CpuCalcPmeReciprocalForceKernel::initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) {
if (!hasInitializedThreads) {
numThreads = getNumProcessors();
fftwf_init_threads();
hasInitializedThreads = true;
}
this->gridx = gridx;
this->gridy = gridy;
this->gridz = gridz;
this->numParticles = numParticles;
this->alpha = alpha;
force.resize(4*numParticles);
// Initialize threads.
pthread_cond_init(&startCondition, NULL);
......@@ -442,7 +467,7 @@ CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha)
}
}
CpuPme::~CpuPme() {
CpuCalcPmeReciprocalForceKernel::~CpuCalcPmeReciprocalForceKernel() {
isDeleted = true;
pthread_mutex_lock(&lock);
pthread_cond_broadcast(&startCondition);
......@@ -469,7 +494,7 @@ double diff(struct timeval t1, struct timeval t2) {
return t2.tv_usec-t1.tv_usec+1e6*(t2.tv_sec-t1.tv_sec);
}
void CpuPme::runThread(int index) {
void CpuCalcPmeReciprocalForceKernel::runThread(int index) {
if (index == -1) {
// This is the main thread that coordinates all the other ones.
......@@ -541,7 +566,7 @@ void CpuPme::runThread(int index) {
}
}
void CpuPme::threadWait() {
void CpuCalcPmeReciprocalForceKernel::threadWait() {
pthread_mutex_lock(&lock);
waitCount++;
pthread_cond_signal(&endCondition);
......@@ -549,7 +574,7 @@ void CpuPme::threadWait() {
pthread_mutex_unlock(&lock);
}
void CpuPme::advanceThreads() {
void CpuCalcPmeReciprocalForceKernel::advanceThreads() {
waitCount = 0;
pthread_cond_broadcast(&startCondition);
while (waitCount < numThreads) {
......@@ -557,7 +582,7 @@ void CpuPme::advanceThreads() {
}
}
void CpuPme::beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy) {
void CpuCalcPmeReciprocalForceKernel::beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy) {
this->io = &io;
this->periodicBoxSize = periodicBoxSize;
this->includeEnergy = includeEnergy;
......@@ -568,7 +593,7 @@ void CpuPme::beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy)
pthread_mutex_unlock(&lock);
}
double CpuPme::finishComputation(IO& io) {
double CpuCalcPmeReciprocalForceKernel::finishComputation(IO& io) {
pthread_mutex_lock(&lock);
while (!isFinished) {
pthread_cond_wait(&mainThreadEndCondition, &lock);
......@@ -577,3 +602,13 @@ double CpuPme::finishComputation(IO& io) {
io.setForce(&force[0]);
return energy;
}
bool CpuCalcPmeReciprocalForceKernel::isProcessorSupported() {
int cpuInfo[4];
cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
cpuid(cpuInfo, 1);
return ((cpuInfo[2] & ((int) 1 << 19)) != 0); // Require SSE 4.1
}
return false;
}
\ No newline at end of file
#ifndef OPENMM_CPU_PME_H_
#define OPENMM_CPU_PME_H_
#ifndef OPENMM_CPU_PME_KERNELS_H_
#define OPENMM_CPU_PME_KERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
......@@ -32,7 +32,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "windowsExportCuda.h"
#include "internal/windowsExportPme.h"
#include "openmm/kernels.h"
#include "openmm/Vec3.h"
#include <fftw3.h>
#include <pthread.h>
......@@ -43,17 +44,18 @@ namespace OpenMM {
/**
*/
class OPENMM_EXPORT_CUDA CpuPme {
class OPENMM_EXPORT_PME CpuCalcPmeReciprocalForceKernel : public CalcPmeReciprocalForceKernel {
public:
class IO;
class ThreadData;
/**
*/
CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha);
~CpuPme();
CpuCalcPmeReciprocalForceKernel(std::string name, const Platform& platform) : CalcPmeReciprocalForceKernel(name, platform),
hasCreatedPlan(false), isDeleted(false), realGrid(NULL), complexGrid(NULL) {
}
void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha);
~CpuCalcPmeReciprocalForceKernel();
void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy);
double finishComputation(IO& io);
void runThread(int index);
static bool isProcessorSupported();
private:
void threadWait();
void advanceThreads();
......@@ -82,12 +84,6 @@ private:
bool includeEnergy;
};
class CpuPme::IO {
public:
virtual float* getPosq() = 0;
virtual void setForce(float* force) = 0;
};
} // namespace OpenMM
#endif /*OPENMM_CPU_PME_H_*/
#endif /*OPENMM_CPU_PME_KERNELS_H_*/
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment