Commit 5e0b0e3a authored by peastman's avatar peastman
Browse files

Moved CPU based PME to a separate plugin

parent 932a61b6
...@@ -1152,6 +1152,70 @@ public: ...@@ -1152,6 +1152,70 @@ public:
virtual void execute(ContextImpl& context) = 0; virtual void execute(ContextImpl& context) = 0;
}; };
/**
* This kernel performs the reciprocal space calculation for PME. In most cases, this
* calculation is done directly by CalcNonbondedForceKernel so this kernel is unneeded.
* In some cases it may want to outsource the work to a different kernel. In particular,
* GPU based platforms sometimes use a CPU based implementation provided by a separate
* plugin.
*/
class CalcPmeReciprocalForceKernel : public KernelImpl {
public:
class IO;
static std::string Name() {
return "CalcPmeReciprocalForce";
}
CalcPmeReciprocalForceKernel(std::string name, const Platform& platform) : KernelImpl(name, platform) {
}
/**
* Initialize the kernel.
*
* @param gridx the x size of the PME grid
* @param gridy the y size of the PME grid
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
*/
virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) = 0;
/**
* Begin computing the force and energy.
*
* @param io an object that coordinates data transfer
* @param periodicBoxSize the size of the periodic box (measured in nm)
* @param includeEnergy true if potential energy should be computed
*/
virtual void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy) = 0;
/**
* Finish computing the force and energy.
*
* @param io an object that coordinates data transfer
* @return the potential energy due to the PME reciprocal space interactions
*/
virtual double finishComputation(IO& io) = 0;
};
/**
* Any class that uses CalcPmeReciprocalForceKernel should create an implementation of this
* class, then pass it to the kernel to manage communication with it.
*/
class CalcPmeReciprocalForceKernel::IO {
public:
/**
* Get a pointer to the atom charges and positions. This array should contain four
* elements for each atom: x, y, z, and q in that order.
*/
virtual float* getPosq() = 0;
/**
* Record the forces calculated by the kernel.
*
* @param force an array containing four elements for each atom. The first three
* are the x, y, and z components of the force, while the fourth element
* should be ignored.
*/
virtual void setForce(float* force) = 0;
};
} // namespace OpenMM } // namespace OpenMM
#endif /*OPENMM_KERNELS_H_*/ #endif /*OPENMM_KERNELS_H_*/
...@@ -99,11 +99,12 @@ public: ...@@ -99,11 +99,12 @@ public:
class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData { class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public: public:
PlatformData(const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty, PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& compilerProperty, const std::string& tempProperty); const std::string& compilerProperty, const std::string& tempProperty);
~PlatformData(); ~PlatformData();
void initializeContexts(const System& system); void initializeContexts(const System& system);
void syncContexts(); void syncContexts();
ContextImpl* context;
std::vector<CudaContext*> contexts; std::vector<CudaContext*> contexts;
std::vector<double> contextEnergy; std::vector<double> contextEnergy;
bool removeCM, peerAccessSupported; bool removeCM, peerAccessSupported;
......
...@@ -2,8 +2,7 @@ ...@@ -2,8 +2,7 @@
# Include CUDA related files. # Include CUDA related files.
# #
INCLUDE(FindCUDA) INCLUDE(FindCUDA)
INCLUDE(FindFFTW) INCLUDE_DIRECTORIES(${CUDA_TOOLKIT_INCLUDE})
INCLUDE_DIRECTORIES(${CUDA_TOOLKIT_INCLUDE} ${FFTW_INCLUDES})
FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu) FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
...@@ -19,7 +18,7 @@ IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ...@@ -19,7 +18,7 @@ IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}) SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUDA_CUDA_LIBRARY} ${CUDA_cufft_LIBRARY} ${PTHREADS_LIB} ${FFTW_LIBRARY} ${FFTW_THREADS_LIBRARY}) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUDA_CUDA_LIBRARY} ${CUDA_cufft_LIBRARY} ${PTHREADS_LIB})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_CUDA_BUILDING_SHARED_LIBRARY") SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_CUDA_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET}) INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
...@@ -1334,7 +1334,7 @@ private: ...@@ -1334,7 +1334,7 @@ private:
const NonbondedForce& force; const NonbondedForce& force;
}; };
class CudaCalcNonbondedForceKernel::PmeIO : public CpuPme::IO { class CudaCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public: public:
PmeIO(CudaContext& cu, CUfunction addForcesKernel) : cu(cu), addForcesKernel(addForcesKernel), forceTemp(NULL) { PmeIO(CudaContext& cu, CUfunction addForcesKernel) : cu(cu), addForcesKernel(addForcesKernel), forceTemp(NULL) {
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
...@@ -1363,28 +1363,28 @@ private: ...@@ -1363,28 +1363,28 @@ private:
class CudaCalcNonbondedForceKernel::PmePreComputation : public CudaContext::ForcePreComputation { class CudaCalcNonbondedForceKernel::PmePreComputation : public CudaContext::ForcePreComputation {
public: public:
PmePreComputation(CudaContext& cu, CpuPme& pme, CpuPme::IO& io) : cu(cu), pme(pme), io(io) { PmePreComputation(CudaContext& cu, Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : cu(cu), pme(pme), io(io) {
} }
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) { void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
Vec3 boxSize(cu.getPeriodicBoxSize().x, cu.getPeriodicBoxSize().y, cu.getPeriodicBoxSize().z); Vec3 boxSize(cu.getPeriodicBoxSize().x, cu.getPeriodicBoxSize().y, cu.getPeriodicBoxSize().z);
pme.beginComputation(io, boxSize, includeEnergy); pme.getAs<CalcPmeReciprocalForceKernel>().beginComputation(io, boxSize, includeEnergy);
} }
private: private:
CudaContext& cu; CudaContext& cu;
CpuPme& pme; Kernel pme;
CpuPme::IO& io; CalcPmeReciprocalForceKernel::IO& io;
}; };
class CudaCalcNonbondedForceKernel::PmePostComputation : public CudaContext::ForcePostComputation { class CudaCalcNonbondedForceKernel::PmePostComputation : public CudaContext::ForcePostComputation {
public: public:
PmePostComputation(CpuPme& pme, CpuPme::IO& io) : pme(pme), io(io) { PmePostComputation(Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : pme(pme), io(io) {
} }
double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) { double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
return pme.finishComputation(io); return pme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
} }
private: private:
CpuPme& pme; Kernel pme;
CpuPme::IO& io; CalcPmeReciprocalForceKernel::IO& io;
}; };
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() { CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...@@ -1411,8 +1411,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() { ...@@ -1411,8 +1411,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete pmeAtomGridIndex; delete pmeAtomGridIndex;
if (sort != NULL) if (sort != NULL)
delete sort; delete sort;
if (cpuPme != NULL)
delete cpuPme;
if (pmeio != NULL) if (pmeio != NULL)
delete pmeio; delete pmeio;
if (hasInitializedFFT) { if (hasInitializedFFT) {
...@@ -1571,15 +1569,21 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1571,15 +1569,21 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision())
pmeDefines["USE_DOUBLE_PRECISION"] = "1"; pmeDefines["USE_DOUBLE_PRECISION"] = "1";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
bool useCpuPme = false; bool useCpuPme = true;
if (useCpuPme) { if (useCpuPme) {
cpuPme = new CpuPme(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha); try {
CUfunction addForcesKernel = cu.getKernel(module, "addForces"); cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
pmeio = new PmeIO(cu, addForcesKernel); cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
cu.addPreComputation(new PmePreComputation(cu, *cpuPme, *pmeio)); CUfunction addForcesKernel = cu.getKernel(module, "addForces");
cu.addPostComputation(new PmePostComputation(*cpuPme, *pmeio)); pmeio = new PmeIO(cu, addForcesKernel);
cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
}
catch (OpenMMException& ex) {
// The CPU PME plugin isn't available.
}
} }
else { if (pmeio == NULL) {
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex"); pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge"); pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution"); pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
......
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#include "CudaSort.h" #include "CudaSort.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "CpuPme.h"
#include <cufft.h> #include <cufft.h>
namespace OpenMM { namespace OpenMM {
...@@ -558,7 +557,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel { ...@@ -558,7 +557,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform), CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL), cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), cpuPme(NULL), pmeio(NULL) { pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), pmeio(NULL) {
} }
~CudaCalcNonbondedForceKernel(); ~CudaCalcNonbondedForceKernel();
/** /**
...@@ -613,7 +612,7 @@ private: ...@@ -613,7 +612,7 @@ private:
CudaArray* pmeAtomRange; CudaArray* pmeAtomRange;
CudaArray* pmeAtomGridIndex; CudaArray* pmeAtomGridIndex;
CudaSort* sort; CudaSort* sort;
CpuPme* cpuPme; Kernel cpuPme;
PmeIO* pmeio; PmeIO* pmeio;
cufftHandle fftForward; cufftHandle fftForward;
cufftHandle fftBackward; cufftHandle fftBackward;
......
...@@ -147,7 +147,7 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string ...@@ -147,7 +147,7 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second); getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second);
transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower); transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower); transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
context.setPlatformData(new PlatformData(context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, compilerPropValue, tempPropValue)); context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, compilerPropValue, tempPropValue));
} }
void CudaPlatform::contextDestroyed(ContextImpl& context) const { void CudaPlatform::contextDestroyed(ContextImpl& context) const {
...@@ -155,8 +155,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const { ...@@ -155,8 +155,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
delete data; delete data;
} }
CudaPlatform::PlatformData::PlatformData(const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty, CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& compilerProperty, const string& tempProperty) : removeCM(false), stepCount(0), computeForceCount(0), time(0.0) { const string& compilerProperty, const string& tempProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
bool blocking = (blockingProperty == "true"); bool blocking = (blockingProperty == "true");
vector<string> devices; vector<string> devices;
size_t searchPos = 0, nextPos; size_t searchPos = 0, nextPos;
......
...@@ -54,7 +54,7 @@ void testGaussian() { ...@@ -54,7 +54,7 @@ void testGaussian() {
System system; System system;
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0); system.addParticle(1.0);
CudaPlatform::PlatformData platformData(system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"),
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory())); platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()));
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
......
...@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) { ...@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System system; System system;
system.addParticle(0.0); system.addParticle(0.0);
CudaPlatform::PlatformData platformData(system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"),
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory())); platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()));
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
......
#---------------------------------------------------
# OpenMM CPU PME Plugin
#
# Creates plugin library, base name=OpenMMPME.
# Default libraries are shared & optimized.
#
# Windows:
# OpenMMPME[_d].dll
# OpenMMPME[_d].lib
# Unix:
# libOpenMMPME[_d].so
#----------------------------------------------------
IF (APPLE)
SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
ENDIF (APPLE)
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS .)
# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
SET(OPENMMPME_LIBRARY_NAME OpenMMPME)
SET(SHARED_TARGET ${OPENMMPME_LIBRARY_NAME})
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
# But on Unix or Cygwin we have to add the suffix manually
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_TARGET ${SHARED_TARGET}_d)
SET(STATIC_TARGET ${STATIC_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# These are all the places to search for header files which are
# to be part of the API.
SET(API_INCLUDE_DIRS) # start empty
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
# append
SET(API_INCLUDE_DIRS ${API_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
ENDFOREACH(subdir)
# Find the include files.
SET(API_INCLUDE_FILES)
FOREACH(dir ${API_INCLUDE_DIRS})
FILE(GLOB fullpaths ${dir}/*.h) # returns full pathnames
SET(API_INCLUDE_FILES ${API_INCLUDE_FILES} ${fullpaths})
ENDFOREACH(dir)
# collect up source files
SET(SOURCE_FILES) # empty
SET(SOURCE_INCLUDE_FILES)
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
FILE(GLOB_RECURSE src_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${src_files}) #append
SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
# Include FFTW related files.
INCLUDE(FindFFTW)
INCLUDE_DIRECTORIES(${FFTW_INCLUDES})
# Build the plugin library.
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${PTHREADS_LIB} ${FFTW_LIBRARY} ${FFTW_THREADS_LIBRARY})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_PME_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
#ifndef OPENMM_WINDOWSEXPORTPME_H_
#define OPENMM_WINDOWSEXPORTPME_H_
/*
* Shared libraries are messy in Visual Studio. We have to distinguish three
* cases:
* (1) this header is being used to build the OpenMM shared library
* (dllexport)
* (2) this header is being used by a *client* of the OpenMM shared
* library (dllimport)
* (3) we are building the OpenMM static library, or the client is
* being compiled with the expectation of linking with the
* OpenMM static library (nothing special needed)
* In the CMake script for building this library, we define one of the symbols
* OPENMM_PME_BUILDING_{SHARED|STATIC}_LIBRARY
* Client code normally has no special symbol defined, in which case we'll
* assume it wants to use the shared library. However, if the client defines
* the symbol OPENMM_USE_STATIC_LIBRARIES we'll suppress the dllimport so
* that the client code can be linked with static libraries. Note that
* the client symbol is not library dependent, while the library symbols
* affect only the OpenMM library, meaning that other libraries can
* be clients of this one. However, we are assuming all-static or all-shared.
*/
#ifdef _MSC_VER
// We don't want to hear about how sprintf is "unsafe".
#pragma warning(disable:4996)
// Keep MS VC++ quiet about lack of dll export of private members.
#pragma warning(disable:4251)
#if defined(OPENMM_PME_BUILDING_SHARED_LIBRARY)
#define OPENMM_EXPORT_PME __declspec(dllexport)
#elif defined(OPENMM_PME_BUILDING_STATIC_LIBRARY) || defined(OPENMM_PME_USE_STATIC_LIBRARIES)
#define OPENMM_EXPORT_PME
#else
#define OPENMM_EXPORT_PME __declspec(dllimport) // i.e., a client of a shared library
#endif
#else
#define OPENMM_EXPORT_PME // Linux, Mac
#endif
#endif // OPENMM_WINDOWSEXPORTPME_H_
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CpuPmeKernelFactory.h"
#include "CpuPmeKernels.h"
#include "internal/windowsExportPme.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
using namespace OpenMM;
extern "C" void registerPlatforms() {
}
extern "C" void registerKernelFactories() {
if (CpuCalcPmeReciprocalForceKernel::isProcessorSupported()) {
CpuPmeKernelFactory* factory = new CpuPmeKernelFactory();
for (int i = 0; i < Platform::getNumPlatforms(); i++)
Platform::getPlatform(i).registerKernelFactory(CalcPmeReciprocalForceKernel::Name(), factory);
}
}
extern "C" OPENMM_EXPORT_PME void registerCpuPmeKernelFactories() {
registerKernelFactories();
}
KernelImpl* CpuPmeKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
if (name == CalcPmeReciprocalForceKernel::Name())
return new CpuCalcPmeReciprocalForceKernel(name, platform);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
#ifndef OPENMM_CPUPMEKERNELFACTORY_H_
#define OPENMM_CPUPMEKERNELFACTORY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/KernelFactory.h"
namespace OpenMM {
/**
* This KernelFactory creates kernels for the CPU implementation of PME.
*/
class CpuPmeKernelFactory : public KernelFactory {
public:
KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
};
} // namespace OpenMM
#endif /*OPENMM_CPUPMEKERNELFACTORY_H_*/
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
#ifdef WIN32 #ifdef WIN32
#define _USE_MATH_DEFINES // Needed to get M_PI #define _USE_MATH_DEFINES // Needed to get M_PI
#endif #endif
#include "CpuPme.h" #include "CpuPmeKernels.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h" #include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include <cmath> #include <cmath>
#include <smmintrin.h> #include <smmintrin.h>
...@@ -42,8 +42,8 @@ using namespace std; ...@@ -42,8 +42,8 @@ using namespace std;
static const int PME_ORDER = 5; static const int PME_ORDER = 5;
bool CpuPme::hasInitializedThreads = false; bool CpuCalcPmeReciprocalForceKernel::hasInitializedThreads = false;
int CpuPme::numThreads = 0; int CpuCalcPmeReciprocalForceKernel::numThreads = 0;
static float extractFloat(__m128 v, unsigned int element) { static float extractFloat(__m128 v, unsigned int element) {
float f[4]; float f[4];
...@@ -51,6 +51,8 @@ static float extractFloat(__m128 v, unsigned int element) { ...@@ -51,6 +51,8 @@ static float extractFloat(__m128 v, unsigned int element) {
return f[element]; return f[element];
} }
// Define function to get the number of processors.
#ifdef __APPLE__ #ifdef __APPLE__
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <dlfcn.h> #include <dlfcn.h>
...@@ -90,6 +92,23 @@ static int getNumProcessors() { ...@@ -90,6 +92,23 @@ static int getNumProcessors() {
#endif #endif
} }
// Define a function to check the CPU's capabilities.
#ifdef _WIN32
#define cpuid __cpuid
#else
static void cpuid(int cpuInfo[4], int infoType){
__asm__ __volatile__ (
"cpuid":
"=a" (cpuInfo[0]),
"=b" (cpuInfo[1]),
"=c" (cpuInfo[2]),
"=d" (cpuInfo[3]) :
"a" (infoType)
);
}
#endif
static void spreadCharge(int start, int end, float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3 periodicBoxSize) { static void spreadCharge(int start, int end, float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3 periodicBoxSize) {
float temp[4]; float temp[4];
__m128 boxSize = _mm_set_ps(0, (float) periodicBoxSize[2], (float) periodicBoxSize[1], (float) periodicBoxSize[0]); __m128 boxSize = _mm_set_ps(0, (float) periodicBoxSize[2], (float) periodicBoxSize[1], (float) periodicBoxSize[0]);
...@@ -336,17 +355,17 @@ static void interpolateForces(int start, int end, float* posq, float* force, flo ...@@ -336,17 +355,17 @@ static void interpolateForces(int start, int end, float* posq, float* force, flo
} }
} }
class CpuPme::ThreadData { class CpuCalcPmeReciprocalForceKernel::ThreadData {
public: public:
CpuPme& owner; CpuCalcPmeReciprocalForceKernel& owner;
int index; int index;
float* tempGrid; float* tempGrid;
ThreadData(CpuPme& owner, int index) : owner(owner), index(index), tempGrid(NULL) { ThreadData(CpuCalcPmeReciprocalForceKernel& owner, int index) : owner(owner), index(index), tempGrid(NULL) {
} }
}; };
static void* threadBody(void* args) { static void* threadBody(void* args) {
CpuPme::ThreadData& data = *reinterpret_cast<CpuPme::ThreadData*>(args); CpuCalcPmeReciprocalForceKernel::ThreadData& data = *reinterpret_cast<CpuCalcPmeReciprocalForceKernel::ThreadData*>(args);
data.owner.runThread(data.index); data.owner.runThread(data.index);
if (data.tempGrid != NULL) if (data.tempGrid != NULL)
fftwf_free(data.tempGrid); fftwf_free(data.tempGrid);
...@@ -354,13 +373,19 @@ static void* threadBody(void* args) { ...@@ -354,13 +373,19 @@ static void* threadBody(void* args) {
return 0; return 0;
} }
CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha) : void CpuCalcPmeReciprocalForceKernel::initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) {
gridx(gridx), gridy(gridy), gridz(gridz), numParticles(numParticles), alpha(alpha), hasCreatedPlan(false), isDeleted(false), force(4*numParticles), realGrid(NULL), complexGrid(NULL) {
if (!hasInitializedThreads) { if (!hasInitializedThreads) {
numThreads = getNumProcessors(); numThreads = getNumProcessors();
fftwf_init_threads(); fftwf_init_threads();
hasInitializedThreads = true; hasInitializedThreads = true;
} }
this->gridx = gridx;
this->gridy = gridy;
this->gridz = gridz;
this->numParticles = numParticles;
this->alpha = alpha;
force.resize(4*numParticles);
// Initialize threads. // Initialize threads.
pthread_cond_init(&startCondition, NULL); pthread_cond_init(&startCondition, NULL);
...@@ -442,7 +467,7 @@ CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha) ...@@ -442,7 +467,7 @@ CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha)
} }
} }
CpuPme::~CpuPme() { CpuCalcPmeReciprocalForceKernel::~CpuCalcPmeReciprocalForceKernel() {
isDeleted = true; isDeleted = true;
pthread_mutex_lock(&lock); pthread_mutex_lock(&lock);
pthread_cond_broadcast(&startCondition); pthread_cond_broadcast(&startCondition);
...@@ -469,7 +494,7 @@ double diff(struct timeval t1, struct timeval t2) { ...@@ -469,7 +494,7 @@ double diff(struct timeval t1, struct timeval t2) {
return t2.tv_usec-t1.tv_usec+1e6*(t2.tv_sec-t1.tv_sec); return t2.tv_usec-t1.tv_usec+1e6*(t2.tv_sec-t1.tv_sec);
} }
void CpuPme::runThread(int index) { void CpuCalcPmeReciprocalForceKernel::runThread(int index) {
if (index == -1) { if (index == -1) {
// This is the main thread that coordinates all the other ones. // This is the main thread that coordinates all the other ones.
...@@ -541,7 +566,7 @@ void CpuPme::runThread(int index) { ...@@ -541,7 +566,7 @@ void CpuPme::runThread(int index) {
} }
} }
void CpuPme::threadWait() { void CpuCalcPmeReciprocalForceKernel::threadWait() {
pthread_mutex_lock(&lock); pthread_mutex_lock(&lock);
waitCount++; waitCount++;
pthread_cond_signal(&endCondition); pthread_cond_signal(&endCondition);
...@@ -549,7 +574,7 @@ void CpuPme::threadWait() { ...@@ -549,7 +574,7 @@ void CpuPme::threadWait() {
pthread_mutex_unlock(&lock); pthread_mutex_unlock(&lock);
} }
void CpuPme::advanceThreads() { void CpuCalcPmeReciprocalForceKernel::advanceThreads() {
waitCount = 0; waitCount = 0;
pthread_cond_broadcast(&startCondition); pthread_cond_broadcast(&startCondition);
while (waitCount < numThreads) { while (waitCount < numThreads) {
...@@ -557,7 +582,7 @@ void CpuPme::advanceThreads() { ...@@ -557,7 +582,7 @@ void CpuPme::advanceThreads() {
} }
} }
void CpuPme::beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy) { void CpuCalcPmeReciprocalForceKernel::beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy) {
this->io = &io; this->io = &io;
this->periodicBoxSize = periodicBoxSize; this->periodicBoxSize = periodicBoxSize;
this->includeEnergy = includeEnergy; this->includeEnergy = includeEnergy;
...@@ -568,7 +593,7 @@ void CpuPme::beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy) ...@@ -568,7 +593,7 @@ void CpuPme::beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy)
pthread_mutex_unlock(&lock); pthread_mutex_unlock(&lock);
} }
double CpuPme::finishComputation(IO& io) { double CpuCalcPmeReciprocalForceKernel::finishComputation(IO& io) {
pthread_mutex_lock(&lock); pthread_mutex_lock(&lock);
while (!isFinished) { while (!isFinished) {
pthread_cond_wait(&mainThreadEndCondition, &lock); pthread_cond_wait(&mainThreadEndCondition, &lock);
...@@ -577,3 +602,13 @@ double CpuPme::finishComputation(IO& io) { ...@@ -577,3 +602,13 @@ double CpuPme::finishComputation(IO& io) {
io.setForce(&force[0]); io.setForce(&force[0]);
return energy; return energy;
} }
bool CpuCalcPmeReciprocalForceKernel::isProcessorSupported() {
int cpuInfo[4];
cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
cpuid(cpuInfo, 1);
return ((cpuInfo[2] & ((int) 1 << 19)) != 0); // Require SSE 4.1
}
return false;
}
\ No newline at end of file
#ifndef OPENMM_CPU_PME_H_ #ifndef OPENMM_CPU_PME_KERNELS_H_
#define OPENMM_CPU_PME_H_ #define OPENMM_CPU_PME_KERNELS_H_
/* -------------------------------------------------------------------------- * /* -------------------------------------------------------------------------- *
* OpenMM * * OpenMM *
...@@ -32,7 +32,8 @@ ...@@ -32,7 +32,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. * * USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "windowsExportCuda.h" #include "internal/windowsExportPme.h"
#include "openmm/kernels.h"
#include "openmm/Vec3.h" #include "openmm/Vec3.h"
#include <fftw3.h> #include <fftw3.h>
#include <pthread.h> #include <pthread.h>
...@@ -43,17 +44,18 @@ namespace OpenMM { ...@@ -43,17 +44,18 @@ namespace OpenMM {
/** /**
*/ */
class OPENMM_EXPORT_CUDA CpuPme { class OPENMM_EXPORT_PME CpuCalcPmeReciprocalForceKernel : public CalcPmeReciprocalForceKernel {
public: public:
class IO;
class ThreadData; class ThreadData;
/** CpuCalcPmeReciprocalForceKernel(std::string name, const Platform& platform) : CalcPmeReciprocalForceKernel(name, platform),
*/ hasCreatedPlan(false), isDeleted(false), realGrid(NULL), complexGrid(NULL) {
CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha); }
~CpuPme(); void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha);
~CpuCalcPmeReciprocalForceKernel();
void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy); void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy);
double finishComputation(IO& io); double finishComputation(IO& io);
void runThread(int index); void runThread(int index);
static bool isProcessorSupported();
private: private:
void threadWait(); void threadWait();
void advanceThreads(); void advanceThreads();
...@@ -82,12 +84,6 @@ private: ...@@ -82,12 +84,6 @@ private:
bool includeEnergy; bool includeEnergy;
}; };
class CpuPme::IO {
public:
virtual float* getPosq() = 0;
virtual void setForce(float* force) = 0;
};
} // namespace OpenMM } // namespace OpenMM
#endif /*OPENMM_CPU_PME_H_*/ #endif /*OPENMM_CPU_PME_KERNELS_H_*/
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment