Commit 05198df6 authored by Peter Eastman's avatar Peter Eastman
Browse files

Created CUDA implementation of RPMD

parent 99bca8c1
......@@ -170,6 +170,15 @@ IF(OPENMM_BUILD_RPMD_OPENCL_LIB)
ADD_SUBDIRECTORY(platforms/opencl)
ENDIF(OPENMM_BUILD_RPMD_OPENCL_LIB)
# Offer the CUDA implementation of RPMD as a cache option. The default is ON
# when CUDA was found and OFF otherwise. Because SET(... CACHE ...) is used
# without FORCE, a value already stored in the cache is left untouched.
IF(CUDA_FOUND)
SET(OPENMM_BUILD_RPMD_CUDA_LIB ON CACHE BOOL "Build RPMD implementation for CUDA")
ELSE(CUDA_FOUND)
SET(OPENMM_BUILD_RPMD_CUDA_LIB OFF CACHE BOOL "Build RPMD implementation for CUDA")
ENDIF(CUDA_FOUND)
# Only descend into the CUDA platform directory when the option is enabled.
IF(OPENMM_BUILD_RPMD_CUDA_LIB)
ADD_SUBDIRECTORY(platforms/cuda)
ENDIF(OPENMM_BUILD_RPMD_CUDA_LIB)
INSTALL_TARGETS(/lib RUNTIME_DIRECTORY /lib ${SHARED_RPMD_TARGET})
IF( CREATE_SERIALIZABLE_OPENMM_RPMD )
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_RPMD_SERIALIZABLE_TARGET})
......
......@@ -182,7 +182,7 @@ protected:
private:
double temperature, friction;
int numCopies, randomNumberSeed;
bool forcesAreValid, hasSetPosition, hasSetVelocity;
bool forcesAreValid, hasSetPosition, hasSetVelocity, isFirstStep;
ContextImpl* context;
Context* owner;
Kernel kernel;
......
......@@ -42,7 +42,7 @@ using std::string;
using std::vector;
RPMDIntegrator::RPMDIntegrator(int numCopies, double temperature, double frictionCoeff, double stepSize) :
owner(NULL), numCopies(numCopies), forcesAreValid(false), hasSetPosition(false), hasSetVelocity(false) {
owner(NULL), numCopies(numCopies), forcesAreValid(false), hasSetPosition(false), hasSetVelocity(false), isFirstStep(true) {
setTemperature(temperature);
setFriction(frictionCoeff);
setStepSize(stepSize);
......@@ -109,6 +109,15 @@ void RPMDIntegrator::step(int steps) {
for (int i = 0; i < numCopies; i++)
setVelocities(i, s.getVelocities());
}
if (isFirstStep) {
// Call setPositions() on the Context so it doesn't think the user is trying to
// run a simulation without setting positions first. These positions will
// immediately get overwritten by the ones stored in this integrator.
vector<Vec3> p(context->getSystem().getNumParticles(), Vec3());
context->getOwner().setPositions(p);
isFirstStep = false;
}
for (int i = 0; i < steps; ++i) {
kernel.getAs<IntegrateRPMDStepKernel>().execute(*context, *this, forcesAreValid);
forcesAreValid = true;
......
#---------------------------------------------------
# OpenMM CUDA RPMD Integrator
#
# Creates OpenMM library, base name=OpenMMRPMDCUDA.
# Default libraries are shared & optimized. Variants
# are created for debug (_d).
#
# Windows:
# OpenMMRPMDCUDA[_d].dll
# OpenMMRPMDCUDA[_d].lib
# Unix:
# libOpenMMRPMDCUDA[_d].so
#----------------------------------------------------
# On Apple platforms, pin the deployment target and the SDK used for this plugin.
# NOTE(review): the SDK path is hard-coded to the 10.6 SDK under /Developer;
# confirm it matches the toolchain installed on the build machine.
IF (APPLE)
SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
SET (CMAKE_OSX_SYSROOT "/Developer/SDKs/MacOSX10.6.sdk")
ENDIF (APPLE)
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
# Only the current directory is listed here.
SET(OPENMM_SOURCE_SUBDIRS .)
# Base name of the library built from this directory, and the name of the
# CMake target that produces it.
SET(OPENMMRPMDCUDA_LIBRARY_NAME OpenMMRPMDCUDA)
SET(SHARED_TARGET ${OPENMMRPMDCUDA_LIBRARY_NAME})
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
# But on Unix or Cygwin we have to add the suffix manually
# (the target itself is renamed, so everything below that uses
# ${SHARED_TARGET} sees the "_d" name in Debug builds).
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_TARGET ${SHARED_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# These are all the places to search for header files which are
# to be part of the API: include/ and include/internal/ under each
# source subdirectory.
SET(API_INCLUDE_DIRS) # start empty
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
# append
SET(API_INCLUDE_DIRS ${API_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
ENDFOREACH(subdir)
# We'll need both *relative* path names, starting with their API_INCLUDE_DIRS,
# and absolute pathnames.
# NOTE(review): every entry of API_INCLUDE_DIRS begins with
# ${CMAKE_CURRENT_SOURCE_DIR}, so the "relative" list built below appears to
# contain the same absolute paths as the absolute list -- confirm the intent.
SET(API_REL_INCLUDE_FILES) # start these out empty
SET(API_ABS_INCLUDE_FILES)
FOREACH(dir ${API_INCLUDE_DIRS})
FILE(GLOB fullpaths ${dir}/*.h) # returns full pathnames
SET(API_ABS_INCLUDE_FILES ${API_ABS_INCLUDE_FILES} ${fullpaths})
FOREACH(pathname ${fullpaths})
# Strip the directory part and re-attach ${dir} in front of the bare file name.
GET_FILENAME_COMPONENT(filename ${pathname} NAME)
SET(API_REL_INCLUDE_FILES ${API_REL_INCLUDE_FILES} ${dir}/${filename})
ENDFOREACH(pathname)
ENDFOREACH(dir)
# collect up source files
# SOURCE_FILES receives every .cpp/.c file found recursively under src/;
# SOURCE_INCLUDE_FILES receives the headers located directly in src/.
SET(SOURCE_FILES) # empty
SET(SOURCE_INCLUDE_FILES)
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
FILE(GLOB_RECURSE src_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${src_files}) #append
SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)
# Put this plugin's own sources, and the source and build trees of the main
# CUDA platform, at the front of the header search path.
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/include)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/cuda/src)
# Set variables needed for encoding kernel sources into a C++ class
# The generated .cpp/.h pair is written to the build tree and added to the
# list of sources compiled into the library.
SET(CUDA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(CUDA_SOURCE_CLASS CudaRpmdKernelSources)
SET(CUDA_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.cpp)
SET(CUDA_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
# Create the library
INCLUDE_DIRECTORIES(${CUDA_TOOLKIT_INCLUDE})
FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
# Regenerate the encoded kernel sources whenever one of the .cu files changes,
# by running the CUDA platform's EncodeCUDAFiles.cmake script.
# NOTE(review): the script itself is not listed under DEPENDS, so editing it
# does not appear to trigger regeneration -- confirm whether that is intended.
ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
COMMAND ${CMAKE_COMMAND}
ARGS -D CUDA_SOURCE_DIR=${CUDA_SOURCE_DIR} -D CUDA_KERNELS_CPP=${CUDA_KERNELS_CPP} -D CUDA_KERNELS_H=${CUDA_KERNELS_H} -D CUDA_SOURCE_CLASS=${CUDA_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/cuda/EncodeCUDAFiles.cmake
DEPENDS ${CUDA_KERNELS}
)
# The two files do not exist at configure time, so mark them as generated.
SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
# Pick the name of the main OpenMM library to link against, using the "_d"
# variant for Debug builds on Unix.
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# Link against the main OpenMM library, the CUDA libraries, the OpenMM CUDA
# platform library (debug or optimized variant) and the RPMD library.
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUDA_LIBRARIES} ${PTHREADS_LIB})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${OPENMM_LIBRARY_NAME}CUDA_d optimized ${OPENMM_LIBRARY_NAME}CUDA)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${SHARED_RPMD_TARGET} optimized ${SHARED_RPMD_TARGET})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_BUILDING_SHARED_LIBRARY")
# The library is installed into lib/plugins under the install prefix.
INSTALL(TARGETS ${SHARED_TARGET} DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/plugins)
# Ensure that links to the main CUDA library will be resolved.
# At install time, install_name_tool rewrites the installed plugin's reference
# to the OpenMM CUDA library so that it is looked up relative to the plugin's
# own location (@loader_path).
IF (APPLE)
IF (CMAKE_BUILD_TYPE MATCHES Debug)
SET(CUDA_LIBRARY libOpenMMCUDA_d.dylib)
ELSE (CMAKE_BUILD_TYPE MATCHES Debug)
SET(CUDA_LIBRARY libOpenMMCUDA.dylib)
ENDIF (CMAKE_BUILD_TYPE MATCHES Debug)
INSTALL(CODE "EXECUTE_PROCESS(COMMAND install_name_tool -change ${CUDA_LIBRARY} @loader_path/${CUDA_LIBRARY} ${CMAKE_INSTALL_PREFIX}/lib/plugins/lib${SHARED_TARGET}.dylib)")
ENDIF (APPLE)
# Build the tests for this plugin. ADD_SUBDIRECTORY replaces the deprecated
# SUBDIRS command and matches how the parent script adds this directory.
ADD_SUBDIRECTORY(tests)
#ifndef OPENMM_CUDARPMDKERNELFACTORY_H_
#define OPENMM_CUDARPMDKERNELFACTORY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/KernelFactory.h"
namespace OpenMM {
/**
* This KernelFactory creates kernels for the CUDA implementation of RPMDIntegrator.
*/
class CudaRpmdKernelFactory : public KernelFactory {
public:
/**
* Create the kernel implementation for the requested kernel name.
*
* @param name the name of the kernel to create
* @param platform the Platform the kernel is created for
* @param context the context the kernel will belong to
* @return a newly allocated KernelImpl
* @throws OpenMMException if the name is not that of a kernel this factory creates
*/
KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
};
} // namespace OpenMM
#endif /*OPENMM_CUDARPMDKERNELFACTORY_H_*/
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <exception>
#include "CudaRpmdKernelFactory.h"
#include "CudaRpmdKernels.h"
#include "openmm/internal/windowsExport.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
using namespace OpenMM;
/**
 * Plugin entry point for registering platforms. This plugin defines no
 * platforms of its own, so there is nothing to do.
 */
extern "C" void registerPlatforms() {}
/**
 * Plugin entry point for registering kernel factories. Looks up the CUDA
 * platform and registers a CudaRpmdKernelFactory with it for the RPMD
 * integration kernel. Any std::exception raised while doing so is swallowed,
 * so this function never propagates such an error to the caller.
 */
extern "C" void registerKernelFactories() {
    try {
        Platform& platform = Platform::getPlatformByName("CUDA");
        CudaRpmdKernelFactory* factory = new CudaRpmdKernelFactory();
        platform.registerKernelFactory(IntegrateRPMDStepKernel::Name(), factory);
    }
    catch (const std::exception&) {
        // Caught by const reference so the exception object is neither copied
        // nor sliced. Deliberately ignored: registration is best-effort
        // (presumably the CUDA platform is simply not available).
    }
}
/**
 * Create the CUDA kernel matching the requested name. The CudaContext used is
 * the first one stored in the platform data of the given context.
 *
 * @throws OpenMMException if the name is not a kernel this factory provides
 */
KernelImpl* CudaRpmdKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
    CudaPlatform::PlatformData* platformData = static_cast<CudaPlatform::PlatformData*>(context.getPlatformData());
    CudaContext& cu = *platformData->contexts[0];
    if (name != IntegrateRPMDStepKernel::Name())
        throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
    return new CudaIntegrateRPMDStepKernel(name, platform, cu);
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaRpmdKernelSources.h"
using namespace OpenMM;
using namespace std;
#ifndef OPENMM_CUDARPMDKERNELSOURCES_H_
#define OPENMM_CUDARPMDKERNELSOURCES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <string>
namespace OpenMM {
/**
* This class is a central holding place for the source code of CUDA kernels.
* The CMake build script inserts declarations into it based on the .cu files in the
* kernels subfolder.
*/
class CudaRpmdKernelSources {
public:
// Placeholder replaced by the build script with the generated declarations;
// this file is a template and is not meant to be compiled as is.
@CUDA_FILE_DECLARATIONS@
};
} // namespace OpenMM
#endif /*OPENMM_CUDARPMDKERNELSOURCES_H_*/
This diff is collapsed.
#ifndef CUDA_RPMD_KERNELS_H_
#define CUDA_RPMD_KERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/RpmdKernels.h"
#include "CudaContext.h"
#include "CudaArray.h"
namespace OpenMM {
/**
* This kernel is invoked by RPMDIntegrator to take one time step, and to get and
* set the state of system copies.
*/
class CudaIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public:
/**
* Create the kernel. The device arrays start out as NULL.
*
* @param name the name of the kernel
* @param platform the Platform the kernel belongs to
* @param cu the CudaContext the kernel will use
*/
CudaIntegrateRPMDStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
IntegrateRPMDStepKernel(name, platform), cu(cu), forces(NULL), positions(NULL), velocities(NULL) {
}
~CudaIntegrateRPMDStepKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the RPMDIntegrator this kernel will be used for
*/
void initialize(const System& system, const RPMDIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the RPMDIntegrator this kernel is being used for
* @param forcesAreValid if the context has been modified since the last time step, this will be
* false to show that cached forces are invalid and must be recalculated
*/
void execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the RPMDIntegrator this kernel is being used for
* @return the kinetic energy
*/
double computeKineticEnergy(ContextImpl& context, const RPMDIntegrator& integrator);
/**
* Set the positions of all particles in one copy of the system.
*
* @param copy the index of the copy whose positions are being set
* @param positions the new position of each particle
*/
void setPositions(int copy, const std::vector<Vec3>& positions);
/**
* Set the velocities of all particles in one copy of the system.
*
* @param copy the index of the copy whose velocities are being set
* @param velocities the new velocity of each particle
*/
void setVelocities(int copy, const std::vector<Vec3>& velocities);
/**
* Copy positions and velocities for one copy into the context.
*
* @param copy the index of the copy to transfer
* @param context the context to copy the values into
*/
void copyToContext(int copy, ContextImpl& context);
private:
// Internal helpers; their definitions are not in this header.
// NOTE(review): createFFT presumably returns generated kernel source for an
// FFT of the given size -- confirm against the implementation.
void computeForces(ContextImpl& context);
std::string createFFT(int size, const std::string& variable, bool forward);
CudaContext& cu;
int numCopies, numParticles, workgroupSize;
// Device arrays, NULL until allocated.
CudaArray* forces;
CudaArray* positions;
CudaArray* velocities;
CUfunction pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
};
} // namespace OpenMM
#endif /*CUDA_RPMD_KERNELS_H_*/
/**
 * Real part of the product c1*(c2r + i*c2i), where c1 is a complex scalar
 * (x = real, y = imaginary) and c2r/c2i hold the real and imaginary parts of
 * a complex three-component vector.
 */
__device__ float3 multiplyComplexRealPart(float2 c1, float3 c2r, float3 c2i) {
    const float3 fromReal = c1.x*c2r;
    const float3 fromImag = c1.y*c2i;
    return fromReal-fromImag;
}
/**
 * Imaginary part of the product c1*(c2r + i*c2i), where c1 is a complex scalar
 * (x = real, y = imaginary) and c2r/c2i hold the real and imaginary parts of
 * a complex three-component vector.
 */
__device__ float3 multiplyComplexImagPart(float2 c1, float3 c2r, float3 c2i) {
    const float3 fromImag = c1.x*c2i;
    const float3 fromReal = c1.y*c2r;
    return fromImag+fromReal;
}
/**
 * Real part of the product conj(c1)*(c2r + i*c2i), where c1 is a complex
 * scalar (x = real, y = imaginary) and c2r/c2i hold the real and imaginary
 * parts of a complex three-component vector.
 */
__device__ float3 multiplyComplexRealPartConj(float2 c1, float3 c2r, float3 c2i) {
    const float3 fromReal = c1.x*c2r;
    const float3 fromImag = c1.y*c2i;
    return fromReal+fromImag;
}
/**
 * Imaginary part of the product conj(c1)*(c2r + i*c2i), where c1 is a complex
 * scalar (x = real, y = imaginary) and c2r/c2i hold the real and imaginary
 * parts of a complex three-component vector.
 */
__device__ float3 multiplyComplexImagPartConj(float2 c1, float3 c2r, float3 c2i) {
    const float3 fromImag = c1.x*c2i;
    const float3 fromReal = c1.y*c2r;
    return fromImag-fromReal;
}
/**
* Apply the PILE-L thermostat.
*
* Each group of NUM_COPIES consecutive threads handles one particle at a time:
* the thread at position indexInBlock within the group loads the velocity of
* copy indexInBlock, the group transforms the velocities with FFT_V_FORWARD,
* every mode is damped and receives noise, and the result is transformed back
* with FFT_V_BACKWARD and stored.
*
* NUM_COPIES, NUM_ATOMS, PADDED_NUM_ATOMS, THREAD_BLOCK_SIZE, HBAR, SCALE, EXP,
* SQRT and the FFT_* macros are not defined in this file; presumably they are
* supplied when this source is compiled.
*
* @param velm velocities (xyz) with the inverse mass in w; copy c of a particle is at particle+c*PADDED_NUM_ATOMS
* @param random values used as noise (presumably normally distributed -- confirm against the caller)
* @param randomIndex offset of the first entry of random to use
* @param dt the time step entering the damping factors
* @param kT presumably Boltzmann's constant times the temperature -- confirm against the caller
* @param friction the friction coefficient applied to the centroid mode
*/
extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, unsigned int randomIndex,
float dt, float kT, float friction) {
// Number of particles handled per pass over the whole grid.
const int numBlocks = blockDim.x*gridDim.x/NUM_COPIES;
// First thread of this thread's group of NUM_COPIES threads, and this thread's position (copy index) in it.
const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES);
const int indexInBlock = threadIdx.x-blockStart;
const float nkT = NUM_COPIES*kT;
const float twown = 2.0f*nkT/HBAR;
// Damping factor and matching noise amplitude for the centroid mode.
const float c1_0 = EXP(-0.5f*dt*friction);
const float c2_0 = SQRT(1.0f-c1_0*c1_0);
// v stores real parts in its first blockDim.x entries and imaginary parts in the following blockDim.x entries.
__shared__ float3 v[2*THREAD_BLOCK_SIZE];
// temp is not referenced directly in this function; presumably it is scratch space for the FFT macros.
__shared__ float3 temp[2*THREAD_BLOCK_SIZE];
// w[k] = exp(-2*pi*i*k/NUM_COPIES)
__shared__ float2 w[NUM_COPIES];
float3* vreal = &v[blockStart];
float3* vimag = &v[blockStart+blockDim.x];
// Only the first NUM_COPIES threads of the block fill the table (for them indexInBlock == threadIdx.x).
if (threadIdx.x < NUM_COPIES)
w[indexInBlock] = make_float2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
__syncthreads();
// All threads of a group start from the same offset into the random array.
randomIndex += NUM_COPIES*((blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES);
// NOTE(review): __syncthreads() is called inside this loop, whose trip count depends on the
// thread's starting particle; confirm that all threads of a block run the same number of iterations.
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
float invMass = particleVelm.w;
float c3_0 = c2_0*SQRT(nkT*invMass);
// Forward FFT.
vreal[indexInBlock] = SCALE*make_float3(particleVelm.x, particleVelm.y, particleVelm.z);
vimag[indexInBlock] = make_float3(0);
__syncthreads();
FFT_V_FORWARD
// Apply the thermostat.
if (indexInBlock == 0) {
// Apply a local Langevin thermostat to the centroid mode.
float4 rand = random[randomIndex];
vreal[0] = vreal[0]*c1_0 + c3_0*make_float3(rand.x, rand.y, rand.z);
}
else {
// Use critical damping white noise for the remaining modes.
// Modes indexInBlock and NUM_COPIES-indexInBlock share the same k, hence the same frequency and random values.
int k = (indexInBlock <= NUM_COPIES/2 ? indexInBlock : NUM_COPIES-indexInBlock);
// For an even number of copies, mode NUM_COPIES/2 has no partner: it gets no imaginary noise
// and its real noise amplitude is larger by sqrt(2).
const bool isCenter = (NUM_COPIES%2 == 0 && k == NUM_COPIES/2);
const float wk = twown*sin(k*M_PI/NUM_COPIES);
const float c1 = EXP(-wk*dt);
const float c2 = SQRT((1.0f-c1*c1)/2.0f) * (isCenter ? sqrt(2.0f) : 1.0f);
const float c3 = c2*SQRT(nkT*invMass);
float4 rand1 = c3*random[randomIndex+k];
float4 rand2 = (isCenter ? make_float4(0) : c3*random[randomIndex+NUM_COPIES-k]);
vreal[indexInBlock] = c1*vreal[indexInBlock] + make_float3(rand1.x, rand1.y, rand1.z);
// The imaginary noise has opposite sign for the two partner modes.
vimag[indexInBlock] = c1*vimag[indexInBlock] + (indexInBlock < NUM_COPIES/2 ? make_float3(rand2.x, rand2.y, rand2.z) : make_float3(-rand2.x, -rand2.y, -rand2.z));
}
__syncthreads();
// Inverse FFT.
FFT_V_BACKWARD
// Store the new velocity, keeping the original w component.
velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_float4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w);
randomIndex += blockDim.x*gridDim.x;
}
}
/**
* Advance the positions and velocities.
*
* First the velocities of every copy receive a kick from the stored forces
* (using dt/2), then positions and velocities are transformed with the FFT_*
* macros, each mode is advanced by dt, and the results are transformed back
* and stored.
*
* NUM_COPIES, NUM_ATOMS, PADDED_NUM_ATOMS, THREAD_BLOCK_SIZE, HBAR, SCALE and
* the FFT_* macros are not defined in this file; presumably they are supplied
* when this source is compiled.
*
* @param posq positions (xyz); copy c of a particle is at particle+c*PADDED_NUM_ATOMS, w is preserved
* @param velm velocities (xyz) in the same layout; w multiplies the force in the kick and is preserved
* @param force forces as 64-bit integers; for each copy the x, y and z components are stored in
* three consecutive runs of PADDED_NUM_ATOMS entries
* @param dt the time step
* @param kT presumably Boltzmann's constant times the temperature -- confirm against the caller
*/
extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long* force, float dt, float kT) {
// Number of particles handled per pass over the whole grid.
const int numBlocks = (blockDim.x*gridDim.x)/NUM_COPIES;
// First thread of this thread's group of NUM_COPIES threads, and this thread's position (copy index) in it.
const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES);
const int indexInBlock = threadIdx.x-blockStart;
const float nkT = NUM_COPIES*kT;
const float twown = 2.0f*nkT/HBAR;
// Converts the integer force values to floating point (presumably a fixed-point representation).
const float forceScale = 1/(float) 0xFFFFFFFF;
// q and v store real parts in their first blockDim.x entries and imaginary parts in the following blockDim.x entries.
__shared__ float3 q[2*THREAD_BLOCK_SIZE];
__shared__ float3 v[2*THREAD_BLOCK_SIZE];
// temp is not referenced directly in this function; presumably it is scratch space for the FFT macros.
__shared__ float3 temp[2*THREAD_BLOCK_SIZE];
// w[k] = exp(-2*pi*i*k/NUM_COPIES)
__shared__ float2 w[NUM_COPIES];
// Update velocities.
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS;
int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3;
float4 particleVelm = velm[index];
particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w);
particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w);
particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w);
velm[index] = particleVelm;
}
// Evolve the free ring polymer by transforming to the frequency domain.
float3* qreal = &q[blockStart];
float3* qimag = &q[blockStart+blockDim.x];
float3* vreal = &v[blockStart];
float3* vimag = &v[blockStart+blockDim.x];
// Only the first NUM_COPIES threads of the block fill the table (for them indexInBlock == threadIdx.x).
if (threadIdx.x < NUM_COPIES)
w[indexInBlock] = make_float2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
__syncthreads();
// NOTE(review): __syncthreads() is called inside this loop, whose trip count depends on the
// thread's starting particle; confirm that all threads of a block run the same number of iterations.
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
float4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
// Forward FFT.
qreal[indexInBlock] = SCALE*make_float3(particlePosq.x, particlePosq.y, particlePosq.z);
qimag[indexInBlock] = make_float3(0);
vreal[indexInBlock] = SCALE*make_float3(particleVelm.x, particleVelm.y, particleVelm.z);
vimag[indexInBlock] = make_float3(0);
__syncthreads();
FFT_Q_FORWARD
FFT_V_FORWARD
// Advance each mode by dt (no thermostat is applied in this kernel).
if (indexInBlock == 0) {
// Mode 0 moves at constant velocity.
qreal[0] += vreal[0]*dt;
qimag[0] += vimag[0]*dt;
}
else {
// The other modes are rotated as harmonic oscillators with angular frequency wk.
const float wk = twown*sin(indexInBlock*M_PI/NUM_COPIES);
const float wt = wk*dt;
const float coswt = cos(wt);
const float sinwt = sin(wt);
const float3 vprimereal = vreal[indexInBlock]*coswt - qreal[indexInBlock]*(wk*sinwt); // Advance velocity from t to t+dt
const float3 vprimeimag = vimag[indexInBlock]*coswt - qimag[indexInBlock]*(wk*sinwt);
qreal[indexInBlock] = vreal[indexInBlock]*(sinwt/wk) + qreal[indexInBlock]*coswt; // Advance position from t to t+dt
qimag[indexInBlock] = vimag[indexInBlock]*(sinwt/wk) + qimag[indexInBlock]*coswt;
vreal[indexInBlock] = vprimereal;
vimag[indexInBlock] = vprimeimag;
}
__syncthreads();
// Inverse FFT.
FFT_Q_BACKWARD
FFT_V_BACKWARD
// Store the results, keeping the original w components.
posq[particle+indexInBlock*PADDED_NUM_ATOMS] = make_float4(SCALE*qreal[indexInBlock].x, SCALE*qreal[indexInBlock].y, SCALE*qreal[indexInBlock].z, particlePosq.w);
velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_float4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w);
}
}
/**
 * Advance the velocities by a half step.
 *
 * Each group of NUM_COPIES consecutive threads works on one particle at a
 * time, one thread per copy. The force components of a copy are stored in
 * three consecutive runs of PADDED_NUM_ATOMS entries (x, then y, then z).
 */
extern "C" __global__ void advanceVelocities(float4* velm, long long* force, float dt) {
    const float forceScale = 1/(float) 0xFFFFFFFF;
    const int particleStride = (blockDim.x*gridDim.x)/NUM_COPIES;
    const int copy = threadIdx.x-NUM_COPIES*(threadIdx.x/NUM_COPIES);
    const int velocityOffset = copy*PADDED_NUM_ATOMS;
    const int forceOffset = copy*PADDED_NUM_ATOMS*3;

    int atom = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES;
    while (atom < NUM_ATOMS) {
        const int xIndex = atom+forceOffset;
        const int yIndex = xIndex+PADDED_NUM_ATOMS;
        const int zIndex = xIndex+PADDED_NUM_ATOMS*2;
        float4 vel = velm[atom+velocityOffset];
        // w is left unchanged below, so this factor is the same for all three components.
        const float kick = 0.5f*dt*vel.w;
        vel.x += forceScale*force[xIndex]*kick;
        vel.y += forceScale*force[yIndex]*kick;
        vel.z += forceScale*force[zIndex]*kick;
        velm[atom+velocityOffset] = vel;
        atom += particleStride;
    }
}
/**
 * Copy a set of per-atom values from the integrator's arrays to the context.
 * Entry i of dst receives the value stored for atom order[i] of the selected
 * copy in src.
 */
extern "C" __global__ void copyToContext(float4* src, float4* dst, int* order, int copy) {
    const int copyOffset = copy*PADDED_NUM_ATOMS;
    int atom = blockIdx.x*blockDim.x+threadIdx.x;
    while (atom < NUM_ATOMS) {
        const int sourceIndex = copyOffset+order[atom];
        dst[atom] = src[sourceIndex];
        atom += blockDim.x*gridDim.x;
    }
}
/**
 * Copy a set of per-atom force values from the context to the integrator's arrays.
 * Both arrays keep the three components of a force PADDED_NUM_ATOMS entries
 * apart; entry i of src is written to the slot of atom order[i] of the
 * selected copy in dst.
 */
extern "C" __global__ void copyFromContext(long long* src, long long* dst, int* order, int copy) {
    const int copyOffset = copy*PADDED_NUM_ATOMS*3;
    int atom = blockIdx.x*blockDim.x+threadIdx.x;
    while (atom < NUM_ATOMS) {
        const int target = copyOffset+order[atom];
        dst[target] = src[atom];
        dst[target+PADDED_NUM_ATOMS] = src[atom+PADDED_NUM_ATOMS];
        dst[target+PADDED_NUM_ATOMS*2] = src[atom+PADDED_NUM_ATOMS*2];
        atom += blockDim.x*gridDim.x;
    }
}
/**
 * Update atom positions so all copies are offset by the same number of periodic box widths.
 * The displacement between movedPos and the stored position of copy movedCopy
 * is added to the stored position of every copy of the atom.
 */
extern "C" __global__ void applyCellTranslations(float4* posq, float4* movedPos, int* order, int movedCopy) {
    int atom = blockIdx.x*blockDim.x+threadIdx.x;
    while (atom < NUM_ATOMS) {
        const int slot = order[atom];
        const float4 shift = movedPos[atom]-posq[movedCopy*PADDED_NUM_ATOMS+slot];
        for (int c = 0; c < NUM_COPIES; c++)
            posq[c*PADDED_NUM_ATOMS+slot] += shift;
        atom += blockDim.x*gridDim.x;
    }
}
#
# Testing
#
ENABLE_TESTING()
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIR})
# Automatically create tests using files whose names match "*Test*.cpp"
FILE(GLOB TEST_PROGS "*Test*.cpp")
FOREACH(TEST_PROG ${TEST_PROGS})
# The test and its executable are named after the source file without its extension.
GET_FILENAME_COMPONENT(TEST_ROOT ${TEST_PROG} NAME_WE)
# Link with shared library
# (only the RPMD library is linked here, not the CUDA plugin built in the parent directory)
ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_RPMD_TARGET})
ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
ENDFOREACH(TEST_PROG ${TEST_PROGS})
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of RPMDIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "openmm/CustomNonbondedForce.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/RPMDIntegrator.h"
#include "SimTKUtilities/SimTKOpenMMUtilities.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
/**
 * Simulate non-interacting particles with RPMDIntegrator on the CUDA platform
 * and check two averages against their expected values: the mean kinetic
 * energy of a copy and the mean radius of gyration of a particle's ring of
 * copies.
 */
void testFreeParticles() {
    const int numParticles = 100;
    const int numCopies = 30;
    const double temperature = 300.0;
    const double mass = 1.0;
    System system;
    for (int i = 0; i < numParticles; i++)
        system.addParticle(mass);
    RPMDIntegrator integ(numCopies, temperature, 10.0, 0.001);
    Platform& platform = Platform::getPlatformByName("CUDA");
    Context context(system, integ, platform);
    OpenMM_SFMT::SFMT sfmt;
    init_gen_rand(0, sfmt);
    vector<Vec3> positions(numParticles);
    for (int i = 0; i < numCopies; i++) {
        for (int j = 0; j < numParticles; j++) {
            // Draw the coordinates in separate statements: the evaluation order of
            // function arguments is unspecified, so drawing them inside the Vec3
            // constructor call would make the starting positions compiler dependent.
            const double x = 0.02*genrand_real2(sfmt);
            const double y = 0.02*genrand_real2(sfmt);
            const double z = 0.02*genrand_real2(sfmt);
            positions[j] = Vec3(x, y, z);
        }
        integ.setPositions(i, positions);
    }

    // Equilibrate before collecting statistics.
    const int numEquilibrationSteps = 1000;
    const int numSteps = 1000;
    integ.step(numEquilibrationSteps);

    // ke[k] accumulates the kinetic energy of copy k, rg[j] the squared radius
    // of gyration of particle j, both summed over all sampled steps.
    vector<double> ke(numCopies, 0.0);
    vector<double> rg(numParticles, 0.0);
    // hbar converted from J*s to kJ*ps/mol.
    const RealOpenMM hbar = 1.054571628e-34*AVOGADRO/(1000*1e-12);
    for (int i = 0; i < numSteps; i++) {
        integ.step(1);
        vector<State> state(numCopies);
        for (int j = 0; j < numCopies; j++)
            state[j] = integ.getState(j, State::Positions | State::Velocities);
        for (int j = 0; j < numParticles; j++) {
            double rg2 = 0.0;
            for (int k = 0; k < numCopies; k++) {
                Vec3 v = state[k].getVelocities()[j];
                ke[k] += 0.5*mass*v.dot(v);
                // Sum the squared distance over all ordered pairs of copies.
                for (int m = 0; m < numCopies; m++) {
                    Vec3 delta = state[k].getPositions()[j]-state[m].getPositions()[j];
                    rg2 += delta.dot(delta);
                }
            }
            rg[j] += rg2/(2*numCopies*numCopies);
        }
    }

    // Average the kinetic energy over steps and copies.
    double meanKE = 0.0;
    for (int i = 0; i < numCopies; i++)
        meanKE += ke[i];
    meanKE /= numSteps*numCopies;
    double expectedKE = 0.5*numCopies*numParticles*3*BOLTZ*temperature;
    ASSERT_USUALLY_EQUAL_TOL(expectedKE, meanKE, 1e-2);

    // Average the squared radius of gyration over steps and particles.
    double meanRg2 = 0.0;
    for (int i = 0; i < numParticles; i++)
        meanRg2 += rg[i];
    meanRg2 /= numSteps*numParticles;
    double expectedRg = hbar/(2*sqrt(mass*BOLTZ*temperature));
    ASSERT_USUALLY_EQUAL_TOL(expectedRg, sqrt(meanRg2), 1e-3);
}
void testParaHydrogen() {
const int numParticles = 32;
const int numCopies = 12;
const double temperature = 25.0;
const double mass = 2.0;
const double boxSize = 1.1896;
const int numSteps = 1000;
const int numBins = 200;
const double reference[] = {
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 4.932814042206152e-5, 1.244331241336431e-4, 4.052316284060125e-4,
1.544810863683946e-3, 4.376197806690222e-3, 1.025847561714293e-2, 2.286702037465422e-2,
4.371052180263602e-2, 7.518538770734748e-2, 0.122351534531647, 0.185758975626622,
0.266399984652322, 0.363380262153250, 0.473696401293219, 0.595312098494172,
0.726049519422861, 0.862264551954547, 0.991102029379444, 1.1147503922535,
1.23587006992066, 1.33495411932817, 1.42208208736987, 1.49273884004107,
1.54633319690403, 1.58714702233941, 1.60439217751355, 1.61804190608902,
1.60680198476058, 1.58892222973695, 1.56387607986781, 1.52629494593350,
1.48421439018970, 1.43656176771959, 1.38752775598872, 1.33310695719931,
1.28363477223121, 1.23465642750248, 1.18874848666326, 1.14350496170519,
1.10292486009936, 1.06107270157688, 1.02348927970441, 0.989729345271297,
0.959273446941802, 0.932264875865758, 0.908818658748942, 0.890946420768315,
0.869332737718165, 0.856401736350349, 0.842370069917020, 0.834386614237393,
0.826268072171045, 0.821547250199453, 0.818786865315836, 0.819441757028076,
0.819156933383128, 0.822275325148621, 0.828919078023881, 0.837233720599450,
0.846961908186718, 0.855656955481099, 0.864520333201247, 0.876082425547566,
0.886950044046000, 0.900275658318995
};
// Create a box of para-hydrogen.
System system;
for (int i = 0; i < numParticles; i++)
system.addParticle(mass);
system.setDefaultPeriodicBoxVectors(Vec3(boxSize,0,0), Vec3(0,boxSize,0), Vec3(0,0,boxSize));
CustomNonbondedForce* nb = new CustomNonbondedForce("2625.49963*(exp(1.713-1.5671*p-0.00993*p*p)-(12.14/p^6+215.2/p^8-143.1/p^9+4813.9/p^10)*(step(rc-p)*exp(-(rc/p-1)^2)+1-step(rc-p))); p=r/0.05291772108; rc=8.32");
nb->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
nb->setCutoffDistance(boxSize/2);
vector<double> params;
for (int i = 0; i < numParticles; i++)
nb->addParticle(params);
system.addForce(nb);
RPMDIntegrator integ(numCopies, temperature, 10.0, 0.0005);
Platform& platform = Platform::getPlatformByName("CUDA");
Context context(system, integ, platform);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
for (int i = 0; i < numCopies; i++)
integ.setPositions(i, positions);
integ.step(1000);
// Simulate it.
vector<int> counts(numBins, 0);
const double invBoxSize = 1.0/boxSize;
double meanKE = 0.0;
const RealOpenMM hbar = 1.054571628e-34*AVOGADRO/(1000*1e-12);
for (int step = 0; step < numSteps; step++) {
integ.step(20);
vector<State> states(numCopies);
for (int i = 0; i < numCopies; i++)
states[i] = integ.getState(i, State::Positions | State::Forces);
// Record the radial distribution function.
const vector<Vec3>& pos = states[0].getPositions();
for (int j = 0; j < numParticles; j++)
for (int k = 0; k < j; k++) {
Vec3 delta = pos[j]-pos[k];
delta[0] -= floor(delta[0]*invBoxSize+0.5)*boxSize;
delta[1] -= floor(delta[1]*invBoxSize+0.5)*boxSize;
delta[2] -= floor(delta[2]*invBoxSize+0.5)*boxSize;
double dist = sqrt(delta.dot(delta));
int bin = (int) (numBins*(dist/boxSize));
counts[bin]++;
}
// Calculate the quantum contribution to the kinetic energy.
vector<Vec3> centroids(numParticles, Vec3());
for (int i = 0; i < numCopies; i++) {
const vector<Vec3>& pos = states[i].getPositions();
for (int j = 0; j < numParticles; j++)
centroids[j] += pos[j];
}
for (int j = 0; j < numParticles; j++)
centroids[j] *= 1.0/numCopies;
double ke = 0.0;
for (int i = 0; i < numCopies; i++) {
const vector<Vec3>& pos = states[i].getPositions();
const vector<Vec3>& f = states[i].getForces();
for (int j = 0; j < numParticles; j++) {
Vec3 delta = centroids[j]-pos[j];
ke += delta.dot(f[j]);
}
}
meanKE += ke/(2*numCopies*numParticles);
}
// Check against expected values.
double scale = (boxSize*boxSize*boxSize)/(numSteps*0.5*numParticles*numParticles);
for (int i = 0; i < numBins/2; i++) {
double r1 = i*boxSize/numBins;
double r2 = (i+1)*boxSize/numBins;
double volume = (4.0/3.0)*M_PI*(r2*r2*r2-r1*r1*r1);
ASSERT_USUALLY_EQUAL_TOL(reference[i], scale*counts[i]/volume, 0.1);
}
meanKE /= numSteps*BOLTZ;
ASSERT_USUALLY_EQUAL_TOL(60.0, 1.5*temperature+meanKE, 0.02);
}
/**
 * Test driver: load the platform plugins from the default plugins directory,
 * then run both RPMD tests.  Prints "Done" and returns 0 if no exception was
 * thrown; otherwise prints the exception message plus a failure banner and
 * returns 1.
 */
int main() {
    int exitCode = 0;
    try {
        Platform::loadPluginsFromDirectory(Platform::getDefaultPluginsDirectory());
        testFreeParticles();
        testParaHydrogen();
    }
    catch(const std::exception& error) {
        std::cout << "exception: " << error.what() << std::endl;
        std::cout << "FAIL - ERROR. Test failed." << std::endl;
        exitCode = 1;
    }
    if (exitCode == 0)
        std::cout << "Done" << std::endl;
    return exitCode;
}
......@@ -98,10 +98,7 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
}
void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
const System& system = context.getSystem();
const int paddedParticles = cl.getPaddedNumAtoms();
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
if (!hasInitializedKernel) {
hasInitializedKernel = true;
pileKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
......
......@@ -107,7 +107,6 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
float4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
float invMass = particleVelm.w;
// Forward FFT.
......@@ -130,7 +129,6 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob
const float wt = wk*dt;
const float coswt = cos(wt);
const float sinwt = sin(wt);
const float wm = wk/particleVelm.w;
const float4 vprimereal = vreal[indexInBlock]*coswt - qreal[indexInBlock]*(wk*sinwt); // Advance velocity from t to t+dt
const float4 vprimeimag = vimag[indexInBlock]*coswt - qimag[indexInBlock]*(wk*sinwt);
qreal[indexInBlock] = vreal[indexInBlock]*(sinwt/wk) + qreal[indexInBlock]*coswt; // Advance position from t to t+dt
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment