Commit a713aabf authored by Peter Eastman's avatar Peter Eastman
Browse files

Created OpenCL implementation of RPMD

parent f355f568
......@@ -135,8 +135,9 @@ INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
# Set variables needed for encoding kernel sources into a C++ class
SET(CL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(CL_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/OpenCLKernelSources.cpp)
SET(CL_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/OpenCLKernelSources.h)
SET(CL_SOURCE_CLASS OpenCLKernelSources)
SET(CL_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.cpp)
SET(CL_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${CL_KERNELS_CPP} ${CL_KERNELS_H})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
......
FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl)
SET(CL_FILE_DECLARATIONS)
SET(CL_FILE_DEFINITIONS)
CONFIGURE_FILE(${CL_SOURCE_DIR}/OpenCLKernelSources.cpp.in ${CL_KERNELS_CPP})
CONFIGURE_FILE(${CL_SOURCE_DIR}/${CL_SOURCE_CLASS}.cpp.in ${CL_KERNELS_CPP})
FOREACH(file ${OPENCL_KERNELS})
# Load the file contents and process it.
FILE(STRINGS ${file} file_content NEWLINE_CONSUME)
......@@ -17,6 +17,6 @@ FOREACH(file ${OPENCL_KERNELS})
# Record the variable declaration and definition.
SET(CL_FILE_DECLARATIONS ${CL_FILE_DECLARATIONS}static\ const\ std::string\ ${variable_name};\n)
FILE(APPEND ${CL_KERNELS_CPP} const\ string\ OpenCLKernelSources::${variable_name}\ =\ \"${file_content}\"\;\n)
FILE(APPEND ${CL_KERNELS_CPP} const\ string\ ${CL_SOURCE_CLASS}::${variable_name}\ =\ \"${file_content}\"\;\n)
ENDFOREACH(file)
CONFIGURE_FILE(${CL_SOURCE_DIR}/OpenCLKernelSources.h.in ${CL_KERNELS_H})
CONFIGURE_FILE(${CL_SOURCE_DIR}/${CL_SOURCE_CLASS}.h.in ${CL_KERNELS_H})
......@@ -7,7 +7,7 @@ INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl)
ADD_CUSTOM_COMMAND(OUTPUT ${CL_KERNELS_CPP} ${CL_KERNELS_H}
COMMAND ${CMAKE_COMMAND}
ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -P ${CMAKE_CURRENT_SOURCE_DIR}/../EncodeCLFiles.cmake
ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -D CL_SOURCE_CLASS=${CL_SOURCE_CLASS} -P ${CMAKE_CURRENT_SOURCE_DIR}/../EncodeCLFiles.cmake
DEPENDS ${OPENCL_KERNELS}
)
SET_SOURCE_FILES_PROPERTIES(${CL_KERNELS_CPP} ${CL_KERNELS_H} PROPERTIES GENERATED TRUE)
......
......@@ -204,4 +204,4 @@ ENDIF (EXECUTABLE_OUTPUT_PATH)
#INCLUDE(ApiDoxygen.cmake)
#ADD_SUBDIRECTORY(tests)
#ADD_SUBDIRECTORY(examples)
ADD_SUBDIRECTORY(platforms/opencl)
#---------------------------------------------------
# OpenMM OpenCL RPMD Integrator
#
# Creates OpenMM library, base name=OpenMMRPMDOpenCL.
# Default libraries are shared & optimized. Variants
# are created for debug (_d).
#
# Windows:
# OpenMMRPMDOpenCL[_d].dll
# OpenMMRPMDOpenCL[_d].lib
# Unix:
# libOpenMMRPMDOpenCL[_d].so
#----------------------------------------------------
IF (APPLE)
SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
SET (CMAKE_OSX_SYSROOT "/Developer/SDKs/MacOSX10.6.sdk")
ENDIF (APPLE)
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS .)
# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
SET(OPENMMRPMDOPENCL_LIBRARY_NAME OpenMMRPMDOpenCL)
SET(SHARED_TARGET ${OPENMMRPMDOPENCL_LIBRARY_NAME})
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
# But on Unix or Cygwin we have to add the suffix manually
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_TARGET ${SHARED_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# These are all the places to search for header files which are
# to be part of the API.
SET(API_INCLUDE_DIRS) # start empty
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
# append
SET(API_INCLUDE_DIRS ${API_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
ENDFOREACH(subdir)
# We'll need both *relative* path names, starting with their API_INCLUDE_DIRS,
# and absolute pathnames.
SET(API_REL_INCLUDE_FILES) # start these out empty
SET(API_ABS_INCLUDE_FILES)
FOREACH(dir ${API_INCLUDE_DIRS})
FILE(GLOB fullpaths ${dir}/*.h) # returns full pathnames
SET(API_ABS_INCLUDE_FILES ${API_ABS_INCLUDE_FILES} ${fullpaths})
FOREACH(pathname ${fullpaths})
GET_FILENAME_COMPONENT(filename ${pathname} NAME)
SET(API_REL_INCLUDE_FILES ${API_REL_INCLUDE_FILES} ${dir}/${filename})
ENDFOREACH(pathname)
ENDFOREACH(dir)
# collect up source files
SET(SOURCE_FILES) # empty
SET(SOURCE_INCLUDE_FILES)
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
FILE(GLOB_RECURSE src_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${src_files}) #append
SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/opencl/include)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/opencl/src)
# Set variables needed for encoding kernel sources into a C++ class
SET(CL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(CL_SOURCE_CLASS OpenCLRpmdKernelSources)
SET(CL_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.cpp)
SET(CL_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${CL_KERNELS_CPP} ${CL_KERNELS_H})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
# Create the library
INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl)
ADD_CUSTOM_COMMAND(OUTPUT ${CL_KERNELS_CPP} ${CL_KERNELS_H}
COMMAND ${CMAKE_COMMAND}
ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -D CL_SOURCE_CLASS=${CL_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/opencl/EncodeCLFiles.cmake
DEPENDS ${OPENCL_KERNELS}
)
SET_SOURCE_FILES_PROPERTIES(${CL_KERNELS_CPP} ${CL_KERNELS_H} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${OPENCL_LIBRARIES} ${PTHREADS_LIB})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${OPENMM_LIBRARY_NAME}OpenCL_d optimized ${OPENMM_LIBRARY_NAME}OpenCL)
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
SUBDIRS (tests)
#ifndef OPENMM_OPENCLRPMDKERNELFACTORY_H_
#define OPENMM_OPENCLRPMDKERNELFACTORY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/KernelFactory.h"
namespace OpenMM {
/**
* This KernelFactory creates kernels for the OpenCL implementation of RPMDIntegrator.
*/
class OpenCLRpmdKernelFactory : public KernelFactory {
public:
KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLRPMDKERNELFACTORY_H_*/
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLRpmdKernelFactory.h"
#include "OpenCLRpmdKernels.h"
#include "openmm/internal/windowsExport.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
using namespace OpenMM;
extern "C" void registerPlatforms() {
}
extern "C" void registerKernelFactories() {
Platform& platform = Platform::getPlatformByName("OpenCL");
OpenCLRpmdKernelFactory* factory = new OpenCLRpmdKernelFactory();
platform.registerKernelFactory(IntegrateRPMDStepKernel::Name(), factory);
}
KernelImpl* OpenCLRpmdKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
OpenCLContext& cl = *static_cast<OpenCLPlatform::PlatformData*>(context.getPlatformData())->contexts[0];
if (name == IntegrateRPMDStepKernel::Name())
return new OpenCLIntegrateRPMDStepKernel(name, platform, cl);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLRpmdKernelSources.h"
using namespace OpenMM;
using namespace std;
#ifndef OPENMM_OPENCLRPMDKERNELSOURCES_H_
#define OPENMM_OPENCLRPMDKERNELSOURCES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <string>
namespace OpenMM {
/**
* This class is a central holding place for the source code of OpenCL kernels.
* The CMake build script inserts declarations into it based on the .cl files in the
* kernels subfolder.
*/
class OpenCLRpmdKernelSources {
public:
@CL_FILE_DECLARATIONS@
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLRPMDKERNELSOURCES_H_*/
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLRpmdKernels.h"
#include "OpenCLRpmdKernelSources.h"
#include "openmm/internal/ContextImpl.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLFFT3D.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
using namespace OpenMM;
using namespace std;
OpenCLIntegrateRPMDStepKernel::~OpenCLIntegrateRPMDStepKernel() {
if (forces != NULL)
delete forces;
if (positions != NULL)
delete positions;
if (velocities != NULL)
delete velocities;
}
void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system);
numCopies = integrator.getNumCopies();
numParticles = system.getNumParticles();
workgroupSize = numCopies;
while (workgroupSize < 128-numCopies)
workgroupSize += numCopies;
if (numCopies != OpenCLFFT3D::findLegalDimension(numCopies))
throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
int paddedParticles = cl.getPaddedNumAtoms();
forces = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdForces");
positions = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdPositions");
velocities = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdVelocities");
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
vector<mm_float4> temp(positions->getSize());
for (int i = 0; i < positions->getSize(); i++)
temp[i] = mm_float4(0, 0, 0, 0);
for (int i = 0; i < velocities->getSize(); i++)
temp[i] = mm_float4(0, 0, 0, 1);
velocities->upload(temp);
// Create kernels.
map<string, string> defines;
defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(cl.getPaddedNumAtoms());
defines["NUM_COPIES"] = OpenCLExpressionUtilities::intToString(numCopies);
defines["HBAR"] = OpenCLExpressionUtilities::doubleToString(1.054571628e-34*AVOGADRO/(1000*1e-12));
defines["SCALE"] = OpenCLExpressionUtilities::doubleToString(1.0/sqrt((double) numCopies));
defines["M_PI"] = OpenCLExpressionUtilities::doubleToString(M_PI);
map<string, string> replacements;
replacements["FFT_Q_FORWARD"] = createFFT(numCopies, "q", true);
replacements["FFT_Q_BACKWARD"] = createFFT(numCopies, "q", false);
replacements["FFT_V_FORWARD"] = createFFT(numCopies, "v", true);
replacements["FFT_V_BACKWARD"] = createFFT(numCopies, "v", false);
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLRpmdKernelSources::rpmd, replacements), defines, "");
pileKernel = cl::Kernel(program, "applyPileThermostat");
stepKernel = cl::Kernel(program, "integrateStep");
velocitiesKernel = cl::Kernel(program, "advanceVelocities");
}
void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
const System& system = context.getSystem();
const int paddedParticles = cl.getPaddedNumAtoms();
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
if (!hasInitializedKernel) {
hasInitializedKernel = true;
pileKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
pileKernel.setArg(1, 2*workgroupSize*sizeof(mm_float4), NULL);
pileKernel.setArg(2, 2*workgroupSize*sizeof(mm_float4), NULL);
pileKernel.setArg(3, numCopies*sizeof(mm_float2), NULL);
pileKernel.setArg<cl::Buffer>(4, integration.getRandom().getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(1, velocities->getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(2, forces->getDeviceBuffer());
stepKernel.setArg(3, 2*workgroupSize*sizeof(mm_float4), NULL);
stepKernel.setArg(4, 2*workgroupSize*sizeof(mm_float4), NULL);
stepKernel.setArg(5, 2*workgroupSize*sizeof(mm_float4), NULL);
stepKernel.setArg(6, numCopies*sizeof(mm_float2), NULL);
velocitiesKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
}
// Loop over copies and compute the force on each one.
if (!forcesAreValid) {
for (int i = 0; i < numCopies; i++) {
cl.getQueue().enqueueCopyBuffer(positions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(),
i*paddedParticles, 0, paddedParticles*sizeof(mm_float4));
context.calcForcesAndEnergy(true, false);
cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), forces->getDeviceBuffer(),
0, i*paddedParticles, paddedParticles*sizeof(mm_float4));
}
}
// Apply the PILE-L thermostat.
const double dt = integrator.getStepSize();
pileKernel.setArg<cl_uint>(5, integration.prepareRandomNumbers(cl.getPaddedNumAtoms()));
pileKernel.setArg<cl_float>(6, dt);
pileKernel.setArg<cl_float>(7, integrator.getTemperature()*BOLTZ);
pileKernel.setArg<cl_float>(8, integrator.getFriction());
cl.executeKernel(pileKernel, numParticles*numCopies, workgroupSize);
// Update positions and velocities.
stepKernel.setArg<cl_float>(7, dt);
stepKernel.setArg<cl_float>(8, integrator.getTemperature()*BOLTZ);
cl.executeKernel(stepKernel, numParticles*numCopies, workgroupSize);
// Calculate forces based on the updated positions.
for (int i = 0; i < numCopies; i++) {
cl.getQueue().enqueueCopyBuffer(positions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(),
i*paddedParticles, 0, paddedParticles*sizeof(mm_float4));
context.calcForcesAndEnergy(true, false);
cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), forces->getDeviceBuffer(),
0, i*paddedParticles, paddedParticles*sizeof(mm_float4));
}
// Update velocities.
velocitiesKernel.setArg<cl_float>(2, dt);
cl.executeKernel(velocitiesKernel, numParticles*numCopies, workgroupSize);
// Apply the PILE-L thermostat again.
cl.executeKernel(pileKernel, numParticles*numCopies, workgroupSize);
// Update the time and step count.
cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1);
}
void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
if (positions == NULL)
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
if (pos.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
vector<mm_float4> posq(numParticles);
for (int i = 0; i < numParticles; i++)
posq[i] = mm_float4(pos[i][0], pos[i][1], pos[i][2], cl.getPosq()[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
}
void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
if (velocities == NULL)
throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
if (vel.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
vector<mm_float4> velm(numParticles);
for (int i = 0; i < numParticles; i++)
velm[i] = mm_float4(vel[i][0], vel[i][1], vel[i][2], cl.getVelm()[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
}
void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) const {
int paddedParticles = cl.getPaddedNumAtoms();
cl.getQueue().enqueueCopyBuffer(positions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(),
copy*paddedParticles*sizeof(mm_float4), 0, paddedParticles*sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(velocities->getDeviceBuffer(), cl.getVelm().getDeviceBuffer(),
copy*paddedParticles*sizeof(mm_float4), 0, paddedParticles*sizeof(mm_float4));
}
string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable, bool forward) {
stringstream source;
int unfactored = size;
int stage = 0;
int L = size;
int m = 1;
string sign = (forward ? "1.0f" : "-1.0f");
source<<"{\n";
source<<"__local float4* real0 = "<<variable<<"real;\n";
source<<"__local float4* imag0 = "<<variable<<"imag;\n";
source<<"__local float4* real1 = &temp[blockStart];\n";
source<<"__local float4* imag1 = &temp[blockStart+get_local_size(0)];\n";
// Factor size, generating an appropriate block of code for each factor.
while (unfactored > 1) {
int input = stage%2;
int output = 1-input;
source<<"{\n";
if (unfactored%5 == 0) {
L = L/5;
source<<"// Pass "<<(stage+1)<<" (radix 5)\n";
source<<"if (indexInBlock < "<<(L*m)<<") {\n";
source<<"int i = indexInBlock;\n";
source<<"int j = i/"<<m<<";\n";
source<<"float4 c0r = real"<<input<<"[i];\n";
source<<"float4 c0i = imag"<<input<<"[i];\n";
source<<"float4 c1r = real"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"float4 c1i = imag"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"float4 c2r = real"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"float4 c2i = imag"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"float4 c3r = real"<<input<<"[i+"<<(3*L*m)<<"];\n";
source<<"float4 c3i = imag"<<input<<"[i+"<<(3*L*m)<<"];\n";
source<<"float4 c4r = real"<<input<<"[i+"<<(4*L*m)<<"];\n";
source<<"float4 c4i = imag"<<input<<"[i+"<<(4*L*m)<<"];\n";
source<<"float4 d0r = c1r+c4r;\n";
source<<"float4 d0i = c1i+c4i;\n";
source<<"float4 d1r = c2r+c3r;\n";
source<<"float4 d1i = c2i+c3i;\n";
source<<"float4 d2r = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c1r-c4r);\n";
source<<"float4 d2i = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c1i-c4i);\n";
source<<"float4 d3r = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c2r-c3r);\n";
source<<"float4 d3i = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c2i-c3i);\n";
source<<"float4 d4r = d0r+d1r;\n";
source<<"float4 d4i = d0i+d1i;\n";
source<<"float4 d5r = "<<OpenCLExpressionUtilities::doubleToString(0.25*sqrt(5.0))<<"*(d0r-d1r);\n";
source<<"float4 d5i = "<<OpenCLExpressionUtilities::doubleToString(0.25*sqrt(5.0))<<"*(d0i-d1i);\n";
source<<"float4 d6r = c0r-0.25f*d4r;\n";
source<<"float4 d6i = c0i-0.25f*d4i;\n";
source<<"float4 d7r = d6r+d5r;\n";
source<<"float4 d7i = d6i+d5i;\n";
source<<"float4 d8r = d6r-d5r;\n";
source<<"float4 d8i = d6i-d5i;\n";
string coeff = OpenCLExpressionUtilities::doubleToString(sin(0.2*M_PI)/sin(0.4*M_PI));
source<<"float4 d9r = "<<sign<<"*(d2i+"<<coeff<<"*d3i);\n";
source<<"float4 d9i = "<<sign<<"*(-d2r-"<<coeff<<"*d3r);\n";
source<<"float4 d10r = "<<sign<<"*("<<coeff<<"*d2i-d3i);\n";
source<<"float4 d10i = "<<sign<<"*(d3r-"<<coeff<<"*d2r);\n";
source<<"real"<<output<<"[i+4*j*"<<m<<"] = c0r+d4r;\n";
source<<"imag"<<output<<"[i+4*j*"<<m<<"] = c0i+d4i;\n";
source<<"real"<<output<<"[i+(4*j+1)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<size<<"/"<<(5*L)<<"], d7r+d9r, d7i+d9i);\n";
source<<"imag"<<output<<"[i+(4*j+1)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<size<<"/"<<(5*L)<<"], d7r+d9r, d7i+d9i);\n";
source<<"real"<<output<<"[i+(4*j+2)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(2*size)<<"/"<<(5*L)<<"], d8r+d10r, d8i+d10i);\n";
source<<"imag"<<output<<"[i+(4*j+2)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(2*size)<<"/"<<(5*L)<<"], d8r+d10r, d8i+d10i);\n";
source<<"real"<<output<<"[i+(4*j+3)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(3*size)<<"/"<<(5*L)<<"], d8r-d10r, d8i-d10i);\n";
source<<"imag"<<output<<"[i+(4*j+3)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(3*size)<<"/"<<(5*L)<<"], d8r-d10r, d8i-d10i);\n";
source<<"real"<<output<<"[i+(4*j+4)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(4*size)<<"/"<<(5*L)<<"], d7r-d9r, d7i-d9i);\n";
source<<"imag"<<output<<"[i+(4*j+4)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(4*size)<<"/"<<(5*L)<<"], d7r-d9r, d7i-d9i);\n";
source<<"}\n";
m = m*5;
unfactored /= 5;
}
else if (unfactored%4 == 0) {
L = L/4;
source<<"// Pass "<<(stage+1)<<" (radix 4)\n";
source<<"if (indexInBlock < "<<(L*m)<<") {\n";
source<<"int i = indexInBlock;\n";
source<<"int j = i/"<<m<<";\n";
source<<"float4 c0r = real"<<input<<"[i];\n";
source<<"float4 c0i = imag"<<input<<"[i];\n";
source<<"float4 c1r = real"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"float4 c1i = imag"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"float4 c2r = real"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"float4 c2i = imag"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"float4 c3r = real"<<input<<"[i+"<<(3*L*m)<<"];\n";
source<<"float4 c3i = imag"<<input<<"[i+"<<(3*L*m)<<"];\n";
source<<"float4 d0r = c0r+c2r;\n";
source<<"float4 d0i = c0i+c2i;\n";
source<<"float4 d1r = c0r-c2r;\n";
source<<"float4 d1i = c0i-c2i;\n";
source<<"float4 d2r = c1r+c3r;\n";
source<<"float4 d2i = c1i+c3i;\n";
source<<"float4 d3r = "<<sign<<"*(c1i-c3i);\n";
source<<"float4 d3i = "<<sign<<"*(c3r-c1r);\n";
source<<"real"<<output<<"[i+3*j*"<<m<<"] = d0r+d2r;\n";
source<<"imag"<<output<<"[i+3*j*"<<m<<"] = d0i+d2i;\n";
source<<"real"<<output<<"[i+(3*j+1)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<size<<"/"<<(4*L)<<"], d1r+d3r, d1i+d3i);\n";
source<<"imag"<<output<<"[i+(3*j+1)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<size<<"/"<<(4*L)<<"], d1r+d3r, d1i+d3i);\n";
source<<"real"<<output<<"[i+(3*j+2)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(2*size)<<"/"<<(4*L)<<"], d0r-d2r, d0i-d2i);\n";
source<<"imag"<<output<<"[i+(3*j+2)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(2*size)<<"/"<<(4*L)<<"], d0r-d2r, d0i-d2i);\n";
source<<"real"<<output<<"[i+(3*j+3)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(3*size)<<"/"<<(4*L)<<"], d1r-d3r, d1i-d3i);\n";
source<<"imag"<<output<<"[i+(3*j+3)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(3*size)<<"/"<<(4*L)<<"], d1r-d3r, d1i-d3i);\n";
source<<"}\n";
m = m*4;
unfactored /= 4;
}
else if (unfactored%3 == 0) {
L = L/3;
source<<"// Pass "<<(stage+1)<<" (radix 3)\n";
source<<"if (indexInBlock < "<<(L*m)<<") {\n";
source<<"int i = indexInBlock;\n";
source<<"int j = i/"<<m<<";\n";
source<<"float4 c0r = real"<<input<<"[i];\n";
source<<"float4 c0i = imag"<<input<<"[i];\n";
source<<"float4 c1r = real"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"float4 c1i = imag"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"float4 c2r = real"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"float4 c2i = imag"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"float4 d0r = c1r+c2r;\n";
source<<"float4 d0i = c1i+c2i;\n";
source<<"float4 d1r = c0r-0.5f*d0r;\n";
source<<"float4 d1i = c0i-0.5f*d0i;\n";
source<<"float4 d2r = "<<sign<<"*"<<OpenCLExpressionUtilities::doubleToString(sin(M_PI/3.0))<<"*(c1i-c2i);\n";
source<<"float4 d2i = "<<sign<<"*"<<OpenCLExpressionUtilities::doubleToString(sin(M_PI/3.0))<<"*(c2r-c1r);\n";
source<<"real"<<output<<"[i+2*j*"<<m<<"] = c0r+d0r;\n";
source<<"imag"<<output<<"[i+2*j*"<<m<<"] = c0i+d0i;\n";
source<<"real"<<output<<"[i+(2*j+1)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<size<<"/"<<(3*L)<<"], d1r+d2r, d1i+d2i);\n";
source<<"imag"<<output<<"[i+(2*j+1)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<size<<"/"<<(3*L)<<"], d1r+d2r, d1i+d2i);\n";
source<<"real"<<output<<"[i+(2*j+2)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(2*size)<<"/"<<(3*L)<<"], d1r-d2r, d1i-d2i);\n";
source<<"imag"<<output<<"[i+(2*j+2)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(2*size)<<"/"<<(3*L)<<"], d1r-d2r, d1i-d2i);\n";
source<<"}\n";
m = m*3;
unfactored /= 3;
}
else if (unfactored%2 == 0) {
L = L/2;
source<<"// Pass "<<(stage+1)<<" (radix 2)\n";
source<<"if (indexInBlock < "<<(L*m)<<") {\n";
source<<"int i = indexInBlock;\n";
source<<"int j = i/"<<m<<";\n";
source<<"float4 c0r = real"<<input<<"[i];\n";
source<<"float4 c0i = imag"<<input<<"[i];\n";
source<<"float4 c1r = real"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"float4 c1i = imag"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"real"<<output<<"[i+j*"<<m<<"] = c0r+c1r;\n";
source<<"imag"<<output<<"[i+j*"<<m<<"] = c0i+c1i;\n";
source<<"real"<<output<<"[i+(j+1)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<size<<"/"<<(2*L)<<"], c0r-c1r, c0i-c1i);\n";
source<<"imag"<<output<<"[i+(j+1)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<size<<"/"<<(2*L)<<"], c0r-c1r, c0i-c1i);\n";
source<<"}\n";
m = m*2;
unfactored /= 2;
}
else
throw OpenMMException("Illegal size for FFT: "+OpenCLExpressionUtilities::intToString(size));
source<<"barrier(CLK_LOCAL_MEM_FENCE);\n";
source<<"}\n";
++stage;
}
// Create the kernel.
if (stage%2 == 1) {
source<<variable<<"real[indexInBlock] = real1[indexInBlock];\n";
source<<variable<<"imag[indexInBlock] = imag1[indexInBlock];\n";
}
source<<"}\n";
return source.str();
}
#ifndef OPENCL_RPMD_KERNELS_H_
#define OPENCL_RPMD_KERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/RpmdKernels.h"
#include "OpenCLContext.h"
#include "OpenCLArray.h"
namespace OpenMM {
/**
* This kernel is invoked by RPMDIntegrator to take one time step, and to get and
* set the state of system copies.
*/
class OpenCLIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public:
OpenCLIntegrateRPMDStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false), forces(NULL), positions(NULL), velocities(NULL) {
}
~OpenCLIntegrateRPMDStepKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the RPMDIntegrator this kernel will be used for
*/
void initialize(const System& system, const RPMDIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the RPMDIntegrator this kernel is being used for
* @param forcesAreValid if the context has been modified since the last time step, this will be
* false to show that cached forces are invalid and must be recalculated
*/
void execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid);
/**
* Get the positions of all particles in one copy of the system.
*/
void setPositions(int copy, const std::vector<Vec3>& positions);
/**
* Get the velocities of all particles in one copy of the system.
*/
void setVelocities(int copy, const std::vector<Vec3>& velocities);
/**
* Copy positions and velocities for one copy into the context.
*/
void copyToContext(int copy, ContextImpl& context) const;
private:
std::string createFFT(int size, const std::string& variable, bool forward);
OpenCLContext& cl;
bool hasInitializedKernel;
int numCopies, numParticles, workgroupSize;
OpenCLArray<mm_float4>* forces;
OpenCLArray<mm_float4>* positions;
OpenCLArray<mm_float4>* velocities;
cl::Kernel pileKernel, stepKernel, velocitiesKernel;
};
} // namespace OpenMM
#endif /*OPENCL_RPMD_KERNELS_H_*/
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
float4 multiplyComplexRealPart(float2 c1, float4 c2r, float4 c2i) {
return c1.x*c2r-c1.y*c2i;
}
float4 multiplyComplexImagPart(float2 c1, float4 c2r, float4 c2i) {
return c1.x*c2i+c1.y*c2r;
}
/**
* Apply the PILE-L thermostat.
*/
__kernel void applyPileThermostat(__global float4* velm, __local float4* v, __local float4* temp, __local float2* w, __global float4* random, unsigned int randomIndex,
float dt, float kT, float friction) {
const int numBlocks = get_global_size(0)/NUM_COPIES;
const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
const int indexInBlock = get_local_id(0)-blockStart;
const float nkT = NUM_COPIES*kT;
const float twown = 2.0f*nkT/HBAR;
const float c1_0 = EXP(-0.5f*dt*friction);
const float c2_0 = SQRT(1.0f-c1_0*c1_0);
__local float4* vreal = &v[blockStart];
__local float4* vimag = &v[blockStart+get_local_size(0)];
if (get_local_id(0) < NUM_COPIES)
w[get_local_id(0)] = (float2) (cos(get_local_id(0)*2*M_PI/NUM_COPIES), sin(-get_local_id(0)*2*M_PI/NUM_COPIES));
barrier(CLK_LOCAL_MEM_FENCE);
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
float invMass = particleVelm.w;
float c3_0 = c2_0*SQRT(nkT*invMass);
// Forward FFT.
vreal[indexInBlock] = SCALE*particleVelm;
vimag[indexInBlock] = (float4) (0.0f, 0.0f, 0.0f, 0.0f);
barrier(CLK_GLOBAL_MEM_FENCE);
FFT_V_FORWARD
// Apply the thermostat.
if (indexInBlock == 0) {
// Apply a local Langevin thermostat to the centroid mode.
vreal[0].xyz = vreal[0].xyz*c1_0 + c3_0*random[randomIndex].xyz;
}
else {
// Use critical damping white noise for the remaining modes.
int k = (indexInBlock <= NUM_COPIES/2 ? indexInBlock : NUM_COPIES-indexInBlock);
const bool isCenter = (NUM_COPIES%2 == 0 && k == NUM_COPIES/2);
const float wk = twown*sin(k*M_PI/NUM_COPIES);
const float c1 = EXP(-wk*dt);
const float c2 = SQRT((1.0f-c1*c1)/2.0f) * (isCenter ? sqrt(2.0f) : 1.0f);
const float c3 = c2*SQRT(nkT*invMass);
float4 rand1 = c3*random[randomIndex+k];
float4 rand2 = (isCenter ? 0.0f : c3*random[randomIndex+NUM_COPIES-k]);
vreal[indexInBlock].xyz = c1*vreal[indexInBlock].xyz + rand1.xyz;
vimag[indexInBlock].xyz = c1*vimag[indexInBlock].xyz + (indexInBlock < NUM_COPIES/2 ? rand2.xyz : -rand2.xyz);
}
barrier(CLK_GLOBAL_MEM_FENCE);
// Inverse FFT.
FFT_V_BACKWARD
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz;
randomIndex += numBlocks*NUM_COPIES;
}
}
/**
* Advance the positions and velocities.
*/
__kernel void integrateStep(__global float4* posq, __global float4* velm, __global float4* force,
__local float4* q, __local float4* v, __local float4* temp, __local float2* w, float dt, float kT) {
const int numBlocks = get_global_size(0)/NUM_COPIES;
const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
const int indexInBlock = get_local_id(0)-blockStart;
const float nkT = NUM_COPIES*kT;
const float twown = 2.0f*nkT/HBAR;
// Update velocities.
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS;
float4 particleVelm = velm[index];
particleVelm.xyz += force[index].xyz*(0.5f*dt*particleVelm.w);
velm[index] = particleVelm;
}
// Evolve the free ring polymer by transforming to the frequency domain.
__local float4* qreal = &q[blockStart];
__local float4* qimag = &q[blockStart+get_local_size(0)];
__local float4* vreal = &v[blockStart];
__local float4* vimag = &v[blockStart+get_local_size(0)];
if (get_local_id(0) < NUM_COPIES)
w[get_local_id(0)] = (float2) (cos(get_local_id(0)*2*M_PI/NUM_COPIES), sin(-get_local_id(0)*2*M_PI/NUM_COPIES));
barrier(CLK_LOCAL_MEM_FENCE);
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
float4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
float invMass = particleVelm.w;
// Forward FFT.
qreal[indexInBlock] = SCALE*particlePosq;
qimag[indexInBlock] = (float4) (0.0f, 0.0f, 0.0f, 0.0f);
vreal[indexInBlock] = SCALE*particleVelm;
vimag[indexInBlock] = (float4) (0.0f, 0.0f, 0.0f, 0.0f);
barrier(CLK_GLOBAL_MEM_FENCE);
FFT_Q_FORWARD
FFT_V_FORWARD
// Apply the thermostat.
if (indexInBlock == 0) {
qreal[0].xyz += vreal[0].xyz*dt;
qimag[0].xyz += vimag[0].xyz*dt;
}
else {
const float wk = twown*sin(indexInBlock*M_PI/NUM_COPIES);
const float wt = wk*dt;
const float coswt = cos(wt);
const float sinwt = sin(wt);
const float wm = wk/particleVelm.w;
const float4 vprimereal = vreal[indexInBlock]*coswt - qreal[indexInBlock]*(wk*sinwt); // Advance velocity from t to t+dt
const float4 vprimeimag = vimag[indexInBlock]*coswt - qimag[indexInBlock]*(wk*sinwt);
qreal[indexInBlock] = vreal[indexInBlock]*(sinwt/wk) + qreal[indexInBlock]*coswt; // Advance position from t to t+dt
qimag[indexInBlock] = vimag[indexInBlock]*(sinwt/wk) + qimag[indexInBlock]*coswt;
vreal[indexInBlock] = vprimereal;
vimag[indexInBlock] = vprimeimag;
}
barrier(CLK_GLOBAL_MEM_FENCE);
// Inverse FFT.
FFT_Q_BACKWARD
FFT_V_BACKWARD
posq[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*qreal[indexInBlock].xyz;
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz;
}
}
/**
* Advance the velocities by a half step.
*/
__kernel void advanceVelocities(__global float4* velm, __global float4* force, float dt) {
const int numBlocks = get_global_size(0)/NUM_COPIES;
const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
const int indexInBlock = get_local_id(0)-blockStart;
// Update velocities.
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS;
float4 particleVelm = velm[index];
particleVelm.xyz += force[index].xyz*(0.5f*dt*particleVelm.w);
velm[index] = particleVelm;
}
}
#
# Testing
#
ENABLE_TESTING()
INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
# Automatically create tests using files named "Test*.cpp"
FILE(GLOB TEST_PROGS "*Test*.cpp")
FOREACH(TEST_PROG ${TEST_PROGS})
GET_FILENAME_COMPONENT(TEST_ROOT ${TEST_PROG} NAME_WE)
# Link with shared library
ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET})
ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
ENDFOREACH(TEST_PROG ${TEST_PROGS})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment