Commit abb8cb4b authored by Peter Eastman's avatar Peter Eastman
Browse files

Very early beginnings of new CUDA platform

parent 18501459
#---------------------------------------------------
# OpenMM CUDA Platform
#
# Creates OpenMM library, base name=OpenMMCUDA.
# Default libraries are shared & optimized. Variants
# are created for static (_static) and debug (_d).
#
# Windows:
# OpenMMCUDA[_d].dll
# OpenMMCUDA[_d].lib
# OpenMMCUDA_static[_d].lib
# Unix:
# libOpenMMCUDA[_d].so
# libOpenMMCUDA_static[_d].a
#----------------------------------------------------
IF (APPLE)
SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
SET (CMAKE_OSX_SYSROOT "/Developer/SDKs/MacOSX10.6.sdk")
ENDIF (APPLE)
set(OPENMM_BUILD_CUDA_TESTS TRUE CACHE BOOL "Whether to build CUDA test cases")
if(OPENMM_BUILD_CUDA_TESTS)
SUBDIRS (tests)
endif(OPENMM_BUILD_CUDA_TESTS)
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS .)
# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
SET(OPENMMCUDA_LIBRARY_NAME OpenMMCUDA)
SET(SHARED_TARGET ${OPENMMCUDA_LIBRARY_NAME})
SET(STATIC_TARGET ${OPENMMCUDA_LIBRARY_NAME}_static)
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
# But on Unix or Cygwin we have to add the suffix manually
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_TARGET ${SHARED_TARGET}_d)
SET(STATIC_TARGET ${STATIC_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# These are all the places to search for header files which are
# to be part of the API.
SET(API_INCLUDE_DIRS) # start empty
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
# append
SET(API_INCLUDE_DIRS ${API_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
ENDFOREACH(subdir)
# We'll need both *relative* path names, starting with their API_INCLUDE_DIRS,
# and absolute pathnames.
SET(API_REL_INCLUDE_FILES) # start these out empty
SET(API_ABS_INCLUDE_FILES)
FOREACH(dir ${API_INCLUDE_DIRS})
FILE(GLOB fullpaths ${dir}/*.h) # returns full pathnames
SET(API_ABS_INCLUDE_FILES ${API_ABS_INCLUDE_FILES} ${fullpaths})
FOREACH(pathname ${fullpaths})
GET_FILENAME_COMPONENT(filename ${pathname} NAME)
SET(API_REL_INCLUDE_FILES ${API_REL_INCLUDE_FILES} ${dir}/${filename})
ENDFOREACH(pathname)
ENDFOREACH(dir)
# collect up source files
SET(SOURCE_FILES) # empty
SET(SOURCE_INCLUDE_FILES)
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
FILE(GLOB_RECURSE src_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${src_files}) #append
SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
# Set variables needed for encoding kernel sources into a C++ class
SET(CUDA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(CUDA_SOURCE_CLASS CudaKernelSources)
SET(CUDA_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.cpp)
SET(CUDA_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
SUBDIRS (sharedTarget)
FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
SET(CUDA_FILE_DECLARATIONS)
SET(CUDA_FILE_DEFINITIONS)
CONFIGURE_FILE(${CUDA_SOURCE_DIR}/${CUDA_SOURCE_CLASS}.cpp.in ${CUDA_KERNELS_CPP})
FOREACH(file ${CUDA_KERNELS})
# Load the file contents and process it.
FILE(STRINGS ${file} file_content NEWLINE_CONSUME)
# Replace all backslashes by double backslashes as they are being put in a C string.
# Be careful not to replace the backslash before a semicolon as that is the CMAKE
# internal escaping of a semicolon to prevent it from acting as a list seperator.
STRING(REGEX REPLACE "\\\\([^;])" "\\\\\\\\\\1" file_content "${file_content}")
# Escape double quotes as being put in a C string.
STRING(REPLACE "\"" "\\\"" file_content "${file_content}")
# Split in separate C strings for each line.
STRING(REPLACE "\n" "\\n\"\n\"" file_content "${file_content}")
# Determine a name for the variable that will contain this file's contents
FILE(RELATIVE_PATH filename ${CUDA_SOURCE_DIR}/kernels ${file})
STRING(LENGTH ${filename} filename_length)
MATH(EXPR filename_length ${filename_length}-3)
STRING(SUBSTRING ${filename} 0 ${filename_length} variable_name)
# Record the variable declaration and definition.
SET(CUDA_FILE_DECLARATIONS ${CUDA_FILE_DECLARATIONS}static\ const\ std::string\ ${variable_name};\n)
FILE(APPEND ${CUDA_KERNELS_CPP} const\ string\ ${CUDA_SOURCE_CLASS}::${variable_name}\ =\ \"${file_content}\"\;\n)
ENDFOREACH(file)
CONFIGURE_FILE(${CUDA_SOURCE_DIR}/${CUDA_SOURCE_CLASS}.h.in ${CUDA_KERNELS_H})
#ifndef OPENMM_CUDAKERNELFACTORY_H_
#define OPENMM_CUDAKERNELFACTORY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/KernelFactory.h"
namespace OpenMM {
/**
* This KernelFactory creates all kernels for CudaPlatform.
*/
class CudaKernelFactory : public KernelFactory {
public:
KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
};
} // namespace OpenMM
#endif /*OPENMM_CUDAKERNELFACTORY_H_*/
#ifndef OPENMM_CUDAPLATFORM_H_
#define OPENMM_CUDAPLATFORM_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/Platform.h"
#include "openmm/System.h"
namespace OpenMM {
class CudaContext;
/**
* This Platform subclass uses CUDA implementations of the OpenMM kernels.
*/
class OPENMM_EXPORT CudaPlatform : public Platform {
public:
class PlatformData;
CudaPlatform();
const std::string& getName() const {
static const std::string name = "CUDA";
return name;
}
double getSpeed() const {
return 100;
}
bool supportsDoublePrecision() const;
const std::string& getPropertyValue(const Context& context, const std::string& property) const;
void setPropertyValue(Context& context, const std::string& property, const std::string& value) const;
void contextCreated(ContextImpl& context, const std::map<std::string, std::string>& properties) const;
void contextDestroyed(ContextImpl& context) const;
/**
* This is the name of the parameter for selecting which CUDA device or devices to use.
*/
static const std::string& CudaDeviceIndex() {
static const std::string key = "CudaDeviceIndex";
return key;
}
/**
* This is the name of the parameter for selecting whether CUDA should sync or spin loop while waiting for results.
*/
static const std::string& CudaUseBlockingSync() {
static const std::string key = "CudaUseBlockingSync";
return key;
}
/**
* This is the name of the parameter for selecting what numerical precision to use.
*/
static const std::string& CudaPrecision() {
static const std::string key = "CudaPrecision";
return key;
}
/**
* This is the name of the parameter for specifying the path to the CUDA compiler.
*/
static const std::string& CudaCompiler() {
static const std::string key = "CudaCompiler";
return key;
}
/**
* This is the name of the parameter for specifying the path to the directory for creating temporary files.
*/
static const std::string& CudaTempDirectory() {
static const std::string key = "CudaTempDirectory";
return key;
}
};
class OPENMM_EXPORT CudaPlatform::PlatformData {
public:
PlatformData(const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& compilerProperty, const std::string& tempProperty);
~PlatformData();
void initializeContexts(const System& system);
void syncContexts();
std::vector<CudaContext*> contexts;
std::vector<double> contextEnergy;
bool removeCM;
int cmMotionFrequency;
int stepCount, computeForceCount;
double time;
std::map<std::string, std::string> propertyValues;
};
} // namespace OpenMM
#endif /*OPENMM_CUDAPLATFORM_H_*/
#
# Include CUDA related files.
#
INCLUDE(FindCUDA)
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
COMMAND ${CMAKE_COMMAND}
ARGS -D CUDA_SOURCE_DIR=${CUDA_SOURCE_DIR} -D CUDA_KERNELS_CPP=${CUDA_KERNELS_CPP} -D CUDA_KERNELS_H=${CUDA_KERNELS_H} -D CUDA_SOURCE_CLASS=${CUDA_SOURCE_CLASS} -P ${CMAKE_CURRENT_SOURCE_DIR}/../EncodeCUDAFiles.cmake
DEPENDS ${CUDA_KERNELS}
)
SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUDA_LIBRARIES} ${PTHREADS_LIB})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaArray.h"
#include <iostream>
#include <sstream>
#include <vector>
using namespace OpenMM;
CudaArray::CudaArray(int size, int elementSize, const std::string& name) :
size(size), elementSize(elementSize), name(name), ownsMemory(true) {
CUresult result = cuMemAlloc(&pointer, size*elementSize);
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error creating array "<<name<<": "<<result;
throw OpenMMException(str.str());
}
}
CudaArray::~CudaArray() {
if (ownsMemory) {
CUresult result = cuMemFree(pointer);
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error deleting array "<<name<<": "<<result;
throw OpenMMException(str.str());
}
}
}
void CudaArray::upload(void* data, bool blocking) {
CUresult result;
if (blocking)
result = cuMemcpyHtoD(pointer, data, size*elementSize);
else
result = cuMemcpyHtoDAsync(pointer, data, size*elementSize, 0);
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error uploading array "<<name<<": "<<result;
throw OpenMMException(str.str());
}
}
void CudaArray::download(void* data, bool blocking) const {
CUresult result;
if (blocking)
result = cuMemcpyDtoH(data, pointer, size*elementSize);
else
result = cuMemcpyDtoHAsync(data, pointer, size*elementSize, 0);
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error downloading array "<<name<<": "<<result;
throw OpenMMException(str.str());
}
}
#ifndef OPENMM_CUDAARRAY_H_
#define OPENMM_CUDAARRAY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/OpenMMException.h"
#include <cuda.h>
#include <iostream>
#include <sstream>
#include <vector>
namespace OpenMM {
/**
* This class encapsulates a block of CUDA device memory. It provides a simplified API
* for working with it and for copying data to and from device memory.
*/
class CudaArray {
public:
/**
* Create a CudaArray object. The object is allocated on the heap with the "new" operator.
* The template argument is the data type of each array element.
*
* @param size the number of elements in the array
* @param name the name of the array
*/
template <class T>
static CudaArray* create(int size, const std::string& name) {
return new CudaArray(size, sizeof(T), name);
}
/**
* Create a CudaArray object.
*
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
CudaArray(int size, int elementSize, const std::string& name);
~CudaArray();
/**
* Get the number of elements in the array.
*/
int getSize() const {
return size;
}
/**
* Get the size of each element in bytes.
*/
int getElementSize() const {
return elementSize;
}
/**
* Get the name of the array.
*/
const std::string& getName() const {
return name;
}
/**
* Get a pointer to the device memory.
*/
CUdeviceptr getDevicePointer() {
return pointer;
}
/**
* Copy the values in a vector to the device memory.
*/
template <class T>
void upload(std::vector<T>& data) {
if (sizeof(T) != elementSize || data.size() != size)
throw OpenMMException("Error uploading array "+name+": The specified vector does not match the size of the array");
upload(&data[0], true);
}
/**
* Copy the values in the Buffer to a vector.
*/
template <class T>
void download(std::vector<T>& data) const {
if (sizeof(T) != elementSize)
throw OpenMMException("Error downloading array "+name+": The specified vector has the wrong element size");
if (data.size() != size)
data.resize(size);
download(&data[0], true);
}
/**
* Copy the values in an array to the device memory.
*
* @param data the data to copy
* @param blocking if true, this call will block until the transfer is complete. If false,
* the source array must be in page-locked memory.
*/
void upload(void* data, bool blocking = true);
/**
* Copy the values in the device memory to an array.
*
* @param data the array to copy the memory to
* @param blocking if true, this call will block until the transfer is complete. If false,
* the destination array must be in page-locked memory.
*/
void download(void* data, bool blocking = true) const;
private:
CUdeviceptr pointer;
int size, elementSize;
bool ownsMemory;
std::string name;
};
} // namespace OpenMM
#endif /*OPENMM_CUDAARRAY_H_*/
This diff is collapsed.
#ifndef OPENMM_CUDACONTEXT_H_
#define OPENMM_CUDACONTEXT_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <map>
#include <queue>
#include <string>
#include <pthread.h>
#define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER
// Prevent Windows from defining macros that interfere with other code.
#define NOMINMAX
#endif
#include <cuda.h>
#include <builtin_types.h>
#include <vector_functions.h>
#include "openmm/internal/windowsExport.h"
#include "CudaPlatform.h"
namespace OpenMM {
class CudaArray;
class CudaForceInfo;
class CudaIntegrationUtilities;
class CudaBondedUtilities;
class CudaNonbondedUtilities;
class System;
/**
* This class contains the information associated with a Context by the CUDA Platform. Each CudaContext is
* specific to a particular device, and manages data structures and kernels for that device. When running a simulation
* in parallel on multiple devices, there is a separate CudaContext for each one. The list of all contexts is
* stored in the CudaPlatform::PlatformData.
* <p>
* In addition, a worker thread is created for each CudaContext. This is used for parallel computations, so that
* blocking calls to one device will not block other devices. When only a single device is being used, the worker
* thread is not used and calculations are performed on the main application thread.
*/
class OPENMM_EXPORT CudaContext {
public:
class WorkTask;
class WorkThread;
class ReorderListener;
static const int ThreadBlockSize;
static const int TileSize;
CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision,
const std::string& compiler, const std::string& tempDir, CudaPlatform::PlatformData& platformData);
~CudaContext();
// /**
// * This is called to initialize internal data structures after all Forces in the system
// * have been initialized.
// */
// void initialize();
/**
* Add a CudaForce to this context.
*/
void addForce(CudaForceInfo* force);
/**
* Get the CUcontext associated with this object.
*/
CUcontext getContext() {
return context;
}
/**
* Get the CUdevice associated with this object.
*/
CUdevice getDevice() {
return device;
}
/**
* Get the index of the CUdevice associated with this object.
*/
int getDeviceIndex() {
return deviceIndex;
}
/**
* Get the PlatformData object this context is part of.
*/
CudaPlatform::PlatformData& getPlatformData() {
return platformData;
}
/**
* Get the index of this context in the list stored in the PlatformData.
*/
int getContextIndex() const {
return contextIndex;
}
/**
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/
CudaArray& getPosq() {
return *posq;
}
/**
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
*/
CudaArray& getVelm() {
return *velm;
}
// /**
// * Get the array which contains the force on each atom.
// */
// CudaArray<mm_float4>& getForce() {
// return *force;
// }
// /**
// * Get the array which contains the buffers in which forces are computed.
// */
// CudaArray<mm_float4>& getForceBuffers() {
// return *forceBuffers;
// }
// /**
// * Get the array which contains a contribution to each force represented as 64 bit fixed point.
// */
// CudaArray<cl_long>& getLongForceBuffer() {
// return *longForceBuffer;
// }
// /**
// * Get the array which contains the buffer in which energy is computed.
// */
// CudaArray<cl_float>& getEnergyBuffer() {
// return *energyBuffer;
// }
// /**
// * Get the array which contains the index of each atom.
// */
// CudaArray<cl_int>& getAtomIndex() {
// return *atomIndex;
// }
// /**
// * Get the number of cells by which the positions are offset.
// */
// std::vector<mm_int4>& getPosCellOffsets() {
// return posCellOffsets;
// }
/**
* Replace all occurrences of a list of substrings.
*
* @param input a string to process
* @param replacements a set of strings that should be replaced with new strings wherever they appear in the input string
* @return a new string produced by performing the replacements
*/
std::string replaceStrings(const std::string& input, const std::map<std::string, std::string>& replacements) const;
/**
* Create a CUDA module from source code.
*
* @param source the source code of the module
* @param optimizationFlags the optimization flags to pass to the CUDA compiler. If this is
* omitted, a default set of options will be used
*/
CUmodule createModule(const std::string source, const char* optimizationFlags = NULL);
/**
* Create a CUDA module from source code.
*
* @param source the source code of the module
* @param defines a set of preprocessor definitions (name, value) to define when compiling the program
* @param optimizationFlags the optimization flags to pass to the CUDA compiler. If this is
* omitted, a default set of options will be used
*/
CUmodule createModule(const std::string source, const std::map<std::string, std::string>& defines, const char* optimizationFlags = NULL);
// /**
// * Execute a kernel.
// *
// * @param kernel the kernel to execute
// * @param workUnits the maximum number of work units that should be used
// * @param blockSize the size of each thread block to use
// */
// void executeKernel(cl::Kernel& kernel, int workUnits, int blockSize = -1);
// /**
// * Set all elements of an array to 0.
// */
// void clearBuffer(CudaArray<float>& array);
// /**
// * Set all elements of an array to 0.
// */
// void clearBuffer(CudaArray<mm_float4>& array);
// /**
// * Set all elements of an array to 0.
// *
// * @param memory the Memory to clear
// * @param size the number of float elements in the buffer
// */
// void clearBuffer(cl::Memory& memory, int size);
// /**
// * Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
// *
// * @param memory the Memory to clear
// * @param size the number of float elements in the buffer
// */
// void addAutoclearBuffer(cl::Memory& memory, int size);
// /**
// * Clear all buffers that have been registered with addAutoclearBuffer().
// */
// void clearAutoclearBuffers();
// /**
// * Given a collection of buffers packed into an array, sum them and store
// * the sum in the first buffer.
// *
// * @param array the array containing the buffers to reduce
// * @param numBuffers the number of buffers packed into the array
// */
// void reduceBuffer(CudaArray<mm_float4>& array, int numBuffers);
// /**
// * Sum the buffesr containing forces.
// */
// void reduceForces();
// /**
// * Get the current simulation time.
// */
// double getTime() {
// return time;
// }
// /**
// * Set the current simulation time.
// */
// void setTime(double t) {
// time = t;
// }
// /**
// * Get the number of integration steps that have been taken.
// */
// int getStepCount() {
// return stepCount;
// }
// /**
// * Set the number of integration steps that have been taken.
// */
// void setStepCount(int steps) {
// stepCount = steps;
// }
// /**
// * Get the number of times forces or energy has been computed.
// */
// int getComputeForceCount() {
// return computeForceCount;
// }
// /**
// * Set the number of times forces or energy has been computed.
// */
// void setComputeForceCount(int count) {
// computeForceCount = count;
// }
// /**
// * Get the number of atoms.
// */
// int getNumAtoms() const {
// return numAtoms;
// }
// /**
// * Get the number of atoms, rounded up to a multiple of TileSize. This is the actual size of
// * most arrays with one element per atom.
// */
// int getPaddedNumAtoms() const {
// return paddedNumAtoms;
// }
// /**
// * Get the number of blocks of TileSize atoms.
// */
// int getNumAtomBlocks() const {
// return numAtomBlocks;
// }
// /**
// * Get the standard number of thread blocks to use when executing kernels.
// */
// int getNumThreadBlocks() const {
// return numThreadBlocks;
// }
// /**
// * Get the number of force buffers.
// */
// int getNumForceBuffers() const {
// return numForceBuffers;
// }
// /**
// * Get the SIMD width of the device being used.
// */
// int getSIMDWidth() const {
// return simdWidth;
// }
// /**
// * Get whether the device being used supports 64 bit atomic operations on global memory.
// */
// bool getSupports64BitGlobalAtomics() {
// return supports64BitGlobalAtomics;
// }
// /**
// * Get whether the device being used supports double precision math.
// */
// bool getSupportsDoublePrecision() {
// return supportsDoublePrecision;
// }
// /**
// * Get the size of the periodic box.
// */
// mm_float4 getPeriodicBoxSize() const {
// return periodicBoxSize;
// }
// /**
// * Set the size of the periodic box.
// */
// void setPeriodicBoxSize(double xsize, double ysize, double zsize) {
// periodicBoxSize = mm_float4((float) xsize, (float) ysize, (float) zsize, 0);
// invPeriodicBoxSize = mm_float4((float) (1.0/xsize), (float) (1.0/ysize), (float) (1.0/zsize), 0);
// }
// /**
// * Get the inverse of the size of the periodic box.
// */
// mm_float4 getInvPeriodicBoxSize() const {
// return invPeriodicBoxSize;
// }
// /**
// * Get the CudaIntegrationUtilities for this context.
// */
// CudaIntegrationUtilities& getIntegrationUtilities() {
// return *integration;
// }
// /**
// * Get the CudaBondedUtilities for this context.
// */
// CudaBondedUtilities& getBondedUtilities() {
// return *bonded;
// }
// /**
// * Get the CudaNonbondedUtilities for this context.
// */
// CudaNonbondedUtilities& getNonbondedUtilities() {
// return *nonbonded;
// }
// /**
// * Get the thread used by this context for executing parallel computations.
// */
// WorkThread& getWorkThread() {
// return *thread;
// }
// /**
// * Get whether atoms were reordered during the most recent force/energy computation.
// */
// bool getAtomsWereReordered() const {
// return atomsWereReordered;
// }
// /**
// * Set whether atoms were reordered during the most recent force/energy computation.
// */
// void setAtomsWereReordered(bool wereReordered) {
// atomsWereReordered = wereReordered;
// }
// /**
// * Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
// * together in the arrays.
// *
// * @param enforcePeriodic if true, the atom positions may be altered to enforce periodic boundary conditions
// */
// void reorderAtoms(bool enforcePeriodic);
// /**
// * Add a listener that should be called whenever atoms get reordered. The CudaContext
// * assumes ownership of the object, and deletes it when the context itself is deleted.
// */
// void addReorderListener(ReorderListener* listener);
// /**
// * Get the list of ReorderListeners.
// */
// std::vector<ReorderListener*>& getReorderListeners() {
// return reorderListeners;
// }
// /**
// * Mark that the current molecule definitions (and hence the atom order) may be invalid.
// * This should be called whenever force field parameters change. It will cause the definitions
// * and order to be revalidated the next to reorderAtoms() is called.
// */
// void invalidateMolecules();
// /**
// * Get whether the current molecule definitions are valid.
// */
// bool getMoleculesAreInvalid() {
// return moleculesInvalid;
// }
private:
struct Molecule;
struct MoleculeGroup;
class VirtualSiteInfo;
// void findMoleculeGroups();
// static void tagAtomsInMolecule(int atom, int molecule, std::vector<int>& atomMolecule, std::vector<std::vector<int> >& atomBonds);
// /**
// * Ensure that all molecules marked as "identical" really are identical. This should be
// * called whenever force field parameters change. If necessary, it will rebuild the list
// * of molecules and resort the atoms.
// */
// void validateMolecules();
static bool hasInitializedCuda;
const System& system;
double time;
CudaPlatform::PlatformData& platformData;
int deviceIndex;
int contextIndex;
int stepCount;
int computeForceCount;
int numAtoms;
int paddedNumAtoms;
int numAtomBlocks;
int numThreadBlocks;
// int numForceBuffers;
// int simdWidth;
bool useBlockingSync, useDoublePrecision, accumulateInDouble, contextIsValid, atomsWereReordered, moleculesInvalid;
std::string compiler, tempDir, gpuArchitecture;
float4 periodicBoxSize;
float4 invPeriodicBoxSize;
std::string defaultOptimizationOptions;
std::map<std::string, std::string> compilationDefines;
CUcontext context;
CUdevice device;
CUfunction clearBufferKernel;
CUfunction clearTwoBuffersKernel;
CUfunction clearThreeBuffersKernel;
CUfunction clearFourBuffersKernel;
CUfunction clearFiveBuffersKernel;
CUfunction clearSixBuffersKernel;
CUfunction reduceFloat4Kernel;
CUfunction reduceForcesKernel;
std::vector<CudaForceInfo*> forces;
std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups;
std::vector<int4> posCellOffsets;
CudaArray* posq;
CudaArray* velm;
// CudaArray<mm_float4>* force;
// CudaArray<mm_float4>* forceBuffers;
// CudaArray<cl_long>* longForceBuffer;
// CudaArray<cl_float>* energyBuffer;
// CudaArray<cl_int>* atomIndex;
// std::vector<cl::Memory*> autoclearBuffers;
// std::vector<int> autoclearBufferSizes;
std::vector<ReorderListener*> reorderListeners;
// CudaIntegrationUtilities* integration;
// CudaBondedUtilities* bonded;
// CudaNonbondedUtilities* nonbonded;
WorkThread* thread;
};
struct CudaContext::Molecule {
std::vector<int> atoms;
std::vector<int> constraints;
std::vector<std::vector<int> > groups;
};
struct CudaContext::MoleculeGroup {
std::vector<int> atoms;
std::vector<int> instances;
std::vector<int> offsets;
};
/**
* This abstract class defines a task to be executed on the worker thread.
*/
class CudaContext::WorkTask {
public:
virtual void execute() = 0;
virtual ~WorkTask() {
}
};
class CudaContext::WorkThread {
public:
struct ThreadData;
WorkThread();
~WorkThread();
/**
* Request that a task be executed on the worker thread. The argument should have been allocated on the
* heap with the "new" operator. After its execute() method finishes, the object will be deleted automatically.
*/
void addTask(CudaContext::WorkTask* task);
/**
* Get whether the worker thread is idle, waiting for a task to be added.
*/
bool isWaiting();
/**
* Get whether the worker thread has exited.
*/
bool isFinished();
/**
* Block until all tasks have finished executing and the worker thread is idle.
*/
void flush();
private:
std::queue<CudaContext::WorkTask*> tasks;
bool waiting, finished;
pthread_mutex_t queueLock;
pthread_cond_t waitForTaskCondition, queueEmptyCondition;
pthread_t thread;
};
/**
* This abstract class defines a function to be executed whenever atoms get reordered.
* Objects that need to know when reordering happens should create a reorderListener
* and register it by calling addReorderListener().
*/
class CudaContext::ReorderListener {
public:
virtual void execute() = 0;
virtual ~ReorderListener() {
}
};
} // namespace OpenMM
#endif /*OPENMM_CUDACONTEXT_H_*/
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaExpressionUtilities.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/SplineFitter.h"
#include "lepton/Operation.h"
using namespace OpenMM;
using namespace Lepton;
using namespace std;
string CudaExpressionUtilities::doubleToString(double value) {
stringstream s;
s.precision(8);
s << scientific << value << "f";
return s.str();
}
string CudaExpressionUtilities::intToString(int value) {
stringstream s;
s << value;
return s.str();
}
string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables,
const vector<pair<string, string> >& functions, const string& prefix, const string& functionParams, const string& tempType) {
vector<pair<ExpressionTreeNode, string> > variableNodes;
for (map<string, string>::const_iterator iter = variables.begin(); iter != variables.end(); ++iter)
variableNodes.push_back(make_pair(ExpressionTreeNode(new Operation::Variable(iter->first)), iter->second));
return createExpressions(expressions, variableNodes, functions, prefix, functionParams, tempType);
}
string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const vector<pair<ExpressionTreeNode, string> >& variables,
const vector<pair<string, string> >& functions, const string& prefix, const string& functionParams, const string& tempType) {
stringstream out;
vector<ParsedExpression> allExpressions;
for (map<string, ParsedExpression>::const_iterator iter = expressions.begin(); iter != expressions.end(); ++iter)
allExpressions.push_back(iter->second);
vector<pair<ExpressionTreeNode, string> > temps = variables;
for (map<string, ParsedExpression>::const_iterator iter = expressions.begin(); iter != expressions.end(); ++iter) {
processExpression(out, iter->second.getRootNode(), temps, functions, prefix, functionParams, allExpressions, tempType);
out << iter->first << getTempName(iter->second.getRootNode(), temps) << ";\n";
}
return out.str();
}
void CudaExpressionUtilities::processExpression(stringstream& out, const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, string> >& temps,
const vector<pair<string, string> >& functions, const string& prefix, const string& functionParams, const vector<ParsedExpression>& allExpressions, const string& tempType) {
for (int i = 0; i < (int) temps.size(); i++)
if (temps[i].first == node)
return;
for (int i = 0; i < (int) node.getChildren().size(); i++)
processExpression(out, node.getChildren()[i], temps, functions, prefix, functionParams, allExpressions, tempType);
string name = prefix+intToString(temps.size());
bool hasRecordedNode = false;
out << tempType << " " << name << " = ";
switch (node.getOperation().getId()) {
case Operation::CONSTANT:
out << doubleToString(dynamic_cast<const Operation::Constant*>(&node.getOperation())->getValue());
break;
case Operation::VARIABLE:
throw OpenMMException("Unknown variable in expression: "+node.getOperation().getName());
case Operation::CUSTOM:
{
int i;
for (i = 0; i < (int) functions.size() && functions[i].first != node.getOperation().getName(); i++)
;
if (i == functions.size())
throw OpenMMException("Unknown function in expression: "+node.getOperation().getName());
bool isDeriv = (dynamic_cast<const Operation::Custom*>(&node.getOperation())->getDerivOrder()[0] == 1);
out << "0.0f;\n";
temps.push_back(make_pair(node, name));
hasRecordedNode = true;
// If both the value and derivative of the function are needed, it's faster to calculate them both
// at once, so check to see if both are needed.
const ExpressionTreeNode* valueNode = NULL;
const ExpressionTreeNode* derivNode = NULL;
for (int j = 0; j < (int) allExpressions.size(); j++)
findRelatedTabulatedFunctions(node, allExpressions[j].getRootNode(), valueNode, derivNode);
string valueName = name;
string derivName = name;
if (valueNode != NULL && derivNode != NULL) {
string name2 = prefix+intToString(temps.size());
out << tempType << " " << name2 << " = 0.0f;\n";
if (isDeriv) {
valueName = name2;
temps.push_back(make_pair(*valueNode, name2));
}
else {
derivName = name2;
temps.push_back(make_pair(*derivNode, name2));
}
}
out << "{\n";
out << "float4 params = " << functionParams << "[" << i << "];\n";
out << "float x = " << getTempName(node.getChildren()[0], temps) << ";\n";
out << "if (x >= params.x && x <= params.y) {\n";
out << "x = (x-params.x)*params.z;\n";
out << "int index = (int) (floor(x));\n";
out << "index = min(index, (int) params.w);\n";
out << "float4 coeff = " << functions[i].second << "[index];\n";
out << "float b = x-index;\n";
out << "float a = 1.0f-b;\n";
if (valueNode != NULL)
out << valueName << " = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(params.z*params.z);\n";
if (derivNode != NULL)
out << derivName << " = (coeff.y-coeff.x)*params.z+((1.0f-3.0f*a*a)*coeff.z+(3.0f*b*b-1.0f)*coeff.w)/params.z;\n";
out << "}\n";
out << "}";
break;
}
case Operation::ADD:
out << getTempName(node.getChildren()[0], temps) << "+" << getTempName(node.getChildren()[1], temps);
break;
case Operation::SUBTRACT:
out << getTempName(node.getChildren()[0], temps) << "-" << getTempName(node.getChildren()[1], temps);
break;
case Operation::MULTIPLY:
out << getTempName(node.getChildren()[0], temps) << "*" << getTempName(node.getChildren()[1], temps);
break;
case Operation::DIVIDE:
{
bool haveReciprocal = false;
for (int i = 0; i < (int) temps.size(); i++)
if (temps[i].first.getOperation().getId() == Operation::RECIPROCAL && temps[i].first.getChildren()[0] == node.getChildren()[1]) {
haveReciprocal = true;
out << getTempName(node.getChildren()[0], temps) << "*" << temps[i].second;
}
if (!haveReciprocal)
out << getTempName(node.getChildren()[0], temps) << "/" << getTempName(node.getChildren()[1], temps);
break;
}
case Operation::POWER:
out << "pow(" << getTempName(node.getChildren()[0], temps) << ", " << getTempName(node.getChildren()[1], temps) << ")";
break;
case Operation::NEGATE:
out << "-" << getTempName(node.getChildren()[0], temps);
break;
case Operation::SQRT:
out << "sqrt(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::EXP:
out << "EXP(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::LOG:
out << "LOG(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::SIN:
out << "sin(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::COS:
out << "cos(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::SEC:
out << "1.0f/cos(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::CSC:
out << "1.0f/sin(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::TAN:
out << "tan(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::COT:
out << "1.0f/tan(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::ASIN:
out << "asin(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::ACOS:
out << "acos(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::ATAN:
out << "atan(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::SINH:
out << "sinh(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::COSH:
out << "cosh(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::TANH:
out << "tanh(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::ERF:
out << "erf(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::ERFC:
out << "erfc(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::STEP:
out << getTempName(node.getChildren()[0], temps) << " >= 0.0f ? 1.0f : 0.0f";
break;
case Operation::DELTA:
out << getTempName(node.getChildren()[0], temps) << " == 0.0f ? 1.0f : 0.0f";
break;
case Operation::SQUARE:
{
string arg = getTempName(node.getChildren()[0], temps);
out << arg << "*" << arg;
break;
}
case Operation::CUBE:
{
string arg = getTempName(node.getChildren()[0], temps);
out << arg << "*" << arg << "*" << arg;
break;
}
case Operation::RECIPROCAL:
out << "RECIP(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::ADD_CONSTANT:
out << doubleToString(dynamic_cast<const Operation::AddConstant*>(&node.getOperation())->getValue()) << "+" << getTempName(node.getChildren()[0], temps);
break;
case Operation::MULTIPLY_CONSTANT:
out << doubleToString(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()) << "*" << getTempName(node.getChildren()[0], temps);
break;
case Operation::POWER_CONSTANT:
{
double exponent = dynamic_cast<const Operation::PowerConstant*>(&node.getOperation())->getValue();
if (exponent == 0.0)
out << "1.0f";
else if (exponent == (int) exponent) {
out << "0.0f;\n";
temps.push_back(make_pair(node, name));
hasRecordedNode = true;
// If multiple integral powers of the same base are needed, it's faster to calculate all of them
// at once, so check to see if others are also needed.
map<int, const ExpressionTreeNode*> powers;
powers[(int) exponent] = &node;
for (int j = 0; j < (int) allExpressions.size(); j++)
findRelatedPowers(node, allExpressions[j].getRootNode(), powers);
vector<int> exponents;
vector<string> names;
vector<bool> hasAssigned(powers.size(), false);
exponents.push_back((int) fabs(exponent));
names.push_back(name);
for (map<int, const ExpressionTreeNode*>::const_iterator iter = powers.begin(); iter != powers.end(); ++iter) {
if (iter->first != exponent) {
exponents.push_back(iter->first >= 0 ? iter->first : -iter->first);
string name2 = prefix+intToString(temps.size());
names.push_back(name2);
temps.push_back(make_pair(*iter->second, name2));
out << tempType << " " << name2 << " = 0.0f;\n";
}
}
out << "{\n";
out << "float multiplier = " << (exponent < 0.0 ? "1.0f/" : "") << getTempName(node.getChildren()[0], temps) << ";\n";
bool done = false;
while (!done) {
done = true;
for (int i = 0; i < (int) exponents.size(); i++) {
if (exponents[i]%2 == 1) {
if (!hasAssigned[i])
out << names[i] << " = multiplier;\n";
else
out << names[i] << " *= multiplier;\n";
hasAssigned[i] = true;
}
exponents[i] >>= 1;
if (exponents[i] != 0)
done = false;
}
if (!done)
out << "multiplier *= multiplier;\n";
}
out << "}";
}
else
out << "pow(" << getTempName(node.getChildren()[0], temps) << ", " << doubleToString(exponent) << ")";
break;
}
case Operation::MIN:
out << "min(" << getTempName(node.getChildren()[0], temps) << ", " << getTempName(node.getChildren()[1], temps) << ")";
break;
case Operation::MAX:
out << "max(" << getTempName(node.getChildren()[0], temps) << ", " << getTempName(node.getChildren()[1], temps) << ")";
break;
case Operation::ABS:
out << "fabs(" << getTempName(node.getChildren()[0], temps) << ")";
break;
default:
throw OpenMMException("Internal error: Unknown operation in user-defined expression: "+node.getOperation().getName());
}
out << ";\n";
if (!hasRecordedNode)
temps.push_back(make_pair(node, name));
}
string CudaExpressionUtilities::getTempName(const ExpressionTreeNode& node, const vector<pair<ExpressionTreeNode, string> >& temps) {
for (int i = 0; i < (int) temps.size(); i++)
if (temps[i].first == node)
return temps[i].second;
stringstream out;
out << "Internal error: No temporary variable for expression node: " << node;
throw OpenMMException(out.str());
}
void CudaExpressionUtilities::findRelatedTabulatedFunctions(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode,
const ExpressionTreeNode*& valueNode, const ExpressionTreeNode*& derivNode) {
if (searchNode.getOperation().getId() == Operation::CUSTOM && node.getChildren()[0] == searchNode.getChildren()[0]) {
if (dynamic_cast<const Operation::Custom*>(&searchNode.getOperation())->getDerivOrder()[0] == 0)
valueNode = &searchNode;
else
derivNode = &searchNode;
}
else
for (int i = 0; i < (int) searchNode.getChildren().size(); i++)
findRelatedTabulatedFunctions(node, searchNode.getChildren()[i], valueNode, derivNode);
}
void CudaExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode, map<int, const ExpressionTreeNode*>& powers) {
if (searchNode.getOperation().getId() == Operation::POWER_CONSTANT && node.getChildren()[0] == searchNode.getChildren()[0]) {
double realPower = dynamic_cast<const Operation::PowerConstant*>(&searchNode.getOperation())->getValue();
int power = (int) realPower;
if (power != realPower)
return; // We are only interested in integer powers.
if (powers.find(power) != powers.end())
return; // This power is already in the map.
if (powers.begin()->first*power < 0)
return; // All powers must have the same sign.
powers[power] = &searchNode;
}
else
for (int i = 0; i < (int) searchNode.getChildren().size(); i++)
findRelatedPowers(node, searchNode.getChildren()[i], powers);
}
vector<float4> CudaExpressionUtilities::computeFunctionCoefficients(const vector<double>& values, double min, double max) {
// Compute the spline coefficients.
int numValues = values.size();
vector<double> x(numValues), derivs;
for (int i = 0; i < numValues; i++)
x[i] = min+i*(max-min)/(numValues-1);
SplineFitter::createNaturalSpline(x, values, derivs);
vector<float4> f(numValues-1);
for (int i = 0; i < (int) values.size()-1; i++)
f[i] = make_float4((float) values[i], (float) values[i+1], (float) (derivs[i]/6.0), (float) (derivs[i+1]/6.0));
return f;
}
#ifndef OPENMM_CUDAEXPRESSIONUTILITIES_H_
#define OPENMM_CUDAEXPRESSIONUTILITIES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaContext.h"
#include "lepton/CustomFunction.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/ParsedExpression.h"
#include <map>
#include <sstream>
#include <string>
#include <utility>
namespace OpenMM {
/**
* This class is used by various classes to generate CUDA source code implementing
* user defined mathematical expressions.
*/
class OPENMM_EXPORT CudaExpressionUtilities {
public:
/**
* Generate the source code for calculating a set of expressions.
*
* @param expressions the expressions to generate code for (keys are the variables to store the output values in)
* @param variables defines the source code to generate for each variable that may appear in the expressions. Keys are
* variable names, and the values are the code to generate for them.
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "float")
*/
static std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::map<std::string, std::string>& variables,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams, const std::string& tempType="float");
/**
* Generate the source code for calculating a set of expressions.
*
* @param expressions the expressions to generate code for (keys are the variables to store the output values in)
* @param variables defines the source code to generate for each variable or precomputed sub-expression that may appear in the expressions.
* Each entry is an ExpressionTreeNode, and the code to generate wherever an identical node appears.
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "float")
*/
static std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variables,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams, const std::string& tempType="float");
/**
* Calculate the spline coefficients for a tabulated function that appears in expressions.
*
* @param values the tabulated values of the function
* @param min the value of the independent variable corresponding to the first element of values
* @param max the value of the independent variable corresponding to the last element of values
* @return the spline coefficients
*/
static std::vector<float4> computeFunctionCoefficients(const std::vector<double>& values, double min, double max);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static std::string doubleToString(double value);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static std::string intToString(int value);
class FunctionPlaceholder;
private:
static void processExpression(std::stringstream& out, const Lepton::ExpressionTreeNode& node,
std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams,
const std::vector<Lepton::ParsedExpression>& allExpressions, const std::string& tempType);
static std::string getTempName(const Lepton::ExpressionTreeNode& node, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps);
static void findRelatedTabulatedFunctions(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
const Lepton::ExpressionTreeNode*& valueNode, const Lepton::ExpressionTreeNode*& derivNode);
static void findRelatedPowers(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
std::map<int, const Lepton::ExpressionTreeNode*>& powers);
};
/**
* This class serves as a placeholder for custom functions in expressions.
*/
class CudaExpressionUtilities::FunctionPlaceholder : public Lepton::CustomFunction {
public:
int getNumArguments() const {
return 1;
}
double evaluate(const double* arguments) const {
return 0.0;
}
double evaluateDerivative(const double* arguments, const int* derivOrder) const {
return 0.0;
}
CustomFunction* clone() const {
return new FunctionPlaceholder();
}
};
} // namespace OpenMM
#endif /*OPENMM_CUDAEXPRESSIONUTILITIES_H_*/
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaForceInfo.h"
using namespace OpenMM;
using namespace std;
bool CudaForceInfo::areParticlesIdentical(int particle1, int particle2) {
return true;
}
int CudaForceInfo::getNumParticleGroups() {
return 0;
}
void CudaForceInfo::getParticlesInGroup(int index, vector<int>& particles) {
return;
}
bool CudaForceInfo::areGroupsIdentical(int group1, int group2) {
return true;
}
#ifndef OPENMM_CUDAFORCEINFO_H_
#define OPENMM_CUDAFORCEINFO_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/internal/windowsExport.h"
#include <vector>
namespace OpenMM {
/**
* This class is used by the Cuda implementation of a Force class to convey information
* about the behavior and requirements of that force.
*/
class OPENMM_EXPORT CudaForceInfo {
public:
CudaForceInfo(int requiredForceBuffers) : requiredForceBuffers(requiredForceBuffers) {
}
/**
* Get the number of force buffers this force requires.
*/
int getRequiredForceBuffers() {
return requiredForceBuffers;
}
/**
* Get whether or not two particles have identical force field parameters.
*/
virtual bool areParticlesIdentical(int particle1, int particle2);
/**
* Get the number of particle groups defined by this force.
*/
virtual int getNumParticleGroups();
/**
* Get the list of particles in a particular group.
*/
virtual void getParticlesInGroup(int index, std::vector<int>& particles);
/**
* Get whether two particle groups are identical.
*/
virtual bool areGroupsIdentical(int group1, int group2);
private:
int requiredForceBuffers;
};
} // namespace OpenMM
#endif /*OPENMM_CUDAFORCEINFO_H_*/
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaKernelFactory.h"
//#include "CudaParallelKernels.h"
#include "CudaPlatform.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
using namespace OpenMM;
KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
CudaPlatform::PlatformData& data = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData());
// if (data.contexts.size() > 1) {
// // We are running in parallel on multiple devices, so we may want to create a parallel kernel.
//
// if (name == CalcForcesAndEnergyKernel::Name())
// return new CudaParallelCalcForcesAndEnergyKernel(name, platform, data);
// if (name == CalcHarmonicBondForceKernel::Name())
// return new CudaParallelCalcHarmonicBondForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomBondForceKernel::Name())
// return new CudaParallelCalcCustomBondForceKernel(name, platform, data, context.getSystem());
// if (name == CalcHarmonicAngleForceKernel::Name())
// return new CudaParallelCalcHarmonicAngleForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomAngleForceKernel::Name())
// return new CudaParallelCalcCustomAngleForceKernel(name, platform, data, context.getSystem());
// if (name == CalcPeriodicTorsionForceKernel::Name())
// return new CudaParallelCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcRBTorsionForceKernel::Name())
// return new CudaParallelCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCMAPTorsionForceKernel::Name())
// return new CudaParallelCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomTorsionForceKernel::Name())
// return new CudaParallelCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcNonbondedForceKernel::Name())
// return new CudaParallelCalcNonbondedForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomNonbondedForceKernel::Name())
// return new CudaParallelCalcCustomNonbondedForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomExternalForceKernel::Name())
// return new CudaParallelCalcCustomExternalForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomHbondForceKernel::Name())
// return new CudaParallelCalcCustomHbondForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomCompoundBondForceKernel::Name())
// return new CudaParallelCalcCustomCompoundBondForceKernel(name, platform, data, context.getSystem());
// }
// CudaContext& cl = *data.contexts[0];
// if (name == CalcForcesAndEnergyKernel::Name())
// return new CudaCalcForcesAndEnergyKernel(name, platform, cl);
// if (name == UpdateStateDataKernel::Name())
// return new CudaUpdateStateDataKernel(name, platform, cl);
// if (name == ApplyConstraintsKernel::Name())
// return new CudaApplyConstraintsKernel(name, platform, cl);
// if (name == VirtualSitesKernel::Name())
// return new CudaVirtualSitesKernel(name, platform, cl);
// if (name == CalcHarmonicBondForceKernel::Name())
// return new CudaCalcHarmonicBondForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcCustomBondForceKernel::Name())
// return new CudaCalcCustomBondForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcHarmonicAngleForceKernel::Name())
// return new CudaCalcHarmonicAngleForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcCustomAngleForceKernel::Name())
// return new CudaCalcCustomAngleForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcPeriodicTorsionForceKernel::Name())
// return new CudaCalcPeriodicTorsionForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcRBTorsionForceKernel::Name())
// return new CudaCalcRBTorsionForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcCMAPTorsionForceKernel::Name())
// return new CudaCalcCMAPTorsionForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcCustomTorsionForceKernel::Name())
// return new CudaCalcCustomTorsionForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcNonbondedForceKernel::Name())
// return new CudaCalcNonbondedForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcCustomNonbondedForceKernel::Name())
// return new CudaCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcGBSAOBCForceKernel::Name())
// return new CudaCalcGBSAOBCForceKernel(name, platform, cl);
// if (name == CalcCustomGBForceKernel::Name())
// return new CudaCalcCustomGBForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcCustomExternalForceKernel::Name())
// return new CudaCalcCustomExternalForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcCustomHbondForceKernel::Name())
// return new CudaCalcCustomHbondForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcCustomCompoundBondForceKernel::Name())
// return new CudaCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem());
// if (name == IntegrateVerletStepKernel::Name())
// return new CudaIntegrateVerletStepKernel(name, platform, cl);
// if (name == IntegrateLangevinStepKernel::Name())
// return new CudaIntegrateLangevinStepKernel(name, platform, cl);
// if (name == IntegrateBrownianStepKernel::Name())
// return new CudaIntegrateBrownianStepKernel(name, platform, cl);
// if (name == IntegrateVariableVerletStepKernel::Name())
// return new CudaIntegrateVariableVerletStepKernel(name, platform, cl);
// if (name == IntegrateVariableLangevinStepKernel::Name())
// return new CudaIntegrateVariableLangevinStepKernel(name, platform, cl);
// if (name == IntegrateCustomStepKernel::Name())
// return new CudaIntegrateCustomStepKernel(name, platform, cl);
// if (name == ApplyAndersenThermostatKernel::Name())
// return new CudaApplyAndersenThermostatKernel(name, platform, cl);
// if (name == ApplyMonteCarloBarostatKernel::Name())
// return new CudaApplyMonteCarloBarostatKernel(name, platform, cl);
// if (name == CalcKineticEnergyKernel::Name())
// return new CudaCalcKineticEnergyKernel(name, platform, cl);
// if (name == RemoveCMMotionKernel::Name())
// return new CudaRemoveCMMotionKernel(name, platform, cl);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaKernelSources.h"
using namespace OpenMM;
using namespace std;
#ifndef OPENMM_CUDAKERNELSOURCES_H_
#define OPENMM_CUDAKERNELSOURCES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/internal/windowsExport.h"
#include <string>
namespace OpenMM {
/**
* This class is a central holding place for the source code of CUDA kernels.
* The CMake build script inserts declarations into it based on the .cu files in the
* kernels subfolder.
*/
class OPENMM_EXPORT CudaKernelSources {
public:
@CUDA_FILE_DECLARATIONS@
};
} // namespace OpenMM
#endif /*OPENMM_CUDAKERNELSOURCES_H_*/
This diff is collapsed.
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaContext.h"
#include "CudaExpressionUtilities.h"
#include "CudaPlatform.h"
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include <algorithm>
#include <cctype>
#include <sstream>
using namespace OpenMM;
using namespace std;
extern "C" OPENMM_EXPORT void registerPlatforms() {
Platform::registerPlatform(new CudaPlatform());
}
CudaPlatform::CudaPlatform() {
CudaKernelFactory* factory = new CudaKernelFactory();
registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
registerKernelFactory(UpdateStateDataKernel::Name(), factory);
registerKernelFactory(ApplyConstraintsKernel::Name(), factory);
registerKernelFactory(VirtualSitesKernel::Name(), factory);
registerKernelFactory(CalcHarmonicBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomBondForceKernel::Name(), factory);
registerKernelFactory(CalcHarmonicAngleForceKernel::Name(), factory);
registerKernelFactory(CalcCustomAngleForceKernel::Name(), factory);
registerKernelFactory(CalcPeriodicTorsionForceKernel::Name(), factory);
registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
registerKernelFactory(CalcCMAPTorsionForceKernel::Name(), factory);
registerKernelFactory(CalcCustomTorsionForceKernel::Name(), factory);
registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
registerKernelFactory(CalcCustomGBForceKernel::Name(), factory);
registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateCustomStepKernel::Name(), factory);
registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
registerKernelFactory(ApplyMonteCarloBarostatKernel::Name(), factory);
registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
platformProperties.push_back(CudaDeviceIndex());
platformProperties.push_back(CudaUseBlockingSync());
platformProperties.push_back(CudaPrecision());
platformProperties.push_back(CudaCompiler());
platformProperties.push_back(CudaTempDirectory());
setPropertyDefaultValue(CudaDeviceIndex(), "");
setPropertyDefaultValue(CudaUseBlockingSync(), "true");
setPropertyDefaultValue(CudaPrecision(), "single");
#ifdef _MSC_VER
setPropertyDefaultValue(CudaCompiler(), "nvcc");
setPropertyDefaultValue(CudaTempDirectory(), string(getenv("TEMP")));
#else
setPropertyDefaultValue(CudaCompiler(), "/usr/local/cuda/bin/nvcc");
setPropertyDefaultValue(CudaTempDirectory(), string(getenv("TMPDIR")));
#endif
}
bool CudaPlatform::supportsDoublePrecision() const {
return false;
}
const string& CudaPlatform::getPropertyValue(const Context& context, const string& property) const {
const ContextImpl& impl = getContextImpl(context);
const PlatformData* data = reinterpret_cast<const PlatformData*>(impl.getPlatformData());
map<string, string>::const_iterator value = data->propertyValues.find(property);
if (value != data->propertyValues.end())
return value->second;
return Platform::getPropertyValue(context, property);
}
void CudaPlatform::setPropertyValue(Context& context, const string& property, const string& value) const {
}
void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string>& properties) const {
const string& devicePropValue = (properties.find(CudaDeviceIndex()) == properties.end() ?
getPropertyDefaultValue(CudaDeviceIndex()) : properties.find(CudaDeviceIndex())->second);
string blockingPropValue = (properties.find(CudaUseBlockingSync()) == properties.end() ?
getPropertyDefaultValue(CudaUseBlockingSync()) : properties.find(CudaUseBlockingSync())->second);
string precisionPropValue = (properties.find(CudaPrecision()) == properties.end() ?
getPropertyDefaultValue(CudaPrecision()) : properties.find(CudaPrecision())->second);
const string& compilerPropValue = (properties.find(CudaCompiler()) == properties.end() ?
getPropertyDefaultValue(CudaCompiler()) : properties.find(CudaCompiler())->second);
const string& tempPropValue = (properties.find(CudaTempDirectory()) == properties.end() ?
getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second);
transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
context.setPlatformData(new PlatformData(context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, compilerPropValue, tempPropValue));
}
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
PlatformData* data = reinterpret_cast<PlatformData*>(context.getPlatformData());
delete data;
}
CudaPlatform::PlatformData::PlatformData(const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& compilerProperty, const string& tempProperty) : removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
bool blocking = (blockingProperty == "true");
vector<string> devices;
size_t searchPos = 0, nextPos;
while ((nextPos = deviceIndexProperty.find_first_of(", ", searchPos)) != string::npos) {
devices.push_back(deviceIndexProperty.substr(searchPos, nextPos-searchPos));
searchPos = nextPos+1;
}
devices.push_back(deviceIndexProperty.substr(searchPos));
for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) {
unsigned int deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, *this));
}
}
if (contexts.size() == 0)
contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, *this));
stringstream device;
for (int i = 0; i < (int) contexts.size(); i++) {
if (i > 0)
device << ',';
device << contexts[i]->getDeviceIndex();
}
propertyValues[CudaPlatform::CudaDeviceIndex()] = device.str();
propertyValues[CudaPlatform::CudaPrecision()] = precisionProperty;
propertyValues[CudaPlatform::CudaCompiler()] = compilerProperty;
propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
contextEnergy.resize(contexts.size());
}
CudaPlatform::PlatformData::~PlatformData() {
for (int i = 0; i < (int) contexts.size(); i++)
delete contexts[i];
}
void CudaPlatform::PlatformData::initializeContexts(const System& system) {
// for (int i = 0; i < (int) contexts.size(); i++)
// contexts[i]->initialize();
}
void CudaPlatform::PlatformData::syncContexts() {
// for (int i = 0; i < (int) contexts.size(); i++)
// contexts[i]->getWorkThread().flush();
}
/**
* This is called by the various functions below to clear a buffer.
*/
__device__ void clearSingleBuffer(int* __restrict__ buffer, int size) {
int index = blockDim.x*blockIdx.x+threadIdx.x;
int4* buffer4 = (int4*) buffer;
int sizeDiv4 = size/4;
while (index < sizeDiv4) {
buffer4[index] = make_int4(0);
index += blockDim.x*gridDim.x;
}
if (blockDim.x*blockIdx.x+threadIdx.x == 0)
for (int i = sizeDiv4*4; i < size; i++)
buffer[i] = 0;
}
/**
* Fill a buffer with 0.
*/
__global__ void clearBuffer(int* __restrict__ buffer, int size) {
clearSingleBuffer(buffer, size);
}
/**
* Fill two buffers with 0.
*/
__global__ void clearTwoBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
}
/**
* Fill three buffers with 0.
*/
__global__ void clearThreeBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
}
/**
* Fill four buffers with 0.
*/
__global__ void clearFourBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3, int* __restrict__ buffer4, int size4) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
clearSingleBuffer(buffer4, size4);
}
/**
* Fill five buffers with 0.
*/
__global__ void clearFiveBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3, int* __restrict__ buffer4, int size4, int* __restrict__ buffer5, int size5) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
clearSingleBuffer(buffer4, size4);
clearSingleBuffer(buffer5, size5);
}
/**
* Fill six buffers with 0.
*/
__global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3, int* __restrict__ buffer4, int size4, int* __restrict__ buffer5, int size5, int* __restrict__ buffer6, int size6) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
clearSingleBuffer(buffer4, size4);
clearSingleBuffer(buffer5, size5);
clearSingleBuffer(buffer6, size6);
}
/**
* Sum a collection of buffers into the first one.
*/
__global__ void reduceFloat4Buffer(float4* __restrict__ buffer, int bufferSize, int numBuffers) {
int index = blockDim.x*blockIdx.x+threadIdx.x;
int totalSize = bufferSize*numBuffers;
while (index < bufferSize) {
float4 sum = buffer[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
sum += buffer[i];
buffer[index] = sum;
index += blockDim.x*gridDim.x;
}
}
/**
* Sum the various buffers containing forces.
*/
__global__ void reduceForces(const long* __restrict__ longBuffer, float4* __restrict__ buffer, int bufferSize, int numBuffers) {
int totalSize = bufferSize*numBuffers;
float scale = 1.0f/(float) 0xFFFFFFFF;
for (int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x) {
float4 sum = make_float4(scale*longBuffer[index], scale*longBuffer[index+bufferSize], scale*longBuffer[index+2*bufferSize], 0.0f);
for (int i = index; i < totalSize; i += bufferSize)
sum += buffer[i];
buffer[index] = sum;
}
}
/**
* This file defines vector operations to simplify code elsewhere.
*/
// Versions of make_x() that take a single value and set all components to that.
inline __device__ int2 make_int2(int a) {
return make_int2(a, a);
}
inline __device__ int3 make_int3(int a) {
return make_int3(a, a, a);
}
inline __device__ int4 make_int4(int a) {
return make_int4(a, a, a, a);
}
inline __device__ float2 make_float2(float a) {
return make_float2(a, a);
}
inline __device__ float3 make_float3(float a) {
return make_float3(a, a, a);
}
inline __device__ float4 make_float4(float a) {
return make_float4(a, a, a, a);
}
inline __device__ double2 make_double2(double a) {
return make_double2(a, a);
}
inline __device__ double3 make_double3(double a) {
return make_double3(a, a, a);
}
inline __device__ double4 make_double4(double a) {
return make_double4(a, a, a, a);
}
// Negate a vector.
inline __device__ int2 operator*(int2 a) {
return make_int2(-a.x, -a.y);
}
inline __device__ int3 operator-(int3 a) {
return make_int3(-a.x, -a.y, -a.z);
}
inline __device__ int4 operator-(int4 a) {
return make_int4(-a.x, -a.y, -a.z, -a.w);
}
inline __device__ float2 operator-(float2 a) {
return make_float2(-a.x, -a.y);
}
inline __device__ float3 operator-(float3 a) {
return make_float3(-a.x, -a.y, -a.z);
}
inline __device__ float4 operator-(float4 a) {
return make_float4(-a.x, -a.y, -a.z, -a.w);
}
inline __device__ double2 operator-(double2 a) {
return make_double2(-a.x, -a.y);
}
inline __device__ double3 operator-(double3 a) {
return make_double3(-a.x, -a.y, -a.z);
}
inline __device__ double4 operator-(double4 a) {
return make_double4(-a.x, -a.y, -a.z, -a.w);
}
// Add two vectors.
inline __device__ int2 operator+(int2 a, int2 b) {
return make_int2(a.x+b.x, a.y+b.y);
}
inline __device__ int3 operator+(int3 a, int3 b) {
return make_int3(a.x+b.x, a.y+b.y, a.z+b.z);
}
inline __device__ int4 operator+(int4 a, int4 b) {
return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
}
inline __device__ float2 operator+(float2 a, float2 b) {
return make_float2(a.x+b.x, a.y+b.y);
}
inline __device__ float3 operator+(float3 a, float3 b) {
return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
}
inline __device__ float4 operator+(float4 a, float4 b) {
return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
}
inline __device__ double2 operator+(double2 a, double2 b) {
return make_double2(a.x+b.x, a.y+b.y);
}
inline __device__ double3 operator+(double3 a, double3 b) {
return make_double3(a.x+b.x, a.y+b.y, a.z+b.z);
}
inline __device__ double4 operator+(double4 a, double4 b) {
return make_double4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
}
// Subtract two vectors.
inline __device__ int2 operator-(int2 a, int2 b) {
return make_int2(a.x-b.x, a.y-b.y);
}
inline __device__ int3 operator-(int3 a, int3 b) {
return make_int3(a.x-b.x, a.y-b.y, a.z-b.z);
}
inline __device__ int4 operator-(int4 a, int4 b) {
return make_int4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
}
inline __device__ float2 operator-(float2 a, float2 b) {
return make_float2(a.x-b.x, a.y-b.y);
}
inline __device__ float3 operator-(float3 a, float3 b) {
return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
}
inline __device__ float4 operator-(float4 a, float4 b) {
return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
}
inline __device__ double2 operator-(double2 a, double2 b) {
return make_double2(a.x-b.x, a.y-b.y);
}
inline __device__ double3 operator-(double3 a, double3 b) {
return make_double3(a.x-b.x, a.y-b.y, a.z-b.z);
}
inline __device__ double4 operator-(double4 a, double4 b) {
return make_double4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
}
// Multiply two vectors.
inline __device__ int2 operator*(int2 a, int2 b) {
return make_int2(a.x*b.x, a.y*b.y);
}
inline __device__ int3 operator*(int3 a, int3 b) {
return make_int3(a.x*b.x, a.y*b.y, a.z*b.z);
}
inline __device__ int4 operator*(int4 a, int4 b) {
return make_int4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
}
inline __device__ float2 operator*(float2 a, float2 b) {
return make_float2(a.x*b.x, a.y*b.y);
}
inline __device__ float3 operator*(float3 a, float3 b) {
return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
}
inline __device__ float4 operator*(float4 a, float4 b) {
return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
}
inline __device__ double2 operator*(double2 a, double2 b) {
return make_double2(a.x*b.x, a.y*b.y);
}
inline __device__ double3 operator*(double3 a, double3 b) {
return make_double3(a.x*b.x, a.y*b.y, a.z*b.z);
}
inline __device__ double4 operator*(double4 a, double4 b) {
return make_double4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
}
// Divide two vectors.
inline __device__ int2 operator/(int2 a, int2 b) {
return make_int2(a.x/b.x, a.y/b.y);
}
inline __device__ int3 operator/(int3 a, int3 b) {
return make_int3(a.x/b.x, a.y/b.y, a.z/b.z);
}
inline __device__ int4 operator/(int4 a, int4 b) {
return make_int4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
}
inline __device__ float2 operator/(float2 a, float2 b) {
return make_float2(a.x/b.x, a.y/b.y);
}
inline __device__ float3 operator/(float3 a, float3 b) {
return make_float3(a.x/b.x, a.y/b.y, a.z/b.z);
}
inline __device__ float4 operator/(float4 a, float4 b) {
return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
}
inline __device__ double2 operator/(double2 a, double2 b) {
return make_double2(a.x/b.x, a.y/b.y);
}
inline __device__ double3 operator/(double3 a, double3 b) {
return make_double3(a.x/b.x, a.y/b.y, a.z/b.z);
}
inline __device__ double4 operator/(double4 a, double4 b) {
return make_double4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
}
// += operator
inline __device__ void operator+=(int2& a, int2 b) {
a.x += b.x; a.y += b.y;
}
inline __device__ void operator+=(int3& a, int3 b) {
a.x += b.x; a.y += b.y; a.z += b.z;
}
inline __device__ void operator+=(int4& a, int4 b) {
a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
}
inline __device__ void operator+=(float2& a, float2 b) {
a.x += b.x; a.y += b.y;
}
inline __device__ void operator+=(float3& a, float3 b) {
a.x += b.x; a.y += b.y; a.z += b.z;
}
inline __device__ void operator+=(float4& a, float4 b) {
a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
}
inline __device__ void operator+=(double2& a, double2 b) {
a.x += b.x; a.y += b.y;
}
inline __device__ void operator+=(double3& a, double3 b) {
a.x += b.x; a.y += b.y; a.z += b.z;
}
inline __device__ void operator+=(double4& a, double4 b) {
a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
}
// -= operator
inline __device__ void operator-=(int2& a, int2 b) {
a.x -= b.x; a.y -= b.y;
}
inline __device__ void operator-=(int3& a, int3 b) {
a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
inline __device__ void operator-=(int4& a, int4 b) {
a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
}
inline __device__ void operator-=(float2& a, float2 b) {
a.x -= b.x; a.y -= b.y;
}
inline __device__ void operator-=(float3& a, float3 b) {
a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
inline __device__ void operator-=(float4& a, float4 b) {
a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
}
inline __device__ void operator-=(double2& a, double2 b) {
a.x -= b.x; a.y -= b.y;
}
inline __device__ void operator-=(double3& a, double3 b) {
a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
inline __device__ void operator-=(double4& a, double4 b) {
a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
}
// *= operator
inline __device__ void operator*=(int2& a, int2 b) {
a.x *= b.x; a.y *= b.y;
}
inline __device__ void operator*=(int3& a, int3 b) {
a.x *= b.x; a.y *= b.y; a.z *= b.z;
}
inline __device__ void operator*=(int4& a, int4 b) {
a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w;
}
inline __device__ void operator*=(float2& a, float2 b) {
a.x *= b.x; a.y *= b.y;
}
inline __device__ void operator*=(float3& a, float3 b) {
a.x *= b.x; a.y *= b.y; a.z *= b.z;
}
inline __device__ void operator*=(float4& a, float4 b) {
a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w;
}
inline __device__ void operator*=(double2& a, double2 b) {
a.x *= b.x; a.y *= b.y;
}
inline __device__ void operator*=(double3& a, double3 b) {
a.x *= b.x; a.y *= b.y; a.z *= b.z;
}
inline __device__ void operator*=(double4& a, double4 b) {
a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w;
}
// /= operator
inline __device__ void operator/=(int2& a, int2 b) {
a.x /= b.x; a.y /= b.y;
}
inline __device__ void operator/=(int3& a, int3 b) {
a.x /= b.x; a.y /= b.y; a.z /= b.z;
}
inline __device__ void operator/=(int4& a, int4 b) {
a.x /= b.x; a.y /= b.y; a.z /= b.z; a.w /= b.w;
}
inline __device__ void operator/=(float2& a, float2 b) {
a.x /= b.x; a.y /= b.y;
}
inline __device__ void operator/=(float3& a, float3 b) {
a.x /= b.x; a.y /= b.y; a.z /= b.z;
}
inline __device__ void operator/=(float4& a, float4 b) {
a.x /= b.x; a.y /= b.y; a.z /= b.z; a.w /= b.w;
}
inline __device__ void operator/=(double2& a, double2 b) {
a.x /= b.x; a.y /= b.y;
}
inline __device__ void operator/=(double3& a, double3 b) {
a.x /= b.x; a.y /= b.y; a.z /= b.z;
}
inline __device__ void operator/=(double4& a, double4 b) {
a.x /= b.x; a.y /= b.y; a.z /= b.z; a.w /= b.w;
}
// Multiply a vector by a constant.
inline __device__ int2 operator*(int2 a, int b) {
return make_int2(a.x*b, a.y*b);
}
inline __device__ int3 operator*(int3 a, int b) {
return make_int3(a.x*b, a.y*b, a.z*b);
}
inline __device__ int4 operator*(int4 a, int b) {
return make_int4(a.x*b, a.y*b, a.z*b, a.w*b);
}
inline __device__ int2 operator*(int a, int2 b) {
return make_int2(a*b.x, a*b.y);
}
inline __device__ int3 operator*(int a, int3 b) {
return make_int3(a*b.x, a*b.y, a*b.z);
}
inline __device__ int4 operator*(int a, int4 b) {
return make_int4(a*b.x, a*b.y, a*b.z, a*b.w);
}
inline __device__ float2 operator*(float2 a, float b) {
return make_float2(a.x*b, a.y*b);
}
inline __device__ float3 operator*(float3 a, float b) {
return make_float3(a.x*b, a.y*b, a.z*b);
}
inline __device__ float4 operator*(float4 a, float b) {
return make_float4(a.x*b, a.y*b, a.z*b, a.w*b);
}
inline __device__ float2 operator*(float a, float2 b) {
return make_float2(a*b.x, a*b.y);
}
inline __device__ float3 operator*(float a, float3 b) {
return make_float3(a*b.x, a*b.y, a*b.z);
}
inline __device__ float4 operator*(float a, float4 b) {
return make_float4(a*b.x, a*b.y, a*b.z, a*b.w);
}
inline __device__ double2 operator*(double2 a, double b) {
return make_double2(a.x*b, a.y*b);
}
inline __device__ double3 operator*(double3 a, double b) {
return make_double3(a.x*b, a.y*b, a.z*b);
}
inline __device__ double4 operator*(double4 a, double b) {
return make_double4(a.x*b, a.y*b, a.z*b, a.w*b);
}
inline __device__ double2 operator*(double a, double2 b) {
return make_double2(a*b.x, a*b.y);
}
inline __device__ double3 operator*(double a, double3 b) {
return make_double3(a*b.x, a*b.y, a*b.z);
}
inline __device__ double4 operator*(double a, double4 b) {
return make_double4(a*b.x, a*b.y, a*b.z, a*b.w);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment