diff --git a/platforms/cuda-old/CMakeLists.txt b/platforms/cuda-old/CMakeLists.txt
deleted file mode 100644
index 5763ba730149fa579a9592324f9683978b88abbd..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/CMakeLists.txt
+++ /dev/null
@@ -1,90 +0,0 @@
-#---------------------------------------------------
-# OpenMM CUDA Platform
-#
-# Creates OpenMM library, base name=OpenMMCuda.
-# Default libraries are shared & optimized. Variants
-# are created for static (_static) and debug (_d).
-#
-# Windows:
-# OpenMMCuda[_d].dll
-# OpenMMCuda[_d].lib
-# OpenMMCuda_static[_d].lib
-# Unix:
-# libOpenMMCuda[_d].so
-# libOpenMMCuda_static[_d].a
-#----------------------------------------------------
-
-set(OPENMM_BUILD_CUDA_TESTS TRUE CACHE BOOL "Whether to build CUDA test cases")
-if(OPENMM_BUILD_CUDA_TESTS)
- SUBDIRS (tests)
-endif(OPENMM_BUILD_CUDA_TESTS)
-
-# The source is organized into subdirectories, but we handle them all from
-# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
-SET(OPENMM_SOURCE_SUBDIRS .)
-
-
-# Collect up information about the version of the OpenMM library we're building
-# and make it available to the code so it can be built into the binaries.
-
-SET(OPENMMCUDA_LIBRARY_NAME OpenMMCuda)
-
-SET(SHARED_TARGET ${OPENMMCUDA_LIBRARY_NAME})
-SET(STATIC_TARGET ${OPENMMCUDA_LIBRARY_NAME}_static)
-
-
-# Ensure that debug libraries have "_d" appended to their names.
-# CMake gets this right on Windows automatically with this definition.
-IF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
- SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
-ENDIF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
-
-# But on Unix or Cygwin we have to add the suffix manually
-IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
- SET(SHARED_TARGET ${SHARED_TARGET}_d)
- SET(STATIC_TARGET ${STATIC_TARGET}_d)
-ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
-
-
-# These are all the places to search for header files which are
-# to be part of the API.
-SET(API_INCLUDE_DIRS) # start empty
-FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
- # append
- SET(API_INCLUDE_DIRS ${API_INCLUDE_DIRS}
- ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
- ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
-ENDFOREACH(subdir)
-
-# We'll need both *relative* path names, starting with their API_INCLUDE_DIRS,
-# and absolute pathnames.
-SET(API_REL_INCLUDE_FILES) # start these out empty
-SET(API_ABS_INCLUDE_FILES)
-
-FOREACH(dir ${API_INCLUDE_DIRS})
- FILE(GLOB fullpaths ${dir}/*.h) # returns full pathnames
- SET(API_ABS_INCLUDE_FILES ${API_ABS_INCLUDE_FILES} ${fullpaths})
-
- FOREACH(pathname ${fullpaths})
- GET_FILENAME_COMPONENT(filename ${pathname} NAME)
- SET(API_REL_INCLUDE_FILES ${API_REL_INCLUDE_FILES} ${dir}/${filename})
- ENDFOREACH(pathname)
-ENDFOREACH(dir)
-
-# collect up source files
-SET(SOURCE_FILES) # empty
-SET(SOURCE_INCLUDE_FILES)
-
-FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
- FILE(GLOB_RECURSE src_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
- FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
- SET(SOURCE_FILES ${SOURCE_FILES} ${src_files}) #append
- SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
- INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
-ENDFOREACH(subdir)
-
-INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
-
-# SET(FINDCUDA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/cuda-cmake)
-
-SUBDIRS (sharedTarget)
diff --git a/platforms/cuda-old/include/CudaKernelFactory.h b/platforms/cuda-old/include/CudaKernelFactory.h
deleted file mode 100644
index a6e04eb7b2b5a67667752a2a15c738a56e9c82d6..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/include/CudaKernelFactory.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef OPENMM_CUDAKERNELFACTORY_H_
-#define OPENMM_CUDAKERNELFACTORY_H_
-
-/* -------------------------------------------------------------------------- *
- * OpenMM *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from *
- * Simbios, the NIH National Center for Physics-Based Simulation of *
- * Biological Structures at Stanford, funded under the NIH Roadmap for *
- * Medical Research, grant U54 GM072970. See https://simtk.org. *
- * *
- * Portions copyright (c) 2008 Stanford University and the Authors. *
- * Authors: Peter Eastman *
- * Contributors: *
- * *
- * This program is free software: you can redistribute it and/or modify *
- * it under the terms of the GNU Lesser General Public License as published *
- * by the Free Software Foundation, either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU Lesser General Public License for more details. *
- * *
- * You should have received a copy of the GNU Lesser General Public License *
- * along with this program. If not, see . *
- * -------------------------------------------------------------------------- */
-
-#include "openmm/KernelFactory.h"
-#include "windowsExportCuda.h"
-
-namespace OpenMM {
-
-/**
- * This KernelFactory creates all kernels for CudaPlatform.
- */
-
-class CudaKernelFactory : public KernelFactory {
-public:
- OPENMMCUDA_EXPORT KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
-};
-
-} // namespace OpenMM
-
-#endif /*OPENMM_CUDAKERNELFACTORY_H_*/
diff --git a/platforms/cuda-old/include/CudaPlatform.h b/platforms/cuda-old/include/CudaPlatform.h
deleted file mode 100644
index 91dad2d89908a7a48adf8a0ac7f095f8cda8033f..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/include/CudaPlatform.h
+++ /dev/null
@@ -1,88 +0,0 @@
-#ifndef OPENMM_CUDAPLATFORM_H_
-#define OPENMM_CUDAPLATFORM_H_
-
-/* -------------------------------------------------------------------------- *
- * OpenMM *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from *
- * Simbios, the NIH National Center for Physics-Based Simulation of *
- * Biological Structures at Stanford, funded under the NIH Roadmap for *
- * Medical Research, grant U54 GM072970. See https://simtk.org. *
- * *
- * Portions copyright (c) 2008 Stanford University and the Authors. *
- * Authors: Peter Eastman *
- * Contributors: *
- * *
- * This program is free software: you can redistribute it and/or modify *
- * it under the terms of the GNU Lesser General Public License as published *
- * by the Free Software Foundation, either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU Lesser General Public License for more details. *
- * *
- * You should have received a copy of the GNU Lesser General Public License *
- * along with this program. If not, see . *
- * -------------------------------------------------------------------------- */
-
-#include "openmm/Platform.h"
-#include "windowsExportCuda.h"
-
-struct _gpuContext;
-
-namespace OpenMM {
-
-/**
- * This Platform subclass uses CUDA implementations of the OpenMM kernels to run on NVidia GPUs.
- */
-
-class OPENMMCUDA_EXPORT CudaPlatform : public Platform {
-public:
- class PlatformData;
- CudaPlatform();
- const std::string& getName() const {
- static const std::string name = "Cuda";
- return name;
- }
- double getSpeed() const {
- return 50;
- }
- bool supportsDoublePrecision() const;
- const std::string& getPropertyValue(const Context& context, const std::string& property) const;
- void setPropertyValue(Context& context, const std::string& property, const std::string& value) const;
- void contextCreated(ContextImpl& context, const std::map& properties) const;
- void contextDestroyed(ContextImpl& context) const;
- /**
- * This is the name of the parameter for selecting which CUDA device to use.
- */
- static const std::string& CudaDevice() {
- static const std::string key = "CudaDevice";
- return key;
- }
- /**
- * This is the name of the parameter for selecting whether CUDA should sync or spin loop while waiting for results.
- */
- static const std::string& CudaUseBlockingSync() {
- static const std::string key = "CudaUseBlockingSync";
- return key;
- }
-};
-
-class CudaPlatform::PlatformData {
-public:
- OPENMMCUDA_EXPORT PlatformData(_gpuContext* gpu);
- _gpuContext* gpu;
- bool removeCM;
- bool hasBonds, hasAngles, hasPeriodicTorsions, hasRB, hasNonbonded, hasCustomNonbonded;
- int nonbondedMethod, customNonbondedMethod;
- int cmMotionFrequency;
- int stepCount, computeForceCount;
- double time, ewaldSelfEnergy, dispersionCoefficient;
- std::map propertyValues;
-};
-
-} // namespace OpenMM
-
-#endif /*OPENMM_CUDAPLATFORM_H_*/
diff --git a/platforms/cuda-old/include/windowsExportCuda.h b/platforms/cuda-old/include/windowsExportCuda.h
deleted file mode 100644
index df8011b903c0e7c850174207755e6526aeba4148..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/include/windowsExportCuda.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef OPENMM_WINDOWSEXPORTCUDA_H_
-#define OPENMM_WINDOWSEXPORTCUDA_H_
-
-/*
- * Shared libraries are messy in Visual Studio. We have to distinguish three
- * cases:
- * (1) this header is being used to build the OpenMM shared library
- * (dllexport)
- * (2) this header is being used by a *client* of the OpenMM shared
- * library (dllimport)
- * (3) we are building the OpenMM static library, or the client is
- * being compiled with the expectation of linking with the
- * OpenMM static library (nothing special needed)
- * In the CMake script for building this library, we define one of the symbols
- * OpenMMCUDA_BUILDING_{SHARED|STATIC}_LIBRARY
- * Client code normally has no special symbol defined, in which case we'll
- * assume it wants to use the shared library. However, if the client defines
- * the symbol OPENMM_USE_STATIC_LIBRARIES we'll suppress the dllimport so
- * that the client code can be linked with static libraries. Note that
- * the client symbol is not library dependent, while the library symbols
- * affect only the OpenMM library, meaning that other libraries can
- * be clients of this one. However, we are assuming all-static or all-shared.
- */
-
-#ifdef _MSC_VER
- // We don't want to hear about how sprintf is "unsafe".
- #pragma warning(disable:4996)
- // Keep MS VC++ quiet about lack of dll export of private members.
- #pragma warning(disable:4251)
- #if defined(OPENMMCUDA_BUILDING_SHARED_LIBRARY)
- #define OPENMMCUDA_EXPORT __declspec(dllexport)
- #elif defined(OPENMMCUDA_BUILDING_STATIC_LIBRARY) || defined(OPENMMCUDA_USE_STATIC_LIBRARIES)
- #define OPENMMCUDA_EXPORT
- #else
- #define OPENMMCUDA_EXPORT __declspec(dllimport) // i.e., a client of a shared library
- #endif
-#else
- #define OPENMMCUDA_EXPORT // Linux, Mac
-#endif
-
-#endif // OPENMM_WINDOWSEXPORTCUDA_H_
diff --git a/platforms/cuda-old/sharedTarget/CMakeLists.txt b/platforms/cuda-old/sharedTarget/CMakeLists.txt
deleted file mode 100644
index 8f99886b7b32a5d61883ed9e3bed2e67d0e9f00a..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/sharedTarget/CMakeLists.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-#
-# Include CUDA related files.
-#
-# INCLUDE(${FINDCUDA_DIR}/FindCuda.cmake)
-INCLUDE_DIRECTORIES(${CUDA_INCLUDE})
-LINK_DIRECTORIES(${CUDA_TARGET_LINK})
-FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
- FILE(GLOB src_files ${CMAKE_SOURCE_DIR}/platforms/cuda/${subdir}/src/*.cu ${CMAKE_SOURCE_DIR}/platforms/cuda/${subdir}/src/*/*.cu)
- SET(SOURCE_FILES ${SOURCE_FILES} ${src_files})
- CUDA_INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/${subdir}/include)
- CUDA_INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/${subdir}/src)
-ENDFOREACH(subdir)
-CUDA_INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/jama/include)
-CUDA_INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/openmmapi/include)
-
-IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
- SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
-ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
- SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
-ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
-
-IF(APPLE AND CMAKE_OSX_ARCHITECTURES AND CMAKE_OSX_ARCHITECTURES MATCHES .*i386.* AND CMAKE_OSX_ARCHITECTURES MATCHES .*x86_64.*)
- # NVCC doesn't know how to build universal binaries, so we need to build two separate versions.
-
- SET(BASE_FLAGS ${CUDA_NVCC_FLAGS})
- SET(CMAKE_OSX_ARCHITECTURES i386)
- SET(CUDA_NVCC_FLAGS ${BASE_FLAGS} -m32)
- CUDA_ADD_LIBRARY("${SHARED_TARGET}32" SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
- TARGET_LINK_LIBRARIES(${SHARED_TARGET}32 ${MAIN_OPENMM_LIB} ${CUFFT_TARGET_LINK})
- SET_TARGET_PROPERTIES(${SHARED_TARGET}32 PROPERTIES COMPILE_FLAGS "-DOPENMMCUDA_BUILDING_SHARED_LIBRARY")
- SET(CMAKE_OSX_ARCHITECTURES x86_64)
- SET(CUDA_NVCC_FLAGS ${BASE_FLAGS} -m64)
- CUDA_ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
- TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUFFT_TARGET_LINK})
- SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMMCUDA_BUILDING_SHARED_LIBRARY")
- ADD_DEPENDENCIES(${SHARED_TARGET} "${SHARED_TARGET}32")
-
- # Join them into a single universal binary.
-
- ADD_CUSTOM_COMMAND(
- TARGET ${SHARED_TARGET}
- POST_BUILD
- COMMAND /usr/bin/lipo lib${SHARED_TARGET}.dylib lib${SHARED_TARGET}32.dylib -create -output lib${SHARED_TARGET}.dylib
- WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
- COMMENT "Creating universal binary")
-ELSE(APPLE AND CMAKE_OSX_ARCHITECTURES AND CMAKE_OSX_ARCHITECTURES MATCHES .*i386.* AND CMAKE_OSX_ARCHITECTURES MATCHES .*x86_64.*)
- CUDA_ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
- TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUFFT_TARGET_LINK})
- SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMMCUDA_BUILDING_SHARED_LIBRARY")
-ENDIF(APPLE AND CMAKE_OSX_ARCHITECTURES AND CMAKE_OSX_ARCHITECTURES MATCHES .*i386.* AND CMAKE_OSX_ARCHITECTURES MATCHES .*x86_64.*)
-
-INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
diff --git a/platforms/cuda-old/src/CudaForceInfo.cpp b/platforms/cuda-old/src/CudaForceInfo.cpp
deleted file mode 100644
index bc138e0b3831f97d28022a1052f1f215cb159721..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/CudaForceInfo.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/* -------------------------------------------------------------------------- *
- * OpenMM *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from *
- * Simbios, the NIH National Center for Physics-Based Simulation of *
- * Biological Structures at Stanford, funded under the NIH Roadmap for *
- * Medical Research, grant U54 GM072970. See https://simtk.org. *
- * *
- * Portions copyright (c) 2009 Stanford University and the Authors. *
- * Authors: Peter Eastman *
- * Contributors: *
- * *
- * This program is free software: you can redistribute it and/or modify *
- * it under the terms of the GNU Lesser General Public License as published *
- * by the Free Software Foundation, either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU Lesser General Public License for more details. *
- * *
- * You should have received a copy of the GNU Lesser General Public License *
- * along with this program. If not, see . *
- * -------------------------------------------------------------------------- */
-
-#include "CudaForceInfo.h"
-
-using namespace OpenMM;
-using namespace std;
-
-bool CudaForceInfo::areParticlesIdentical(int particle1, int particle2) {
- return true;
-}
-
-int CudaForceInfo::getNumParticleGroups() {
- return 0;
-}
-
-void CudaForceInfo::getParticlesInGroup(int index, vector& particles) {
- return;
-}
-
-bool CudaForceInfo::areGroupsIdentical(int group1, int group2) {
- return true;
-}
diff --git a/platforms/cuda-old/src/CudaForceInfo.h b/platforms/cuda-old/src/CudaForceInfo.h
deleted file mode 100644
index a5652cabcfd9b26d90521fa6111103dfdcb0c1e4..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/CudaForceInfo.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef OPENMM_CUDAFORCEINFO_H_
-#define OPENMM_CUDAFORCEINFO_H_
-
-/* -------------------------------------------------------------------------- *
- * OpenMM *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from *
- * Simbios, the NIH National Center for Physics-Based Simulation of *
- * Biological Structures at Stanford, funded under the NIH Roadmap for *
- * Medical Research, grant U54 GM072970. See https://simtk.org. *
- * *
- * Portions copyright (c) 2009 Stanford University and the Authors. *
- * Authors: Peter Eastman *
- * Contributors: *
- * *
- * This program is free software: you can redistribute it and/or modify *
- * it under the terms of the GNU Lesser General Public License as published *
- * by the Free Software Foundation, either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU Lesser General Public License for more details. *
- * *
- * You should have received a copy of the GNU Lesser General Public License *
- * along with this program. If not, see . *
- * -------------------------------------------------------------------------- */
-
-#include "openmm/internal/windowsExport.h"
-#include
-
-namespace OpenMM {
-
-/**
- * This class is used by the Cuda implementation of a Force class to convey information
- * about the behavior and requirements of that force.
- */
-
-class CudaForceInfo {
-public:
- CudaForceInfo() {
- }
- virtual ~CudaForceInfo() {
- }
- /**
- * Get whether or not two particles have identical force field parameters.
- */
- virtual OPENMM_EXPORT bool areParticlesIdentical(int particle1, int particle2);
- /**
- * Get the number of particle groups defined by this force.
- */
- virtual OPENMM_EXPORT int getNumParticleGroups();
- /**
- * Get the list of particles in a particular group.
- */
- virtual OPENMM_EXPORT void getParticlesInGroup(int index, std::vector& particles);
- /**
- * Get whether two particle groups are identical.
- */
- virtual OPENMM_EXPORT bool areGroupsIdentical(int group1, int group2);
-};
-
-} // namespace OpenMM
-
-#endif /*OPENMM_CUDAFORCEINFO_H_*/
diff --git a/platforms/cuda-old/src/CudaKernelFactory.cpp b/platforms/cuda-old/src/CudaKernelFactory.cpp
deleted file mode 100644
index 3dfae2e5192cf85e39b9c0914c3ebe7e9c433624..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/CudaKernelFactory.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-/* -------------------------------------------------------------------------- *
- * OpenMM *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from *
- * Simbios, the NIH National Center for Physics-Based Simulation of *
- * Biological Structures at Stanford, funded under the NIH Roadmap for *
- * Medical Research, grant U54 GM072970. See https://simtk.org. *
- * *
- * Portions copyright (c) 2008 Stanford University and the Authors. *
- * Authors: Peter Eastman *
- * Contributors: *
- * *
- * This program is free software: you can redistribute it and/or modify *
- * it under the terms of the GNU Lesser General Public License as published *
- * by the Free Software Foundation, either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU Lesser General Public License for more details. *
- * *
- * You should have received a copy of the GNU Lesser General Public License *
- * along with this program. If not, see . *
- * -------------------------------------------------------------------------- */
-
-#include "CudaKernelFactory.h"
-#include "CudaKernels.h"
-#include "openmm/internal/ContextImpl.h"
-#include "openmm/OpenMMException.h"
-
-using namespace OpenMM;
-
-OPENMMCUDA_EXPORT KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
- CudaPlatform::PlatformData& data = *static_cast(context.getPlatformData());
- if (name == CalcForcesAndEnergyKernel::Name())
- return new CudaCalcForcesAndEnergyKernel(name, platform, data);
- if (name == UpdateStateDataKernel::Name())
- return new CudaUpdateStateDataKernel(name, platform, data);
- if (name == ApplyConstraintsKernel::Name())
- return new CudaApplyConstraintsKernel(name, platform, data);
- if (name == VirtualSitesKernel::Name())
- return new CudaVirtualSitesKernel(name, platform);
- if (name == CalcHarmonicBondForceKernel::Name())
- return new CudaCalcHarmonicBondForceKernel(name, platform, data, context.getSystem());
- if (name == CalcCustomBondForceKernel::Name())
- return new CudaCalcCustomBondForceKernel(name, platform, data, context.getSystem());
- if (name == CalcHarmonicAngleForceKernel::Name())
- return new CudaCalcHarmonicAngleForceKernel(name, platform, data, context.getSystem());
- if (name == CalcCustomAngleForceKernel::Name())
- return new CudaCalcCustomAngleForceKernel(name, platform, data, context.getSystem());
- if (name == CalcPeriodicTorsionForceKernel::Name())
- return new CudaCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem());
- if (name == CalcRBTorsionForceKernel::Name())
- return new CudaCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
- if (name == CalcCMAPTorsionForceKernel::Name())
- return new CudaCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem());
- if (name == CalcCustomTorsionForceKernel::Name())
- return new CudaCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
- if (name == CalcNonbondedForceKernel::Name())
- return new CudaCalcNonbondedForceKernel(name, platform, data, context.getSystem());
- if (name == CalcCustomNonbondedForceKernel::Name())
- return new CudaCalcCustomNonbondedForceKernel(name, platform, data, context.getSystem());
- if (name == CalcGBSAOBCForceKernel::Name())
- return new CudaCalcGBSAOBCForceKernel(name, platform, data);
- if (name == CalcGBVIForceKernel::Name())
- return new CudaCalcGBVIForceKernel(name, platform, data);
- if (name == CalcCustomExternalForceKernel::Name())
- return new CudaCalcCustomExternalForceKernel(name, platform, data, context.getSystem());
- if (name == IntegrateVerletStepKernel::Name())
- return new CudaIntegrateVerletStepKernel(name, platform, data);
- if (name == IntegrateLangevinStepKernel::Name())
- return new CudaIntegrateLangevinStepKernel(name, platform, data);
- if (name == IntegrateBrownianStepKernel::Name())
- return new CudaIntegrateBrownianStepKernel(name, platform, data);
- if (name == IntegrateVariableVerletStepKernel::Name())
- return new CudaIntegrateVariableVerletStepKernel(name, platform, data);
- if (name == IntegrateVariableLangevinStepKernel::Name())
- return new CudaIntegrateVariableLangevinStepKernel(name, platform, data);
- if (name == ApplyAndersenThermostatKernel::Name())
- return new CudaApplyAndersenThermostatKernel(name, platform, data);
- if (name == ApplyMonteCarloBarostatKernel::Name())
- return new CudaApplyMonteCarloBarostatKernel(name, platform, data);
- if (name == CalcKineticEnergyKernel::Name())
- return new CudaCalcKineticEnergyKernel(name, platform, data);
- if (name == RemoveCMMotionKernel::Name())
- return new CudaRemoveCMMotionKernel(name, platform, data);
- throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
-}
diff --git a/platforms/cuda-old/src/CudaKernels.cpp b/platforms/cuda-old/src/CudaKernels.cpp
deleted file mode 100644
index 208713a1dad05579698a6e489e5ef3e70cb30999..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/CudaKernels.cpp
+++ /dev/null
@@ -1,1585 +0,0 @@
-/* -------------------------------------------------------------------------- *
- * OpenMM *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from *
- * Simbios, the NIH National Center for Physics-Based Simulation of *
- * Biological Structures at Stanford, funded under the NIH Roadmap for *
- * Medical Research, grant U54 GM072970. See https://simtk.org. *
- * *
- * Portions copyright (c) 2008-2009 Stanford University and the Authors. *
- * Authors: Peter Eastman *
- * Contributors: *
- * *
- * This program is free software: you can redistribute it and/or modify *
- * it under the terms of the GNU Lesser General Public License as published *
- * by the Free Software Foundation, either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU Lesser General Public License for more details. *
- * *
- * You should have received a copy of the GNU Lesser General Public License *
- * along with this program. If not, see . *
- * -------------------------------------------------------------------------- */
-
-#include "CudaKernels.h"
-#include "CudaForceInfo.h"
-#include "openmm/LangevinIntegrator.h"
-#include "openmm/Context.h"
-#include "openmm/OpenMMException.h"
-#include "openmm/internal/AndersenThermostatImpl.h"
-#include "openmm/internal/CMAPTorsionForceImpl.h"
-#include "openmm/internal/ContextImpl.h"
-#include "openmm/internal/NonbondedForceImpl.h"
-#include "kernels/gputypes.h"
-#include "kernels/cudaKernels.h"
-#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
-#include
-
-extern "C" int OPENMMCUDA_EXPORT gpuSetConstants( gpuContext gpu );
-
-using namespace OpenMM;
-using namespace std;
-
-void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
-}
-
-void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
- _gpuContext* gpu = data.gpu;
- if (data.nonbondedMethod != NO_CUTOFF && data.computeForceCount%100 == 0)
- gpuReorderAtoms(gpu);
- if ((data.hasNonbonded && data.nonbondedMethod != NO_CUTOFF && data.nonbondedMethod != CUTOFF) ||
- (data.hasCustomNonbonded && data.customNonbondedMethod != NO_CUTOFF && data.customNonbondedMethod != CUTOFF)) {
- double minAllowedSize = 1.999999*gpu->sim.nonbondedCutoff;
- if (gpu->sim.periodicBoxSizeX < minAllowedSize || gpu->sim.periodicBoxSizeY < minAllowedSize || gpu->sim.periodicBoxSizeZ < minAllowedSize)
- throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
- }
- data.computeForceCount++;
- if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
- kClearBornSumAndForces(gpu);
- else if (includeForces)
- kClearForces(gpu);
- if (includeEnergy)
- kClearEnergy(gpu);
-}
-
-double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
- _gpuContext* gpu = data.gpu;
- if (gpu->bIncludeGBSA || gpu->bIncludeGBVI) {
- gpu->bRecalculateBornRadii = true;
- kCalculateCDLJObcGbsaForces1(gpu);
- kReduceObcGbsaBornForces(gpu);
- if (gpu->bIncludeGBSA ) {
- kCalculateObcGbsaForces2(gpu);
- } else {
- kCalculateGBVIForces2(gpu);
- }
- }
- else if (data.hasNonbonded)
- kCalculateCDLJForces(gpu);
- if (data.hasCustomNonbonded)
- kCalculateCustomNonbondedForces(gpu, data.hasNonbonded);
- kCalculateLocalForces(gpu);
- if (includeForces)
- kReduceForces(gpu);
- double energy = 0.0;
- if (includeEnergy) {
- energy = kReduceEnergy(gpu)+data.ewaldSelfEnergy;
- if (data.dispersionCoefficient != 0.0)
- energy += data.dispersionCoefficient/(gpu->sim.periodicBoxSizeX*gpu->sim.periodicBoxSizeY*gpu->sim.periodicBoxSizeZ);
- }
- return energy;
-}
-
-void CudaUpdateStateDataKernel::initialize(const System& system) {
-}
-
-double CudaUpdateStateDataKernel::getTime(const ContextImpl& context) const {
- return data.time;
-}
-
-void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
- data.time = time;
-}
-
-void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector& positions) {
- _gpuContext* gpu = data.gpu;
- gpu->psPosq4->Download();
- int* order = gpu->psAtomIndex->_pSysData;
- int numParticles = context.getSystem().getNumParticles();
- positions.resize(numParticles);
- for (int i = 0; i < numParticles; ++i) {
- float4 pos = (*gpu->psPosq4)[i];
- int3 offset = gpu->posCellOffsets[i];
- positions[order[i]] = Vec3(pos.x-offset.x*gpu->sim.periodicBoxSizeX, pos.y-offset.y*gpu->sim.periodicBoxSizeY, pos.z-offset.z*gpu->sim.periodicBoxSizeZ);
- }
-}
-
-void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector& positions) {
- _gpuContext* gpu = data.gpu;
- int* order = gpu->psAtomIndex->_pSysData;
- int numParticles = context.getSystem().getNumParticles();
- for (int i = 0; i < numParticles; ++i) {
- float4& pos = (*gpu->psPosq4)[i];
- const Vec3& p = positions[order[i]];
- pos.x = (float) p[0];
- pos.y = (float) p[1];
- pos.z = (float) p[2];
- }
- gpu->psPosq4->Upload();
- for (int i = 0; i < (int) gpu->posCellOffsets.size(); i++)
- gpu->posCellOffsets[i] = make_int3(0, 0, 0);
-}
-
-void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector& velocities) {
- _gpuContext* gpu = data.gpu;
- gpu->psVelm4->Download();
- int* order = gpu->psAtomIndex->_pSysData;
- int numParticles = context.getSystem().getNumParticles();
- velocities.resize(numParticles);
- for (int i = 0; i < numParticles; ++i) {
- float4 vel = (*gpu->psVelm4)[i];
- velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
- }
-}
-
-void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector& velocities) {
- _gpuContext* gpu = data.gpu;
- int* order = gpu->psAtomIndex->_pSysData;
- int numParticles = context.getSystem().getNumParticles();
- for (int i = 0; i < numParticles; ++i) {
- float4& vel = (*gpu->psVelm4)[i];
- const Vec3& v = velocities[order[i]];
- vel.x = (float) v[0];
- vel.y = (float) v[1];
- vel.z = (float) v[2];
- }
- gpu->psVelm4->Upload();
-}
-
-void CudaUpdateStateDataKernel::getForces(ContextImpl& context, std::vector& forces) {
- _gpuContext* gpu = data.gpu;
- int* order = gpu->psAtomIndex->_pSysData;
- gpu->psForce4->Download();
- int numParticles = context.getSystem().getNumParticles();
- forces.resize(numParticles);
- for (int i = 0; i < numParticles; ++i) {
- float4 force = (*gpu->psForce4)[i];
- forces[order[i]] = Vec3(force.x, force.y, force.z);
- }
-}
-
-void CudaUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
- _gpuContext* gpu = data.gpu;
- a = Vec3(gpu->sim.periodicBoxSizeX, 0, 0);
- b = Vec3(0, gpu->sim.periodicBoxSizeY, 0);
- c = Vec3(0, 0, gpu->sim.periodicBoxSizeZ);
-}
-
-void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const {
- _gpuContext* gpu = data.gpu;
- gpuSetPeriodicBoxSize(gpu, a[0], b[1], c[2]);
- gpuSetConstants(gpu);
-}
-
-void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
- throw OpenMMException("CudaPlatform does not support checkpointing");
-}
-
-void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
- throw OpenMMException("CudaPlatform does not support checkpointing");
-}
-
-void CudaApplyConstraintsKernel::initialize(const System& system) {
-}
-
-void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
- kApplyConstraints(data.gpu);
-}
-
-void CudaVirtualSitesKernel::initialize(const System& system) {
-}
-
-void CudaVirtualSitesKernel::computePositions(ContextImpl& context) {
-}
-
-class CudaCalcHarmonicBondForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const HarmonicBondForce& force) : force(force) {
- }
- int getNumParticleGroups() {
- return force.getNumBonds();
- }
- void getParticlesInGroup(int index, std::vector& particles) {
- int particle1, particle2;
- double length, k;
- force.getBondParameters(index, particle1, particle2, length, k);
- particles.resize(2);
- particles[0] = particle1;
- particles[1] = particle2;
- }
- bool areGroupsIdentical(int group1, int group2) {
- int particle1, particle2;
- double length1, length2, k1, k2;
- force.getBondParameters(group1, particle1, particle2, length1, k1);
- force.getBondParameters(group2, particle1, particle2, length2, k2);
- return (length1 == length2 && k1 == k2);
- }
-private:
- const HarmonicBondForce& force;
-};
-
-CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
-}
-
-void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
- data.hasBonds = true;
- numBonds = force.getNumBonds();
- vector particle1(numBonds);
- vector particle2(numBonds);
- vector length(numBonds);
- vector k(numBonds);
- for (int i = 0; i < numBonds; i++) {
- double lengthValue, kValue;
- force.getBondParameters(i, particle1[i], particle2[i], lengthValue, kValue);
- length[i] = (float) lengthValue;
- k[i] = (float) kValue;
- }
- gpuSetBondParameters(data.gpu, particle1, particle2, length, k);
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- return 0.0;
-}
-
-void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-class CudaCalcCustomBondForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const CustomBondForce& force) : force(force) {
- }
- int getNumParticleGroups() {
- return force.getNumBonds();
- }
- void getParticlesInGroup(int index, std::vector& particles) {
- int particle1, particle2;
- vector parameters;
- force.getBondParameters(index, particle1, particle2, parameters);
- particles.resize(2);
- particles[0] = particle1;
- particles[1] = particle2;
- }
- bool areGroupsIdentical(int group1, int group2) {
- int particle1, particle2;
- vector parameters1, parameters2;
- force.getBondParameters(group1, particle1, particle2, parameters1);
- force.getBondParameters(group2, particle1, particle2, parameters2);
- for (int i = 0; i < (int) parameters1.size(); i++)
- if (parameters1[i] != parameters2[i])
- return false;
- return true;
- }
-private:
- const CustomBondForce& force;
-};
-
-CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
-}
-
-void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
- numBonds = force.getNumBonds();
- vector particle1(numBonds);
- vector particle2(numBonds);
- vector > params(numBonds);
- for (int i = 0; i < numBonds; i++)
- force.getBondParameters(i, particle1[i], particle2[i], params[i]);
- vector paramNames;
- for (int i = 0; i < force.getNumPerBondParameters(); i++)
- paramNames.push_back(force.getPerBondParameterName(i));
- globalParamNames.resize(force.getNumGlobalParameters());
- globalParamValues.resize(force.getNumGlobalParameters());
- for (int i = 0; i < force.getNumGlobalParameters(); i++) {
- globalParamNames[i] = force.getGlobalParameterName(i);
- globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
- }
- gpuSetCustomBondParameters(data.gpu, particle1, particle2, params, force.getEnergyFunction(), paramNames, globalParamNames);
- if (globalParamValues.size() > 0)
- SetCustomBondGlobalParams(globalParamValues);
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- updateGlobalParams(context);
- kCalculateCustomBondForces(data.gpu);
- return 0.0;
-}
-
-void CudaCalcCustomBondForceKernel::updateGlobalParams(ContextImpl& context) {
- bool changed = false;
- for (int i = 0; i < (int) globalParamNames.size(); i++) {
- float value = (float) context.getParameter(globalParamNames[i]);
- if (value != globalParamValues[i])
- changed = true;
- globalParamValues[i] = value;
- }
- if (changed)
- SetCustomBondGlobalParams(globalParamValues);
-}
-
-void CudaCalcCustomBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomBondForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-class CudaCalcHarmonicAngleForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const HarmonicAngleForce& force) : force(force) {
- }
- int getNumParticleGroups() {
- return force.getNumAngles();
- }
- void getParticlesInGroup(int index, std::vector& particles) {
- int particle1, particle2, particle3;
- double angle, k;
- force.getAngleParameters(index, particle1, particle2, particle3, angle, k);
- particles.resize(3);
- particles[0] = particle1;
- particles[1] = particle2;
- particles[2] = particle3;
- }
- bool areGroupsIdentical(int group1, int group2) {
- int particle1, particle2, particle3;
- double angle1, angle2, k1, k2;
- force.getAngleParameters(group1, particle1, particle2, particle3, angle1, k1);
- force.getAngleParameters(group2, particle1, particle2, particle3, angle2, k2);
- return (angle1 == angle2 && k1 == k2);
- }
-private:
- const HarmonicAngleForce& force;
-};
-
-CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
-}
-
-void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
- data.hasAngles = true;
- numAngles = force.getNumAngles();
- const float RadiansToDegrees = (float) (180.0/3.14159265);
- vector particle1(numAngles);
- vector particle2(numAngles);
- vector particle3(numAngles);
- vector angle(numAngles);
- vector k(numAngles);
- for (int i = 0; i < numAngles; i++) {
- double angleValue, kValue;
- force.getAngleParameters(i, particle1[i], particle2[i], particle3[i], angleValue, kValue);
- angle[i] = (float) (angleValue*RadiansToDegrees);
- k[i] = (float) kValue;
- }
- gpuSetBondAngleParameters(data.gpu, particle1, particle2, particle3, angle, k);
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- return 0.0;
-}
-
-void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-class CudaCalcCustomAngleForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const CustomAngleForce& force) : force(force) {
- }
- int getNumParticleGroups() {
- return force.getNumAngles();
- }
- void getParticlesInGroup(int index, std::vector& particles) {
- int particle1, particle2, particle3;
- vector parameters;
- force.getAngleParameters(index, particle1, particle2, particle3, parameters);
- particles.resize(3);
- particles[0] = particle1;
- particles[1] = particle2;
- particles[2] = particle3;
- }
- bool areGroupsIdentical(int group1, int group2) {
- int particle1, particle2, particle3;
- vector parameters1, parameters2;
- force.getAngleParameters(group1, particle1, particle2, particle3, parameters1);
- force.getAngleParameters(group2, particle1, particle2, particle3, parameters2);
- for (int i = 0; i < (int) parameters1.size(); i++)
- if (parameters1[i] != parameters2[i])
- return false;
- return true;
- }
-private:
- const CustomAngleForce& force;
-};
-
-CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
-}
-
-void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
- numAngles = force.getNumAngles();
- vector particle1(numAngles);
- vector particle2(numAngles);
- vector particle3(numAngles);
- vector > params(numAngles);
- for (int i = 0; i < numAngles; i++)
- force.getAngleParameters(i, particle1[i], particle2[i], particle3[i], params[i]);
- vector paramNames;
- for (int i = 0; i < force.getNumPerAngleParameters(); i++)
- paramNames.push_back(force.getPerAngleParameterName(i));
- globalParamNames.resize(force.getNumGlobalParameters());
- globalParamValues.resize(force.getNumGlobalParameters());
- for (int i = 0; i < force.getNumGlobalParameters(); i++) {
- globalParamNames[i] = force.getGlobalParameterName(i);
- globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
- }
- gpuSetCustomAngleParameters(data.gpu, particle1, particle2, particle3, params, force.getEnergyFunction(), paramNames, globalParamNames);
- if (globalParamValues.size() > 0)
- SetCustomAngleGlobalParams(globalParamValues);
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- updateGlobalParams(context);
- kCalculateCustomAngleForces(data.gpu);
- return 0.0;
-}
-
-void CudaCalcCustomAngleForceKernel::updateGlobalParams(ContextImpl& context) {
- bool changed = false;
- for (int i = 0; i < (int) globalParamNames.size(); i++) {
- float value = (float) context.getParameter(globalParamNames[i]);
- if (value != globalParamValues[i])
- changed = true;
- globalParamValues[i] = value;
- }
- if (changed)
- SetCustomAngleGlobalParams(globalParamValues);
-}
-
-void CudaCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl& context, const CustomAngleForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-class CudaCalcPeriodicTorsionForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const PeriodicTorsionForce& force) : force(force) {
- }
- int getNumParticleGroups() {
- return force.getNumTorsions();
- }
- void getParticlesInGroup(int index, std::vector& particles) {
- int particle1, particle2, particle3, particle4, periodicity;
- double phase, k;
- force.getTorsionParameters(index, particle1, particle2, particle3, particle4, periodicity, phase, k);
- particles.resize(4);
- particles[0] = particle1;
- particles[1] = particle2;
- particles[2] = particle3;
- particles[3] = particle4;
- }
- bool areGroupsIdentical(int group1, int group2) {
- int particle1, particle2, particle3, particle4, periodicity1, periodicity2;
- double phase1, phase2, k1, k2;
- force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, periodicity1, phase1, k1);
- force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, periodicity2, phase2, k2);
- return (periodicity1 == periodicity2 && phase1 == phase2 && k1 == k2);
- }
-private:
- const PeriodicTorsionForce& force;
-};
-
-CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
-}
-
-void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
- data.hasPeriodicTorsions = true;
- numTorsions = force.getNumTorsions();
- const float RadiansToDegrees = (float)(180.0/3.14159265);
- vector particle1(numTorsions);
- vector particle2(numTorsions);
- vector particle3(numTorsions);
- vector particle4(numTorsions);
- vector k(numTorsions);
- vector phase(numTorsions);
- vector periodicity(numTorsions);
- for (int i = 0; i < numTorsions; i++) {
- double kValue, phaseValue;
- force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], periodicity[i], phaseValue, kValue);
- k[i] = (float) kValue;
- phase[i] = (float) (phaseValue*RadiansToDegrees);
- }
- gpuSetDihedralParameters(data.gpu, particle1, particle2, particle3, particle4, k, phase, periodicity);
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- return 0.0;
-}
-
-void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-class CudaCalcRBTorsionForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const RBTorsionForce& force) : force(force) {
- }
- int getNumParticleGroups() {
- return force.getNumTorsions();
- }
- void getParticlesInGroup(int index, std::vector& particles) {
- int particle1, particle2, particle3, particle4;
- double c0, c1, c2, c3, c4, c5;
- force.getTorsionParameters(index, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
- particles.resize(4);
- particles[0] = particle1;
- particles[1] = particle2;
- particles[2] = particle3;
- particles[3] = particle4;
- }
- bool areGroupsIdentical(int group1, int group2) {
- int particle1, particle2, particle3, particle4;
- double c0a, c0b, c1a, c1b, c2a, c2b, c3a, c3b, c4a, c4b, c5a, c5b;
- force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, c0a, c1a, c2a, c3a, c4a, c5a);
- force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, c0b, c1b, c2b, c3b, c4b, c5b);
- return (c0a == c0b && c1a == c1b && c2a == c2b && c3a == c3b && c4a == c4b && c5a == c5b);
- }
-private:
- const RBTorsionForce& force;
-};
-
-CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
-}
-
-void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
- data.hasRB = true;
- numTorsions = force.getNumTorsions();
- vector particle1(numTorsions);
- vector particle2(numTorsions);
- vector particle3(numTorsions);
- vector particle4(numTorsions);
- vector c0(numTorsions);
- vector c1(numTorsions);
- vector c2(numTorsions);
- vector c3(numTorsions);
- vector c4(numTorsions);
- vector c5(numTorsions);
- for (int i = 0; i < numTorsions; i++) {
- double c[6];
- force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], c[0], c[1], c[2], c[3], c[4], c[5]);
- c0[i] = (float) c[0];
- c1[i] = (float) c[1];
- c2[i] = (float) c[2];
- c3[i] = (float) c[3];
- c4[i] = (float) c[4];
- c5[i] = (float) c[5];
- }
- gpuSetRbDihedralParameters(data.gpu, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- return 0.0;
-}
-
-void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, const RBTorsionForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-class CudaCalcCMAPTorsionForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const CMAPTorsionForce& force) : force(force) {
- }
- int getNumParticleGroups() {
- return force.getNumTorsions();
- }
- void getParticlesInGroup(int index, std::vector& particles) {
- int map, a1, a2, a3, a4, b1, b2, b3, b4;
- force.getTorsionParameters(index, map, a1, a2, a3, a4, b1, b2, b3, b4);
- particles.resize(8);
- particles[0] = a1;
- particles[1] = a2;
- particles[2] = a3;
- particles[3] = a4;
- particles[4] = b1;
- particles[5] = b2;
- particles[6] = b3;
- particles[7] = b4;
- }
- bool areGroupsIdentical(int group1, int group2) {
- int map1, map2, a1, a2, a3, a4, b1, b2, b3, b4;
- force.getTorsionParameters(group1, map1, a1, a2, a3, a4, b1, b2, b3, b4);
- force.getTorsionParameters(group2, map2, a1, a2, a3, a4, b1, b2, b3, b4);
- return (map1 == map2);
- }
-private:
- const CMAPTorsionForce& force;
-};
-
-CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
- if (coefficients != NULL)
- delete coefficients;
- if (mapPositions != NULL)
- delete mapPositions;
- if (torsionMaps != NULL)
- delete torsionMaps;
- if (torsionIndices != NULL)
- delete torsionIndices;
-}
-
-void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
- numTorsions = force.getNumTorsions();
- if (numTorsions == 0)
- return;
- int numMaps = force.getNumMaps();
- vector coeffVec;
- vector mapPositionsVec(numMaps);
- vector energy;
- vector > c;
- int currentPosition = 0;
- mapPositions = new CUDAStream(numMaps, 1, "cmapTorsionMapPositions");
- for (int i = 0; i < numMaps; i++) {
- int size;
- force.getMapParameters(i, size, energy);
- CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
- (*mapPositions)[i] = make_int2(currentPosition, size);
- currentPosition += 4*size*size;
- for (int j = 0; j < size*size; j++) {
- coeffVec.push_back(make_float4(c[j][0], c[j][1], c[j][2], c[j][3]));
- coeffVec.push_back(make_float4(c[j][4], c[j][5], c[j][6], c[j][7]));
- coeffVec.push_back(make_float4(c[j][8], c[j][9], c[j][10], c[j][11]));
- coeffVec.push_back(make_float4(c[j][12], c[j][13], c[j][14], c[j][15]));
- }
- }
- coefficients = new CUDAStream((int) coeffVec.size(), 1, "cmapTorsionCoefficients");;
- for (int i = 0; i < (int) coeffVec.size(); i++)
- (*coefficients)[i] = coeffVec[i];
- torsionMaps = new CUDAStream(numTorsions, 1, "cmapTorsionMaps");
- torsionIndices = new CUDAStream(4*numTorsions, 1, "cmapTorsionIndices");
- vector forceBufferCounter(system.getNumParticles(), 0);
- for (int i = 0; i < numTorsions; i++) {
- int map, a1, a2, a3, a4, b1, b2, b3, b4;
- force.getTorsionParameters(i, map, a1, a2, a3, a4, b1, b2, b3, b4);
- (*torsionMaps)[i] = map;
- (*torsionIndices)[i*4] = make_int4(a1, a2, a3, a4);
- (*torsionIndices)[i*4+1] = make_int4(b1, b2, b3, b4);
- (*torsionIndices)[i*4+2] = make_int4(forceBufferCounter[a1]++, forceBufferCounter[a2]++, forceBufferCounter[a3]++, forceBufferCounter[a4]++);
- (*torsionIndices)[i*4+3] = make_int4(forceBufferCounter[b1]++, forceBufferCounter[b2]++, forceBufferCounter[b3]++, forceBufferCounter[b4]++);
- }
- coefficients->Upload();
- mapPositions->Upload();
- torsionMaps->Upload();
- torsionIndices->Upload();
- int maxBuffers = 1;
- for (int i = 0; i < (int) forceBufferCounter.size(); i++)
- maxBuffers = max(maxBuffers, forceBufferCounter[i]);
- if (maxBuffers > data.gpu->sim.outputBuffers)
- data.gpu->sim.outputBuffers = maxBuffers;
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- if( numTorsions )
- kCalculateCMAPTorsionForces(data.gpu, *coefficients, *mapPositions, *torsionIndices, *torsionMaps);
- return 0.0;
-}
-
-class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const CustomTorsionForce& force) : force(force) {
- }
- int getNumParticleGroups() {
- return force.getNumTorsions();
- }
- void getParticlesInGroup(int index, std::vector& particles) {
- int particle1, particle2, particle3, particle4;
- vector parameters;
- force.getTorsionParameters(index, particle1, particle2, particle3, particle4, parameters);
- particles.resize(4);
- particles[0] = particle1;
- particles[1] = particle2;
- particles[2] = particle3;
- particles[3] = particle4;
- }
- bool areGroupsIdentical(int group1, int group2) {
- int particle1, particle2, particle3, particle4;
- vector parameters1, parameters2;
- force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, parameters1);
- force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, parameters2);
- for (int i = 0; i < (int) parameters1.size(); i++)
- if (parameters1[i] != parameters2[i])
- return false;
- return true;
- }
-private:
- const CustomTorsionForce& force;
-};
-
-CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
-}
-
-void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
- numTorsions = force.getNumTorsions();
- vector particle1(numTorsions);
- vector particle2(numTorsions);
- vector particle3(numTorsions);
- vector particle4(numTorsions);
- vector > params(numTorsions);
- for (int i = 0; i < numTorsions; i++)
- force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], params[i]);
- vector paramNames;
- for (int i = 0; i < force.getNumPerTorsionParameters(); i++)
- paramNames.push_back(force.getPerTorsionParameterName(i));
- globalParamNames.resize(force.getNumGlobalParameters());
- globalParamValues.resize(force.getNumGlobalParameters());
- for (int i = 0; i < force.getNumGlobalParameters(); i++) {
- globalParamNames[i] = force.getGlobalParameterName(i);
- globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
- }
- gpuSetCustomTorsionParameters(data.gpu, particle1, particle2, particle3, particle4, params, force.getEnergyFunction(), paramNames, globalParamNames);
- if (globalParamValues.size() > 0)
- SetCustomTorsionGlobalParams(globalParamValues);
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- updateGlobalParams(context);
- kCalculateCustomTorsionForces(data.gpu);
- return 0.0;
-}
-
-void CudaCalcCustomTorsionForceKernel::updateGlobalParams(ContextImpl& context) {
- bool changed = false;
- for (int i = 0; i < (int) globalParamNames.size(); i++) {
- float value = (float) context.getParameter(globalParamNames[i]);
- if (value != globalParamValues[i])
- changed = true;
- globalParamValues[i] = value;
- }
- if (changed)
- SetCustomTorsionGlobalParams(globalParamValues);
-}
-
-void CudaCalcCustomTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-class CudaCalcNonbondedForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const NonbondedForce& force) : force(force) {
- }
- bool areParticlesIdentical(int particle1, int particle2) {
- double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2;
- force.getParticleParameters(particle1, charge1, sigma1, epsilon1);
- force.getParticleParameters(particle2, charge2, sigma2, epsilon2);
- return (charge1 == charge2 && sigma1 == sigma2 && epsilon1 == epsilon2);
- }
- int getNumParticleGroups() {
- return force.getNumExceptions();
- }
- void getParticlesInGroup(int index, std::vector& particles) {
- int particle1, particle2;
- double chargeProd, sigma, epsilon;
- force.getExceptionParameters(index, particle1, particle2, chargeProd, sigma, epsilon);
- particles.resize(2);
- particles[0] = particle1;
- particles[1] = particle2;
- }
- bool areGroupsIdentical(int group1, int group2) {
- int particle1, particle2;
- double chargeProd1, chargeProd2, sigma1, sigma2, epsilon1, epsilon2;
- force.getExceptionParameters(group1, particle1, particle2, chargeProd1, sigma1, epsilon1);
- force.getExceptionParameters(group2, particle1, particle2, chargeProd2, sigma2, epsilon2);
- return (chargeProd1 == chargeProd2 && sigma1 == sigma2 && epsilon1 == epsilon2);
- }
-private:
- const NonbondedForce& force;
-};
-
-CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
-}
-
-void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
- data.hasNonbonded = true;
- numParticles = force.getNumParticles();
- _gpuContext* gpu = data.gpu;
-
- // Identify which exceptions are 1-4 interactions.
-
- vector > exclusions;
- vector exceptions;
- for (int i = 0; i < force.getNumExceptions(); i++) {
- int particle1, particle2;
- double chargeProd, sigma, epsilon;
- force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
- exclusions.push_back(pair(particle1, particle2));
- if (chargeProd != 0.0 || epsilon != 0.0)
- exceptions.push_back(i);
- }
-
- // Initialize nonbonded interactions.
-
- {
- vector particle(numParticles);
- vector c6(numParticles);
- vector c12(numParticles);
- vector q(numParticles);
- vector symbol;
- vector > exclusionList(numParticles);
- for (int i = 0; i < numParticles; i++) {
- double charge, radius, depth;
- force.getParticleParameters(i, charge, radius, depth);
- particle[i] = i;
- q[i] = (float) charge;
- c6[i] = (float) (4*depth*pow(radius, 6.0));
- c12[i] = (float) (4*depth*pow(radius, 12.0));
- exclusionList[i].push_back(i);
- }
- for (int i = 0; i < (int)exclusions.size(); i++) {
- exclusionList[exclusions[i].first].push_back(exclusions[i].second);
- exclusionList[exclusions[i].second].push_back(exclusions[i].first);
- }
- CudaNonbondedMethod method = NO_CUTOFF;
- if (force.getNonbondedMethod() != NonbondedForce::NoCutoff) {
- gpuSetNonbondedCutoff(gpu, (float) force.getCutoffDistance(), (float) force.getReactionFieldDielectric());
- method = CUTOFF;
- }
- if (force.getNonbondedMethod() == NonbondedForce::CutoffPeriodic) {
- method = PERIODIC;
- }
- if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
- if (force.getReciprocalSpaceForceGroup() > 0)
- throw OpenMMException("CudaPlatform does not support force groups");
- if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
- double alpha;
- int kmaxx, kmaxy, kmaxz;
- NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
- gpuSetEwaldParameters(gpu, (float) alpha, kmaxx, kmaxy, kmaxz);
- method = EWALD;
- }
- else {
- double alpha;
- int gridSizeX, gridSizeY, gridSizeZ;
- NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
- gpuSetPMEParameters(gpu, (float) alpha, gridSizeX, gridSizeY, gridSizeZ);
- method = PARTICLE_MESH_EWALD;
- }
- }
- data.nonbondedMethod = method;
- gpuSetCoulombParameters(gpu, (float) ONE_4PI_EPS0, particle, c6, c12, q, symbol, exclusionList, method);
-
- // Compute the Ewald self energy.
-
- data.ewaldSelfEnergy = 0.0;
- if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
- double selfEnergyScale = gpu->sim.epsfac*gpu->sim.alphaEwald/std::sqrt(PI);
- for (int i = 0; i < numParticles; i++)
- data.ewaldSelfEnergy -= selfEnergyScale*q[i]*q[i];
- }
-
- // Compute the long range dispersion correction.
-
- if (force.getUseDispersionCorrection())
- data.dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
- else
- data.dispersionCoefficient = 0.0;
- }
-
- // Initialize 1-4 nonbonded interactions.
-
- {
- int numExceptions = exceptions.size();
- vector particle1(numExceptions);
- vector particle2(numExceptions);
- vector c6(numExceptions);
- vector c12(numExceptions);
- vector q1(numExceptions);
- vector q2(numExceptions);
- for (int i = 0; i < numExceptions; i++) {
- double charge, sig, eps;
- force.getExceptionParameters(exceptions[i], particle1[i], particle2[i], charge, sig, eps);
- c6[i] = (float) (4*eps*pow(sig, 6.0));
- c12[i] = (float) (4*eps*pow(sig, 12.0));
- q1[i] = (float) charge;
- q2[i] = 1.0f;
- }
- gpuSetLJ14Parameters(gpu, (float) ONE_4PI_EPS0, 1.0f, particle1, particle2, c6, c12, q1, q2);
- }
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
- return 0.0;
-}
-
-void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-class CudaCalcCustomNonbondedForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const CustomNonbondedForce& force) : force(force) {
- }
- bool areParticlesIdentical(int particle1, int particle2) {
- vector params1;
- vector params2;
- force.getParticleParameters(particle1, params1);
- force.getParticleParameters(particle2, params2);
- for (int i = 0; i < (int) params1.size(); i++)
- if (params1[i] != params2[i])
- return false;
- return true;
- }
- int getNumParticleGroups() {
- return force.getNumExclusions();
- }
- void getParticlesInGroup(int index, std::vector& particles) {
- int particle1, particle2;
- force.getExclusionParticles(index, particle1, particle2);
- particles.resize(2);
- particles[0] = particle1;
- particles[1] = particle2;
- }
- bool areGroupsIdentical(int group1, int group2) {
- return true;
- }
-private:
- const CustomNonbondedForce& force;
-};
-
-CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
-}
-
-void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
- data.hasCustomNonbonded = true;
- numParticles = force.getNumParticles();
- _gpuContext* gpu = data.gpu;
-
- // Initialize nonbonded interactions.
-
- vector particle(numParticles);
- vector > parameters(numParticles);
- vector > exclusionList(numParticles);
- for (int i = 0; i < numParticles; i++) {
- force.getParticleParameters(i, parameters[i]);
- particle[i] = i;
- exclusionList[i].push_back(i);
- }
- for (int i = 0; i < force.getNumExclusions(); i++) {
- int particle1, particle2;
- force.getExclusionParticles(i, particle1, particle2);
- exclusionList[particle1].push_back(particle2);
- exclusionList[particle2].push_back(particle1);
- }
- CudaNonbondedMethod method = NO_CUTOFF;
- if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
- method = CUTOFF;
- if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic) {
- method = PERIODIC;
- }
- data.customNonbondedMethod = method;
-
- // Record the tabulated functions.
-
- for (int i = 0; i < force.getNumFunctions(); i++) {
- string name;
- vector values;
- double min, max;
- force.getFunctionParameters(i, name, values, min, max);
- gpuSetTabulatedFunction(gpu, i, name, values, min, max);
- }
-
- // Record information for the expressions.
-
- vector paramNames;
- for (int i = 0; i < force.getNumPerParticleParameters(); i++)
- paramNames.push_back(force.getPerParticleParameterName(i));
- globalParamNames.resize(force.getNumGlobalParameters());
- globalParamValues.resize(force.getNumGlobalParameters());
- for (int i = 0; i < force.getNumGlobalParameters(); i++) {
- globalParamNames[i] = force.getGlobalParameterName(i);
- globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
- }
- gpuSetCustomNonbondedParameters(gpu, parameters, exclusionList, method, (float) force.getCutoffDistance(), force.getEnergyFunction(), paramNames, globalParamNames);
- if (globalParamValues.size() > 0)
- SetCustomNonbondedGlobalParams(globalParamValues);
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- updateGlobalParams(context);
- return 0.0;
-}
-
-void CudaCalcCustomNonbondedForceKernel::updateGlobalParams(ContextImpl& context) {
- bool changed = false;
- for (int i = 0; i < (int) globalParamNames.size(); i++) {
- float value = (float) context.getParameter(globalParamNames[i]);
- if (value != globalParamValues[i])
- changed = true;
- globalParamValues[i] = value;
- }
- if (changed)
- SetCustomNonbondedGlobalParams(globalParamValues);
-}
-
-void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-class CudaCalcGBSAOBCForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const GBSAOBCForce& force) : force(force) {
- }
- bool areParticlesIdentical(int particle1, int particle2) {
- double charge1, charge2, radius1, radius2, scale1, scale2;
- force.getParticleParameters(particle1, charge1, radius1, scale1);
- force.getParticleParameters(particle2, charge2, radius2, scale2);
- return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
- }
-private:
- const GBSAOBCForce& force;
-};
-
-CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
-}
-
-void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
-
- int numParticles = system.getNumParticles();
- _gpuContext* gpu = data.gpu;
- vector radius(numParticles);
- vector scale(numParticles);
- vector charge(numParticles);
- for (int i = 0; i < numParticles; i++) {
- double particleCharge, particleRadius, scalingFactor;
- force.getParticleParameters(i, particleCharge, particleRadius, scalingFactor);
- radius[i] = (float) particleRadius;
- scale[i] = (float) scalingFactor;
- charge[i] = (float) particleCharge;
- }
- gpuSetObcParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), radius, scale, charge);
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- return 0.0;
-}
-
-void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-class CudaCalcGBVIForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const GBVIForce& force) : force(force) {
- }
- bool areParticlesIdentical(int particle1, int particle2) {
- double charge1, charge2, radius1, radius2, gamma1, gamma2;
- force.getParticleParameters(particle1, charge1, radius1, gamma1);
- force.getParticleParameters(particle2, charge2, radius2, gamma2);
- return (charge1 == charge2 && radius1 == radius2 && gamma1 == gamma2);
- }
-private:
- const GBVIForce& force;
-};
-
-CudaCalcGBVIForceKernel::~CudaCalcGBVIForceKernel() {
-}
-
-void CudaCalcGBVIForceKernel::initialize(const System& system, const GBVIForce& force, const std::vector & inputScaledRadii) {
-
- int numParticles = system.getNumParticles();
- _gpuContext* gpu = data.gpu;
-
- vector particle(numParticles);
- vector radius(numParticles);
- vector scaledRadii(numParticles);
- vector gammas(numParticles);
-
- for (int i = 0; i < numParticles; i++) {
- double charge, particleRadius, gamma;
- force.getParticleParameters(i, charge, particleRadius, gamma );
- particle[i] = i;
- radius[i] = (float) particleRadius;
- gammas[i] = (float) gamma;
- scaledRadii[i] = (float) inputScaledRadii[i];
- }
-
- int gbviBornRadiusScalingMethod;
- if( force.getBornRadiusScalingMethod() == GBVIForce::QuinticSpline ){
- gbviBornRadiusScalingMethod = 1;
- } else {
- gbviBornRadiusScalingMethod = 2;
- }
- gpuSetGBVIParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), particle,
- radius, gammas, scaledRadii, gbviBornRadiusScalingMethod,
- static_cast(force.getQuinticLowerLimitFactor()),
- static_cast(force.getQuinticUpperBornRadiusLimit()) );
-
- data.gpu->forces.push_back(new ForceInfo(force));
-}
-
-double CudaCalcGBVIForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- return 0.0;
-}
-
-class CudaCalcCustomExternalForceKernel::ForceInfo : public CudaForceInfo {
-public:
- ForceInfo(const CustomExternalForce& force, int numParticles) : force(force), indices(numParticles, -1) {
- vector params;
- for (int i = 0; i < force.getNumParticles(); i++) {
- int particle;
- force.getParticleParameters(i, particle, params);
- indices[particle] = i;
- }
- }
- bool areParticlesIdentical(int particle1, int particle2) {
- particle1 = indices[particle1];
- particle2 = indices[particle2];
- if (particle1 == -1 && particle2 == -1)
- return true;
- if (particle1 == -1 || particle2 == -1)
- return false;
- int temp;
- vector params1;
- vector params2;
- force.getParticleParameters(particle1, temp, params1);
- force.getParticleParameters(particle2, temp, params2);
- for (int i = 0; i < (int) params1.size(); i++)
- if (params1[i] != params2[i])
- return false;
- return true;
- }
-private:
- const CustomExternalForce& force;
- vector indices;
-};
-
-CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
-}
-
-void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
- numParticles = force.getNumParticles();
- vector particle(numParticles);
- vector > params(numParticles);
- for (int i = 0; i < numParticles; i++)
- force.getParticleParameters(i, particle[i], params[i]);
- vector paramNames;
- for (int i = 0; i < force.getNumPerParticleParameters(); i++)
- paramNames.push_back(force.getPerParticleParameterName(i));
- globalParamNames.resize(force.getNumGlobalParameters());
- globalParamValues.resize(force.getNumGlobalParameters());
- for (int i = 0; i < force.getNumGlobalParameters(); i++) {
- globalParamNames[i] = force.getGlobalParameterName(i);
- globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
- }
- gpuSetCustomExternalParameters(data.gpu, particle, params, force.getEnergyFunction(), paramNames, globalParamNames);
- if (globalParamValues.size() > 0)
- SetCustomExternalGlobalParams(globalParamValues);
- data.gpu->forces.push_back(new ForceInfo(force, system.getNumParticles()));
-}
-
-double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
- updateGlobalParams(context);
- kCalculateCustomExternalForces(data.gpu);
- return 0.0;
-}
-
-void CudaCalcCustomExternalForceKernel::updateGlobalParams(ContextImpl& context) {
- bool changed = false;
- for (int i = 0; i < (int) globalParamNames.size(); i++) {
- float value = (float) context.getParameter(globalParamNames[i]);
- if (value != globalParamValues[i])
- changed = true;
- globalParamValues[i] = value;
- }
- if (changed)
- SetCustomExternalGlobalParams(globalParamValues);
-}
-
-void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& context, const CustomExternalForce& force) {
- throw OpenMMException("CudaPlatform does not support copyParametersToContext");
-}
-
-void OPENMMCUDA_EXPORT OpenMM::cudaOpenMMInitializeIntegration(const System& system, CudaPlatform::PlatformData& data, const Integrator& integrator) {
-
- // Initialize any terms that haven't already been handled by a Force.
-
- _gpuContext* gpu = data.gpu;
- if (!data.hasBonds)
- gpuSetBondParameters(gpu, vector(), vector(), vector(), vector());
- if (!data.hasAngles)
- gpuSetBondAngleParameters(gpu, vector(), vector(), vector(), vector(), vector());
- if (!data.hasPeriodicTorsions)
- gpuSetDihedralParameters(gpu, vector(), vector(), vector(), vector(), vector(), vector(), vector());
- if (!data.hasRB)
- gpuSetRbDihedralParameters(gpu, vector(), vector(), vector(), vector(), vector(), vector(),
- vector(), vector(), vector(), vector());
- if (!data.hasNonbonded) {
- gpuSetCoulombParameters(gpu, (float) ONE_4PI_EPS0, vector(), vector(), vector(), vector(), vector(), vector >(), NO_CUTOFF);
- gpuSetLJ14Parameters(gpu, (float) ONE_4PI_EPS0, 1.0f, vector(), vector(), vector(), vector(), vector(), vector());
- if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
- throw OpenMMException("CudaPlatform requires GBSAOBCForce and GBVIForce to be used with a NonbondedForce");
- }
-
- // Set masses.
-
- int numParticles = system.getNumParticles();
- vector mass(numParticles);
- for (int i = 0; i < numParticles; i++)
- mass[i] = (float) system.getParticleMass(i);
- gpuSetMass(gpu, mass);
-
- // Set constraints.
-
- int numConstraints = system.getNumConstraints();
- vector particle1(numConstraints);
- vector particle2(numConstraints);
- vector distance(numConstraints);
- vector invMass1(numConstraints);
- vector invMass2(numConstraints);
- for (int i = 0; i < numConstraints; i++) {
- int particle1Index, particle2Index;
- double constraintDistance;
- system.getConstraintParameters(i, particle1Index, particle2Index, constraintDistance);
- particle1[i] = particle1Index;
- particle2[i] = particle2Index;
- distance[i] = (float) constraintDistance;
- invMass1[i] = 1.0f/mass[particle1Index];
- invMass2[i] = 1.0f/mass[particle2Index];
- }
- gpuSetConstraintParameters(gpu, particle1, particle2, distance, invMass1, invMass2, (float)integrator.getConstraintTolerance());
-
- // Finish initialization.
-
- gpuBuildThreadBlockWorkList(gpu);
- gpuBuildExclusionList(gpu);
- gpuBuildOutputBuffers(gpu);
- gpuSetConstants(gpu);
- if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
- kClearBornSumAndForces(gpu);
- else
- kClearForces(gpu);
- cudaThreadSynchronize();
-}
-
-CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
-}
-
-void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
- cudaOpenMMInitializeIntegration(system, data, integrator);
- prevStepSize = -1.0;
-}
-
-void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
- _gpuContext* gpu = data.gpu;
- double stepSize = integrator.getStepSize();
- if (stepSize != prevStepSize) {
- // Initialize the GPU parameters.
-
- gpuSetVerletIntegrationParameters(gpu, (float) stepSize, 0.0f);
- gpuSetConstants(gpu);
- prevStepSize = stepSize;
- }
- kVerletUpdatePart1(gpu);
- kApplyShake(gpu);
- kApplySettle(gpu);
- kApplyCCMA(gpu);
- if (data.removeCM)
- if (data.stepCount%data.cmMotionFrequency == 0)
- gpu->bCalculateCM = true;
- kVerletUpdatePart2(gpu);
- data.time += stepSize;
- data.stepCount++;
-}
-
-CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
-}
-
-void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
- cudaOpenMMInitializeIntegration(system, data, integrator);
- _gpuContext* gpu = data.gpu;
- gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
- gpuInitializeRandoms(gpu);
- prevTemp = -1.0;
- prevFriction = -1.0;
- prevStepSize = -1.0;
-}
-
-void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
- _gpuContext* gpu = data.gpu;
- double temperature = integrator.getTemperature();
- double friction = integrator.getFriction();
- double stepSize = integrator.getStepSize();
- if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
- // Initialize the GPU parameters.
-
- double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
- gpuSetLangevinIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature, 0.0f);
- gpuSetConstants(gpu);
- kGenerateRandoms(gpu);
- prevTemp = temperature;
- prevFriction = friction;
- prevStepSize = stepSize;
- }
- kLangevinUpdatePart1(gpu);
- if (data.removeCM)
- if (data.stepCount%data.cmMotionFrequency == 0)
- gpu->bCalculateCM = true;
- kLangevinUpdatePart2(gpu);
- kApplyShake(gpu);
- kApplySettle(gpu);
- kApplyCCMA(gpu);
- kSetVelocitiesFromPositions(gpu);
- data.time += stepSize;
- data.stepCount++;
-}
-
-CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
-}
-
-void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
- cudaOpenMMInitializeIntegration(system, data, integrator);
- _gpuContext* gpu = data.gpu;
- gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
- gpuInitializeRandoms(gpu);
- prevTemp = -1.0;
- prevFriction = -1.0;
- prevStepSize = -1.0;
-}
-
-void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
- _gpuContext* gpu = data.gpu;
- double temperature = integrator.getTemperature();
- double friction = integrator.getFriction();
- double stepSize = integrator.getStepSize();
- if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
- // Initialize the GPU parameters.
-
- double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
- gpuSetBrownianIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature);
- gpuSetConstants(gpu);
- kGenerateRandoms(gpu);
- prevTemp = temperature;
- prevFriction = friction;
- prevStepSize = stepSize;
- }
- kBrownianUpdatePart1(gpu);
- kApplyShake(gpu);
- kApplySettle(gpu);
- kApplyCCMA(gpu);
- if (data.removeCM)
- if (data.stepCount%data.cmMotionFrequency == 0)
- gpu->bCalculateCM = true;
- kBrownianUpdatePart2(gpu);
- data.time += stepSize;
- data.stepCount++;
-}
-
-CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() {
-}
-
-void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
- cudaOpenMMInitializeIntegration(system, data, integrator);
- prevErrorTol = -1.0;
-}
-
-double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
- _gpuContext* gpu = data.gpu;
- double errorTol = integrator.getErrorTolerance();
- if (errorTol != prevErrorTol) {
- // Initialize the GPU parameters.
-
- gpuSetVerletIntegrationParameters(gpu, 0.0f, (float) errorTol);
- gpuSetConstants(gpu);
- prevErrorTol = errorTol;
- }
- float maxStepSize = (float)(maxTime-data.time);
- kSelectVerletStepSize(gpu, maxStepSize);
- kVerletUpdatePart1(gpu);
- kApplyShake(gpu);
- kApplySettle(gpu);
- kApplyCCMA(gpu);
- if (data.removeCM)
- if (data.stepCount%data.cmMotionFrequency == 0)
- gpu->bCalculateCM = true;
- kVerletUpdatePart2(gpu);
- gpu->psStepSize->Download();
- data.time += (*gpu->psStepSize)[0].y;
- if ((*gpu->psStepSize)[0].y == maxStepSize)
- data.time = maxTime; // Avoid round-off error
- data.stepCount++;
- return (*gpu->psStepSize)[0].y;
-}
-
-CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKernel() {
-}
-
-void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
- cudaOpenMMInitializeIntegration(system, data, integrator);
- _gpuContext* gpu = data.gpu;
- gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
- gpuInitializeRandoms(gpu);
- prevTemp = -1.0;
- prevFriction = -1.0;
- prevErrorTol = -1.0;
-}
-
-double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
- _gpuContext* gpu = data.gpu;
- double temperature = integrator.getTemperature();
- double friction = integrator.getFriction();
- double errorTol = integrator.getErrorTolerance();
- if (temperature != prevTemp || friction != prevFriction || errorTol != prevErrorTol) {
- // Initialize the GPU parameters.
-
- double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
- gpuSetLangevinIntegrationParameters(gpu, (float) tau, 0.0f, (float) temperature, (float) errorTol);
- gpuSetConstants(gpu);
- kGenerateRandoms(gpu);
- prevTemp = temperature;
- prevFriction = friction;
- prevErrorTol = errorTol;
- }
- float maxStepSize = (float)(maxTime-data.time);
- kSelectLangevinStepSize(gpu, maxStepSize);
- kLangevinUpdatePart1(gpu);
- if (data.removeCM)
- if (data.stepCount%data.cmMotionFrequency == 0)
- gpu->bCalculateCM = true;
- kLangevinUpdatePart2(gpu);
- kApplyShake(gpu);
- kApplySettle(gpu);
- kApplyCCMA(gpu);
- kSetVelocitiesFromPositions(gpu);
- gpu->psStepSize->Download();
- data.time += (*gpu->psStepSize)[0].y;
- if ((*gpu->psStepSize)[0].y == maxStepSize)
- data.time = maxTime; // Avoid round-off error
- data.stepCount++;
- return (*gpu->psStepSize)[0].y;
-}
-
-CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
- if (atomGroups != NULL)
- delete atomGroups;
-}
-
-void CudaApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
- _gpuContext* gpu = data.gpu;
- gpu->seed = (unsigned long) thermostat.getRandomNumberSeed();
- gpuInitializeRandoms(gpu);
- prevTemp = -1.0;
- prevFrequency = -1.0;
- prevStepSize = -1.0;
-
- // Create the arrays with the group definitions.
-
- vector > groups = AndersenThermostatImpl::calcParticleGroups(system);
- atomGroups = new CUDAStream(system.getNumParticles(), 1, "atomGroups");
- for (int i = 0; i < (int) groups.size(); i++) {
- for (int j = 0; j < (int) groups[i].size(); j++)
- (*atomGroups)[groups[i][j]] = i;
- }
- atomGroups->Upload();
-}
-
-void CudaApplyAndersenThermostatKernel::execute(ContextImpl& context) {
- _gpuContext* gpu = data.gpu;
- double temperature = context.getParameter(AndersenThermostat::Temperature());
- double frequency = context.getParameter(AndersenThermostat::CollisionFrequency());
- double stepSize = context.getIntegrator().getStepSize();
- if (temperature != prevTemp || frequency != prevFrequency || stepSize != prevStepSize) {
- // Initialize the GPU parameters.
-
- gpuSetAndersenThermostatParameters(gpu, (float) temperature, (float) frequency);
- gpuSetConstants(gpu);
- kGenerateRandoms(gpu);
- prevTemp = temperature;
- prevFrequency = frequency;
- prevStepSize = stepSize;
- }
- kCalculateAndersenThermostat(gpu, *atomGroups);
-}
-
-CudaApplyMonteCarloBarostatKernel::~CudaApplyMonteCarloBarostatKernel() {
- if (moleculeAtoms != NULL)
- delete moleculeAtoms;
- if (moleculeStartIndex != NULL)
- delete moleculeStartIndex;
-}
-
-void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const MonteCarloBarostat& thermostat) {
-}
-
-void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, double scale) {
- if (!hasInitializedMolecules) {
- hasInitializedMolecules = true;
-
- // Create the arrays with the molecule definitions.
-
- vector > molecules = context.getMolecules();
- numMolecules = molecules.size();
- moleculeAtoms = new CUDAStream(context.getSystem().getNumParticles(), 1, "moleculeAtoms");
- moleculeStartIndex = new CUDAStream(numMolecules+1, 1, "moleculeStartIndex");
- int index = 0;
- for (int i = 0; i < numMolecules; i++) {
- (*moleculeStartIndex)[i] = index;
- for (int j = 0; j < (int) molecules[i].size(); j++)
- (*moleculeAtoms)[index++] = molecules[i][j];
- }
- (*moleculeStartIndex)[numMolecules] = index;
- moleculeAtoms->Upload();
- moleculeStartIndex->Upload();
- }
- _gpuContext* gpu = data.gpu;
- gpu->psPosqP4->CopyFrom(*gpu->psPosq4);
- kScaleAtomCoordinates(gpu, scale, *moleculeAtoms, *moleculeStartIndex);
- for (int i = 0; i < (int) gpu->posCellOffsets.size(); i++)
- gpu->posCellOffsets[i] = make_int3(0, 0, 0);
-}
-
-void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
- _gpuContext* gpu = data.gpu;
- gpu->psPosq4->CopyFrom(*gpu->psPosqP4);
-}
-
-void CudaCalcKineticEnergyKernel::initialize(const System& system) {
- int numParticles = system.getNumParticles();
- masses.resize(numParticles);
- for (int i = 0; i < numParticles; ++i)
- masses[i] = system.getParticleMass(i);
-}
-
-double CudaCalcKineticEnergyKernel::execute(ContextImpl& context) {
- // We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy
- // on the CPU.
-
- _gpuContext* gpu = data.gpu;
- gpu->psVelm4->Download();
- double energy = 0.0;
- for (int i = 0; i < (int) masses.size(); ++i) {
- float4 v = (*gpu->psVelm4)[i];
- energy += masses[i]*(v.x*v.x+v.y*v.y+v.z*v.z);
- }
- return 0.5*energy;
-}
-
-void CudaRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
- data.removeCM = true;
- data.cmMotionFrequency = force.getFrequency();
-}
-
-void CudaRemoveCMMotionKernel::execute(ContextImpl& context) {
-}
diff --git a/platforms/cuda-old/src/CudaKernels.h b/platforms/cuda-old/src/CudaKernels.h
deleted file mode 100644
index 4f414dcad793d1fab429c50cdb07e1fe61196f91..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/CudaKernels.h
+++ /dev/null
@@ -1,982 +0,0 @@
-#ifndef OPENMM_CUDAKERNELS_H_
-#define OPENMM_CUDAKERNELS_H_
-
-/* -------------------------------------------------------------------------- *
- * OpenMM *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from *
- * Simbios, the NIH National Center for Physics-Based Simulation of *
- * Biological Structures at Stanford, funded under the NIH Roadmap for *
- * Medical Research, grant U54 GM072970. See https://simtk.org. *
- * *
- * Portions copyright (c) 2008-2012 Stanford University and the Authors. *
- * Authors: Peter Eastman *
- * Contributors: *
- * *
- * This program is free software: you can redistribute it and/or modify *
- * it under the terms of the GNU Lesser General Public License as published *
- * by the Free Software Foundation, either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU Lesser General Public License for more details. *
- * *
- * You should have received a copy of the GNU Lesser General Public License *
- * along with this program. If not, see . *
- * -------------------------------------------------------------------------- */
-
-#include "CudaPlatform.h"
-#include "openmm/kernels.h"
-#include "kernels/gputypes.h"
-#include "openmm/System.h"
-
-class CudaAndersenThermostat;
-class CudaBrownianDynamics;
-class CudaStochasticDynamics;
-class CudaShakeAlgorithm;
-class CudaVerletDynamics;
-
-namespace OpenMM {
-
-// Export internal cudaOpenMMInitializeIntegration() method so it can be used by NML plugin
-void OPENMMCUDA_EXPORT cudaOpenMMInitializeIntegration(const System& system, CudaPlatform::PlatformData& data, const Integrator& integrator);
-
-/**
- * This kernel is invoked at the beginning and end of force and energy computations. It gives the
- * Platform a chance to clear buffers and do other initialization at the beginning, and to do any
- * necessary work at the end to determine the final results.
- */
-class CudaCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel {
-public:
- CudaCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : CalcForcesAndEnergyKernel(name, platform), data(data) {
- }
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- */
- void initialize(const System& system);
- /**
- * This is called at the beginning of each force/energy computation, before calcForcesAndEnergy() has been called on
- * any ForceImpl.
- *
- * @param context the context in which to execute this kernel
- * @param includeForce true if forces should be computed
- * @param includeEnergy true if potential energy should be computed
- * @param groups a set of bit flags for which force groups to include
- */
- void beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
- /**
- * This is called at the end of each force/energy computation, after calcForcesAndEnergy() has been called on
- * every ForceImpl.
- *
- * @param context the context in which to execute this kernel
- * @param includeForce true if forces should be computed
- * @param includeEnergy true if potential energy should be computed
- * @param groups a set of bit flags for which force groups to include
- * @return the potential energy of the system. This value is added to all values returned by ForceImpls'
- * calcForcesAndEnergy() methods. That is, each force kernel may either return its contribution to the
- * energy directly, or add it to an internal buffer so that it will be included here.
- */
- double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
-private:
- CudaPlatform::PlatformData& data;
-};
-
-/**
- * This kernel provides methods for setting and retrieving various state data: time, positions,
- * velocities, and forces.
- */
-class CudaUpdateStateDataKernel : public UpdateStateDataKernel {
-public:
- CudaUpdateStateDataKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : UpdateStateDataKernel(name, platform), data(data) {
- }
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- */
- void initialize(const System& system);
- /**
- * Get the current time (in picoseconds).
- *
- * @param context the context in which to execute this kernel
- */
- double getTime(const ContextImpl& context) const;
- /**
- * Set the current time (in picoseconds).
- *
- * @param context the context in which to execute this kernel
- */
- void setTime(ContextImpl& context, double time);
- /**
- * Get the positions of all particles.
- *
- * @param positions on exit, this contains the particle positions
- */
- void getPositions(ContextImpl& context, std::vector& positions);
- /**
- * Set the positions of all particles.
- *
- * @param positions a vector containg the particle positions
- */
- void setPositions(ContextImpl& context, const std::vector& positions);
- /**
- * Get the velocities of all particles.
- *
- * @param velocities on exit, this contains the particle velocities
- */
- void getVelocities(ContextImpl& context, std::vector& velocities);
- /**
- * Set the velocities of all particles.
- *
- * @param velocities a vector containg the particle velocities
- */
- void setVelocities(ContextImpl& context, const std::vector& velocities);
- /**
- * Get the current forces on all particles.
- *
- * @param forces on exit, this contains the forces
- */
- void getForces(ContextImpl& context, std::vector& forces);
- /**
- * Get the current periodic box vectors.
- *
- * @param a on exit, this contains the vector defining the first edge of the periodic box
- * @param b on exit, this contains the vector defining the second edge of the periodic box
- * @param c on exit, this contains the vector defining the third edge of the periodic box
- */
- void getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const;
- /**
- * Set the current periodic box vectors.
- *
- * @param a the vector defining the first edge of the periodic box
- * @param b the vector defining the second edge of the periodic box
- * @param c the vector defining the third edge of the periodic box
- */
- void setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const;
- /**
- * Create a checkpoint recording the current state of the Context.
- *
- * @param stream an output stream the checkpoint data should be written to
- */
- void createCheckpoint(ContextImpl& context, std::ostream& stream);
- /**
- * Load a checkpoint that was written by createCheckpoint().
- *
- * @param stream an input stream the checkpoint data should be read from
- */
- void loadCheckpoint(ContextImpl& context, std::istream& stream);
-private:
- CudaPlatform::PlatformData& data;
-};
-
-/**
- * This kernel modifies the positions of particles to enforce distance constraints.
- */
-class CudaApplyConstraintsKernel : public ApplyConstraintsKernel {
-public:
- CudaApplyConstraintsKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : ApplyConstraintsKernel(name, platform), data(data) {
- }
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- */
- void initialize(const System& system);
- /**
- * Update particle positions to enforce constraints.
- *
- * @param context the context in which to execute this kernel
- * @param tol the distance tolerance within which constraints must be satisfied.
- */
- void apply(ContextImpl& context, double tol);
-private:
- CudaPlatform::PlatformData& data;
-};
-
-/**
- * This kernel recomputes the positions of virtual sites.
- */
-class CudaVirtualSitesKernel : public VirtualSitesKernel {
-public:
- CudaVirtualSitesKernel(std::string name, const Platform& platform) : VirtualSitesKernel(name, platform) {
- }
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- */
- void initialize(const System& system);
- /**
- * Compute the virtual site locations.
- *
- * @param context the context in which to execute this kernel
- */
- void computePositions(ContextImpl& context);
-};
-
-/**
- * This kernel is invoked by HarmonicBondForce to calculate the forces acting on the system and the energy of the system.
- */
-class CudaCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
-public:
- CudaCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcHarmonicBondForceKernel(name, platform), data(data), system(system) {
- }
- ~CudaCalcHarmonicBondForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the HarmonicBondForce this kernel will be used for
- */
- void initialize(const System& system, const HarmonicBondForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the HarmonicBondForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
-private:
- class ForceInfo;
- int numBonds;
- CudaPlatform::PlatformData& data;
- System& system;
-};
-
-/**
- * This kernel is invoked by CustomBondForce to calculate the forces acting on the system and the energy of the system.
- */
-class CudaCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
-public:
- CudaCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcCustomBondForceKernel(name, platform),
- data(data), system(system) {
- }
- ~CudaCalcCustomBondForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the CustomBondForce this kernel will be used for
- */
- void initialize(const System& system, const CustomBondForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the CustomBondForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
-private:
- class ForceInfo;
- void updateGlobalParams(ContextImpl& context);
- int numBonds;
- CudaPlatform::PlatformData& data;
- std::vector globalParamNames;
- std::vector globalParamValues;
- System& system;
-};
-
-/**
- * This kernel is invoked by HarmonicAngleForce to calculate the forces acting on the system and the energy of the system.
- */
-class CudaCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
-public:
- CudaCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcHarmonicAngleForceKernel(name, platform), data(data), system(system) {
- }
- ~CudaCalcHarmonicAngleForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the HarmonicAngleForce this kernel will be used for
- */
- void initialize(const System& system, const HarmonicAngleForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the HarmonicAngleForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
-private:
- class ForceInfo;
- int numAngles;
- CudaPlatform::PlatformData& data;
- System& system;
-};
-
-/**
- * This kernel is invoked by CustomAngleForce to calculate the forces acting on the system and the energy of the system.
- */
-class CudaCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
-public:
- CudaCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcCustomAngleForceKernel(name, platform),
- data(data), system(system) {
- }
- ~CudaCalcCustomAngleForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the CustomAngleForce this kernel will be used for
- */
- void initialize(const System& system, const CustomAngleForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the CustomAngleForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
-private:
- class ForceInfo;
- void updateGlobalParams(ContextImpl& context);
- int numAngles;
- CudaPlatform::PlatformData& data;
- std::vector globalParamNames;
- std::vector globalParamValues;
- System& system;
-};
-
-/**
- * This kernel is invoked by PeriodicTorsionForce to calculate the forces acting on the system and the energy of the system.
- */
-class CudaCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
-public:
- CudaCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcPeriodicTorsionForceKernel(name, platform), data(data), system(system) {
- }
- ~CudaCalcPeriodicTorsionForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the PeriodicTorsionForce this kernel will be used for
- */
- void initialize(const System& system, const PeriodicTorsionForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the PeriodicTorsionForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
-private:
- class ForceInfo;
- int numTorsions;
- CudaPlatform::PlatformData& data;
- System& system;
-};
-
-/**
- * This kernel is invoked by RBTorsionForce to calculate the forces acting on the system and the energy of the system.
- */
-class CudaCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
-public:
- CudaCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcRBTorsionForceKernel(name, platform), data(data), system(system) {
- }
- ~CudaCalcRBTorsionForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the RBTorsionForce this kernel will be used for
- */
- void initialize(const System& system, const RBTorsionForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the RBTorsionForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
-private:
- class ForceInfo;
- int numTorsions;
- CudaPlatform::PlatformData& data;
- System& system;
-};
-
-/**
- * This kernel is invoked by CMAPTorsionForce to calculate the forces acting on the system and the energy of the system.
- */
-class CudaCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
-public:
- CudaCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
- CalcCMAPTorsionForceKernel(name, platform), data(data), system(system), coefficients(NULL), mapPositions(NULL),
- torsionIndices(NULL), torsionMaps(NULL) {
- }
- ~CudaCalcCMAPTorsionForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the CMAPTorsionForce this kernel will be used for
- */
- void initialize(const System& system, const CMAPTorsionForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
-private:
- class ForceInfo;
- CudaPlatform::PlatformData& data;
- System& system;
- int numTorsions;
- CUDAStream* coefficients;
- CUDAStream* mapPositions;
- CUDAStream* torsionIndices;
- CUDAStream* torsionMaps;
-};
-
-/**
- * This kernel is invoked by CustomTorsionForce to calculate the forces acting on the system and the energy of the system.
- */
-class CudaCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
-public:
- CudaCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcCustomTorsionForceKernel(name, platform),
- data(data), system(system) {
- }
- ~CudaCalcCustomTorsionForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the CustomTorsionForce this kernel will be used for
- */
- void initialize(const System& system, const CustomTorsionForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the CustomTorsionForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
-private:
- class ForceInfo;
- void updateGlobalParams(ContextImpl& context);
- int numTorsions;
- CudaPlatform::PlatformData& data;
- std::vector globalParamNames;
- std::vector globalParamValues;
- System& system;
-};
-
-/**
- * This kernel is invoked by NonbondedForce to calculate the forces acting on the system.
- */
-class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
-public:
- CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcNonbondedForceKernel(name, platform), data(data), system(system) {
- }
- ~CudaCalcNonbondedForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the NonbondedForce this kernel will be used for
- */
- void initialize(const System& system, const NonbondedForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @param includeReciprocal true if reciprocal space interactions should be included
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the NonbondedForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
-private:
- class ForceInfo;
- CudaPlatform::PlatformData& data;
- int numParticles;
- System& system;
-};
-
-/**
- * This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
- */
-class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
-public:
- CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcCustomNonbondedForceKernel(name, platform), data(data), system(system) {
- }
- ~CudaCalcCustomNonbondedForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the CustomNonbondedForce this kernel will be used for
- */
- void initialize(const System& system, const CustomNonbondedForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the CustomNonbondedForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
-private:
- class ForceInfo;
- void updateGlobalParams(ContextImpl& context);
- CudaPlatform::PlatformData& data;
- int numParticles;
- std::vector globalParamNames;
- std::vector globalParamValues;
- System& system;
-};
-
-/**
- * This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
- */
-class CudaCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
-public:
- CudaCalcGBSAOBCForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : CalcGBSAOBCForceKernel(name, platform), data(data) {
- }
- ~CudaCalcGBSAOBCForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the GBSAOBCForce this kernel will be used for
- */
- void initialize(const System& system, const GBSAOBCForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the GBSAOBCForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
-private:
- class ForceInfo;
- CudaPlatform::PlatformData& data;
-};
-
-/**
- * This kernel is invoked by GBVIForce to calculate the forces acting on the system.
- */
-class CudaCalcGBVIForceKernel : public CalcGBVIForceKernel {
-public:
- CudaCalcGBVIForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : CalcGBVIForceKernel(name, platform), data(data) {
- }
- ~CudaCalcGBVIForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the GBVIForce this kernel will be used for
- * @param scaledRadii the scaled radii (Eq. 5 of Labute paper)
- */
- void initialize(const System& system, const GBVIForce& force, const std::vector & scaledRadii);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
-private:
- class ForceInfo;
- CudaPlatform::PlatformData& data;
-};
-
-/**
- * This kernel is invoked by CustomExternalForce to calculate the forces acting on the system and the energy of the system.
- */
-class CudaCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
-public:
- CudaCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcCustomExternalForceKernel(name, platform),
- data(data), system(system) {
- }
- ~CudaCalcCustomExternalForceKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param force the CustomExternalForce this kernel will be used for
- */
- void initialize(const System& system, const CustomExternalForce& force);
- /**
- * Execute the kernel to calculate the forces and/or energy.
- *
- * @param context the context in which to execute this kernel
- * @param includeForces true if forces should be calculated
- * @param includeEnergy true if the energy should be calculated
- * @return the potential energy due to the force
- */
- double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
- /**
- * Copy changed parameters over to a context.
- *
- * @param context the context to copy parameters to
- * @param force the CustomNonbondedForce to copy the parameters from
- */
- void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
-private:
- class ForceInfo;
- void updateGlobalParams(ContextImpl& context);
- int numParticles;
- CudaPlatform::PlatformData& data;
- std::vector globalParamNames;
- std::vector globalParamValues;
- System& system;
-};
-
-/**
- * This kernel is invoked by VerletIntegrator to take one time step.
- */
-class CudaIntegrateVerletStepKernel : public IntegrateVerletStepKernel {
-public:
- CudaIntegrateVerletStepKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : IntegrateVerletStepKernel(name, platform), data(data) {
- }
- ~CudaIntegrateVerletStepKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param integrator the VerletIntegrator this kernel will be used for
- */
- void initialize(const System& system, const VerletIntegrator& integrator);
- /**
- * Execute the kernel.
- *
- * @param context the context in which to execute this kernel
- * @param integrator the VerletIntegrator this kernel is being used for
- */
- void execute(ContextImpl& context, const VerletIntegrator& integrator);
-private:
- CudaPlatform::PlatformData& data;
- double prevStepSize;
-};
-
-/**
- * This kernel is invoked by LangevinIntegrator to take one time step.
- */
-class CudaIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
-public:
- CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : IntegrateLangevinStepKernel(name, platform), data(data) {
- }
- ~CudaIntegrateLangevinStepKernel();
- /**
- * Initialize the kernel, setting up the particle masses.
- *
- * @param system the System this kernel will be applied to
- * @param integrator the LangevinIntegrator this kernel will be used for
- */
- void initialize(const System& system, const LangevinIntegrator& integrator);
- /**
- * Execute the kernel.
- *
- * @param context the context in which to execute this kernel
- * @param integrator the LangevinIntegrator this kernel is being used for
- */
- void execute(ContextImpl& context, const LangevinIntegrator& integrator);
-private:
- CudaPlatform::PlatformData& data;
- double prevTemp, prevFriction, prevStepSize;
-};
-
-/**
- * This kernel is invoked by BrownianIntegrator to take one time step.
- */
-class CudaIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel {
-public:
- CudaIntegrateBrownianStepKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : IntegrateBrownianStepKernel(name, platform), data(data) {
- }
- ~CudaIntegrateBrownianStepKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param integrator the BrownianIntegrator this kernel will be used for
- */
- void initialize(const System& system, const BrownianIntegrator& integrator);
- /**
- * Execute the kernel.
- *
- * @param context the context in which to execute this kernel
- * @param integrator the BrownianIntegrator this kernel is being used for
- */
- void execute(ContextImpl& context, const BrownianIntegrator& integrator);
-private:
- CudaPlatform::PlatformData& data;
- double prevTemp, prevFriction, prevStepSize;
-};
-
-/**
- * This kernel is invoked by VariableVerletIntegrator to take one time step.
- */
-class CudaIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStepKernel {
-public:
- CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : IntegrateVariableVerletStepKernel(name, platform), data(data) {
- }
- ~CudaIntegrateVariableVerletStepKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param integrator the VerletIntegrator this kernel will be used for
- */
- void initialize(const System& system, const VariableVerletIntegrator& integrator);
- /**
- * Execute the kernel.
- *
- * @param context the context in which to execute this kernel
- * @param integrator the VerletIntegrator this kernel is being used for
- * @param maxTime the maximum time beyond which the simulation should not be advanced
- * @return the size of the step that was taken
- */
- double execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime);
-private:
- CudaPlatform::PlatformData& data;
- double prevErrorTol;
-};
-
-/**
- * This kernel is invoked by VariableLangevinIntegrator to take one time step.
- */
-class CudaIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
-public:
- CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : IntegrateVariableLangevinStepKernel(name, platform), data(data) {
- }
- ~CudaIntegrateVariableLangevinStepKernel();
- /**
- * Initialize the kernel, setting up the particle masses.
- *
- * @param system the System this kernel will be applied to
- * @param integrator the VariableLangevinIntegrator this kernel will be used for
- */
- void initialize(const System& system, const VariableLangevinIntegrator& integrator);
- /**
- * Execute the kernel.
- *
- * @param context the context in which to execute this kernel
- * @param integrator the VariableLangevinIntegrator this kernel is being used for
- * @param maxTime the maximum time beyond which the simulation should not be advanced
- * @return the size of the step that was taken
- */
- double execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime);
-private:
- CudaPlatform::PlatformData& data;
- double prevTemp, prevFriction, prevErrorTol;
-};
-
-/**
- * This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
- */
-class CudaApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
-public:
- CudaApplyAndersenThermostatKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : ApplyAndersenThermostatKernel(name, platform),
- data(data), atomGroups(NULL) {
- }
- ~CudaApplyAndersenThermostatKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param thermostat the AndersenThermostat this kernel will be used for
- */
- void initialize(const System& system, const AndersenThermostat& thermostat);
- /**
- * Execute the kernel.
- *
- * @param context the context in which to execute this kernel
- */
- void execute(ContextImpl& context);
-private:
- CudaPlatform::PlatformData& data;
- double prevTemp, prevFrequency, prevStepSize;
- CUDAStream* atomGroups;
-};
-
-/**
- * This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
- */
-class CudaApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
-public:
- CudaApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : ApplyMonteCarloBarostatKernel(name, platform), data(data),
- hasInitializedMolecules(false), moleculeAtoms(NULL), moleculeStartIndex(NULL) {
- }
- ~CudaApplyMonteCarloBarostatKernel();
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- * @param barostat the MonteCarloBarostat this kernel will be used for
- */
- void initialize(const System& system, const MonteCarloBarostat& barostat);
- /**
- * Attempt a Monte Carlo step, scaling particle positions (or cluster centers) by a specified value.
- * This is called BEFORE the periodic box size is modified. It should begin by translating each particle
- * or cluster into the first periodic box, so that coordinates will still be correct after the box size
- * is changed.
- *
- * @param context the context in which to execute this kernel
- * @param scale the scale factor by which to multiply particle positions
- */
- void scaleCoordinates(ContextImpl& context, double scale);
- /**
- * Reject the most recent Monte Carlo step, restoring the particle positions to where they were before
- * scaleCoordinates() was last called.
- *
- * @param context the context in which to execute this kernel
- */
- void restoreCoordinates(ContextImpl& context);
-private:
- CudaPlatform::PlatformData& data;
- bool hasInitializedMolecules;
- int numMolecules;
- CUDAStream* moleculeAtoms;
- CUDAStream* moleculeStartIndex;
-};
-
-/**
- * This kernel is invoked to calculate the kinetic energy of the system.
- */
-class CudaCalcKineticEnergyKernel : public CalcKineticEnergyKernel {
-public:
- CudaCalcKineticEnergyKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : CalcKineticEnergyKernel(name, platform), data(data) {
- }
- /**
- * Initialize the kernel.
- *
- * @param system the System this kernel will be applied to
- */
- void initialize(const System& system);
- /**
- * Execute the kernel.
- *
- * @param context the context in which to execute this kernel
- */
- double execute(ContextImpl& context);
-private:
- CudaPlatform::PlatformData& data;
- std::vector masses;
-};
-
-/**
- * This kernel is invoked to remove center of mass motion from the system.
- */
-class CudaRemoveCMMotionKernel : public RemoveCMMotionKernel {
-public:
- CudaRemoveCMMotionKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : RemoveCMMotionKernel(name, platform), data(data) {
- }
- /**
- * Initialize the kernel, setting up the particle masses.
- *
- * @param system the System this kernel will be applied to
- * @param force the CMMotionRemover this kernel will be used for
- */
- void initialize(const System& system, const CMMotionRemover& force);
- /**
- * Execute the kernel.
- *
- * @param context the context in which to execute this kernel
- */
- void execute(ContextImpl& context);
-private:
- CudaPlatform::PlatformData& data;
-};
-
-} // namespace OpenMM
-
-#endif /*OPENMM_CUDAKERNELS_H_*/
diff --git a/platforms/cuda-old/src/CudaPlatform.cpp b/platforms/cuda-old/src/CudaPlatform.cpp
deleted file mode 100644
index 9c4709dfd88df5665fa40cd48e37fbfdbce1c4e4..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/CudaPlatform.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-/* -------------------------------------------------------------------------- *
- * OpenMM *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from *
- * Simbios, the NIH National Center for Physics-Based Simulation of *
- * Biological Structures at Stanford, funded under the NIH Roadmap for *
- * Medical Research, grant U54 GM072970. See https://simtk.org. *
- * *
- * Portions copyright (c) 2008 Stanford University and the Authors. *
- * Authors: Peter Eastman *
- * Contributors: *
- * *
- * This program is free software: you can redistribute it and/or modify *
- * it under the terms of the GNU Lesser General Public License as published *
- * by the Free Software Foundation, either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU Lesser General Public License for more details. *
- * *
- * You should have received a copy of the GNU Lesser General Public License *
- * along with this program. If not, see . *
- * -------------------------------------------------------------------------- */
-
-#include "CudaPlatform.h"
-#include "CudaKernelFactory.h"
-#include "CudaKernels.h"
-#include "openmm/internal/ContextImpl.h"
-#include "kernels/gputypes.h"
-#include "openmm/Context.h"
-#include "openmm/OpenMMException.h"
-#include "openmm/System.h"
-#include
-
-using namespace OpenMM;
-using std::map;
-using std::string;
-using std::stringstream;
-
-extern "C" OPENMMCUDA_EXPORT void registerPlatforms() {
- if (gpuIsAvailable())
- Platform::registerPlatform(new CudaPlatform());
-}
-
-CudaPlatform::CudaPlatform() {
- CudaKernelFactory* factory = new CudaKernelFactory();
- registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
- registerKernelFactory(UpdateStateDataKernel::Name(), factory);
- registerKernelFactory(ApplyConstraintsKernel::Name(), factory);
- registerKernelFactory(VirtualSitesKernel::Name(), factory);
- registerKernelFactory(CalcHarmonicBondForceKernel::Name(), factory);
- registerKernelFactory(CalcCustomBondForceKernel::Name(), factory);
- registerKernelFactory(CalcHarmonicAngleForceKernel::Name(), factory);
- registerKernelFactory(CalcCustomAngleForceKernel::Name(), factory);
- registerKernelFactory(CalcPeriodicTorsionForceKernel::Name(), factory);
- registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
- registerKernelFactory(CalcCMAPTorsionForceKernel::Name(), factory);
- registerKernelFactory(CalcCustomTorsionForceKernel::Name(), factory);
- registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
- registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
- registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
- registerKernelFactory(CalcGBVIForceKernel::Name(), factory);
- registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
- registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
- registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
- registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
- registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
- registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
- registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
- registerKernelFactory(ApplyMonteCarloBarostatKernel::Name(), factory);
- registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
- registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
- platformProperties.push_back(CudaDevice());
- platformProperties.push_back(CudaUseBlockingSync());
- setPropertyDefaultValue(CudaDevice(), "0");
- setPropertyDefaultValue(CudaUseBlockingSync(), "true");
-}
-
-bool CudaPlatform::supportsDoublePrecision() const {
- return false;
-}
-
-const string& CudaPlatform::getPropertyValue(const Context& context, const string& property) const {
- const ContextImpl& impl = getContextImpl(context);
- const PlatformData* data = reinterpret_cast(impl.getPlatformData());
- map::const_iterator value = data->propertyValues.find(property);
- if (value != data->propertyValues.end())
- return value->second;
- return Platform::getPropertyValue(context, property);
-}
-
-void CudaPlatform::setPropertyValue(Context& context, const string& property, const string& value) const {
-}
-
-void CudaPlatform::contextCreated(ContextImpl& context, const map& properties) const {
- System& system = context.getSystem();
- for (int i = 0; i < system.getNumParticles(); i++)
- if (system.isVirtualSite(i))
- throw OpenMMException("CudaPlatform does not support virtual sites");
- for (int i = 0; i < system.getNumForces(); i++)
- if (system.getForce(i).getForceGroup() != 0)
- throw OpenMMException("CudaPlatform does not support force groups");
- unsigned int device = 0;
- const string& devicePropValue = (properties.find(CudaDevice()) == properties.end() ?
- getPropertyDefaultValue(CudaDevice()) : properties.find(CudaDevice())->second);
- if (devicePropValue.length() > 0)
- stringstream(devicePropValue) >> device;
- int numParticles = context.getSystem().getNumParticles();
- const string& blockingSync = (properties.find(CudaUseBlockingSync()) == properties.end() ?
- getPropertyDefaultValue(CudaUseBlockingSync()) : properties.find(CudaUseBlockingSync())->second);
- _gpuContext* gpu = (_gpuContext*) gpuInit(numParticles, device, blockingSync == "true");
- context.setPlatformData(new PlatformData(gpu));
-}
-
-void CudaPlatform::contextDestroyed(ContextImpl& context) const {
- PlatformData* data = reinterpret_cast(context.getPlatformData());
- gpuShutDown(data->gpu);
- delete data;
-}
-
-CudaPlatform::PlatformData::PlatformData(_gpuContext* gpu) : gpu(gpu), removeCM(false), nonbondedMethod(0), customNonbondedMethod(0), hasBonds(false), hasAngles(false),
- hasPeriodicTorsions(false), hasRB(false), hasNonbonded(false), hasCustomNonbonded(false), stepCount(0), computeForceCount(0), time(0.0),
- ewaldSelfEnergy(0.0), dispersionCoefficient(0.0) {
- stringstream device;
- device << gpu->device;
- propertyValues[CudaPlatform::CudaDevice()] = device.str();
- propertyValues[CudaPlatform::CudaUseBlockingSync()] = (gpu->useBlockingSync ? "true" : "false");
-}
diff --git a/platforms/cuda-old/src/kernels/bbsort.cu b/platforms/cuda-old/src/kernels/bbsort.cu
deleted file mode 100644
index fd204d54490408526b2cdffbfd4b43746d2ba70f..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/kernels/bbsort.cu
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * Authored by: Chen, Shifu
- *
- * Email: chen@gmtk.org
- *
- * Website: http://www.gmtk.org/gsort
- *
- * The code is distributed under BSD license, you are allowed to use, modify or sell this code, but a statement is required if you used this code any where.
- *
- */
-
-
-#include
-#include
-#include "vector_types.h"
-#include "bbsort.h"
-#include "bbsort_kernel.cu"
-
-
-int getValue(int2 v){
- return v.y;
-}
-
-template
-T getValue(T v){
- return v;
-}
-
-# define CUDA_SAFE_CALL_NO_SYNC( call) { \
- cudaError err = call; \
- if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
- __FILE__, __LINE__, cudaGetErrorString( err) ); \
- exit(EXIT_FAILURE); \
- } }
-
-# define CUDA_SAFE_CALL( call) CUDA_SAFE_CALL_NO_SYNC(call);
-
-bool assignSliceToBuckets(unsigned int* sliceCount,int sliceSize,unsigned int* bucketOffset,unsigned int* bucketOfSlice,unsigned int* bucketSizes,unsigned int* sliceOffsetInBucket,int& bucketsCount,float step)
-{
- int i=0;
-
- bool overflow=false;
-
- int tmpSum=0;
-
- bucketOffset[0]=0;
-
- for(i=0;iBLOCK_SIZE)
- {
- overflow=true;
- }
-
- tmpSum += sliceCount[i];
- bucketOfSlice[i]=bucketsCount;
- bucketSizes[bucketsCount] = tmpSum;
- sliceOffsetInBucket[i]=tmpSum -sliceCount[i];
- if(tmpSum > BLOCK_SIZE )
- {
- if(i != 0)
- {
- bucketOfSlice[i]=bucketsCount+1;
- bucketSizes[bucketsCount] -= sliceCount[i];
- sliceOffsetInBucket[i]=0;
- bucketOffset[bucketsCount+1]=bucketOffset[bucketsCount] + tmpSum - sliceCount[i];
-
- bucketsCount++;
- tmpSum=sliceCount[i];
- bucketSizes[bucketsCount] = tmpSum;
- }
- else
- {
- bucketOffset[bucketsCount+1]=bucketOffset[bucketsCount] + tmpSum ;
- sliceOffsetInBucket[i]=0;
- tmpSum=0;
- bucketsCount++;
- }
- }
-
- }
- bucketsCount++;
-
- return overflow;
-
-}
-
-template
-void reduceMinMax(T* dData,int size,float& result,bool isMax)
-{
-
- int step;
- step=(size%2==0)?
- (size/2):(size/2 +1);
- int blockSize=BLOCK_SIZE;
- int blockCount;
- int length=size;
- T originalResult;
- while(step > 0)
- {
- if(step%BLOCK_SIZE==0)
- blockCount=step/BLOCK_SIZE;
- else
- blockCount=step/BLOCK_SIZE+1;
-
- if(isMax)
- reduceMaxD<<>>(dData,step,length);
- else
- reduceMinD<<>>(dData,step,length);
-
- length=step;
-
- step=(step%2==0 || step==1)?(step/2):(step/2 +1);
- }
-
- CUDA_SAFE_CALL(cudaMemcpy(&originalResult, dData, sizeof(T), cudaMemcpyDeviceToHost));
-
- result=(int)getValue(originalResult);
-}
-
-template
-void evaluateDisorder(T* dData,int size,float maxValue, float minValue, int& listOrder)
-{
- int blockCount;
-
- if((size-1) % BLOCK_SIZE ==0)blockCount=size/BLOCK_SIZE;
- else blockCount=size/BLOCK_SIZE+1;
-
- float* dDiffData;
- CUDA_SAFE_CALL(cudaMalloc((void**)&dDiffData, sizeof(float) * size));
-
- calDifferenceD<<>>(dData,dDiffData,size);
-
- float sum=0;
-
- int step;
- step=(size%2==0)?
- (size/2):(size/2 +1);
-
- int blockSize=BLOCK_SIZE;
-
- int length=size;
-
- while(step > 0)
- {
-
- if(step%BLOCK_SIZE==0)
- blockCount=step/BLOCK_SIZE;
- else
- blockCount=step/BLOCK_SIZE+1;
-
- reduceSumD<<>>(dDiffData,step,length);
-
- length=step;
-
- step=(step%2==0 || step==1)?(step/2):(step/2 +1);
- }
-
- CUDA_SAFE_CALL(cudaMemcpy(&sum, dDiffData, sizeof(float), cudaMemcpyDeviceToHost));
-
- if( sum < (maxValue - minValue) * size / 10)
- listOrder=NEARLY_SORTED;
- else
- listOrder=DISORDERLY;
-
- CUDA_SAFE_CALL(cudaFree(dDiffData));
-}
-
-template
-void bbSortBody(T* dData,int size,int listOrder/*,float sliceStep,int sliceSize, T* dTmpData, float minValue,float maxValue*/)
-{
- float minValue,maxValue;
- T* dTmpData;
-
- CUDA_SAFE_CALL(cudaMalloc((void**)&dTmpData, sizeof(T) * size));
- CUDA_SAFE_CALL(cudaMemcpy(dTmpData, dData, sizeof(T) * size, cudaMemcpyDeviceToDevice));
- reduceMinMax(dTmpData,size,maxValue,true);
- CUDA_SAFE_CALL(cudaMemcpy(dTmpData, dData, sizeof(T) * size, cudaMemcpyDeviceToDevice));
- reduceMinMax(dTmpData,size,minValue,false);
-
- if(minValue == maxValue)
- {
- CUDA_SAFE_CALL(cudaFree(dTmpData));
- return ;
- }
-
- if(listOrder == AUTO_EVALUATE )
- {
- evaluateDisorder(dData,size,maxValue,minValue,listOrder);
- }
-
- float sliceStep = (float) (50.0*((double)(maxValue-minValue)/(double)size));
- int sliceSize = (int) ((maxValue-minValue)/sliceStep + 10);
-
- int blockCount;
-
- if(size%BLOCK_SIZE==0)blockCount=size/BLOCK_SIZE;
- else blockCount=size/BLOCK_SIZE+1;
-
- unsigned int* dSliceCounts;
- unsigned int* dOffsetInSlice;
-
- CUDA_SAFE_CALL(cudaMalloc((void**)&dOffsetInSlice, sizeof(unsigned int) * size));
- CUDA_SAFE_CALL(cudaMalloc((void**)&dSliceCounts, sizeof(unsigned int) * sliceSize));
- CUDA_SAFE_CALL(cudaMemset(dSliceCounts,0, sizeof(int) * sliceSize));
-
- if(listOrder == NEARLY_SORTED)
- {
- assignElementToSlicesNearlySortedD<<>>(dData,size,dSliceCounts,dOffsetInSlice,minValue,sliceStep,sliceSize,blockCount);
- }
- else
- assignElementToSlicesD<<>>(dData,size,dSliceCounts,dOffsetInSlice,minValue,sliceStep,sliceSize);
- unsigned int* hSliceCounts=new unsigned int[sliceSize];
- CUDA_SAFE_CALL(cudaMemcpy(hSliceCounts, dSliceCounts, sizeof(unsigned int) * sliceSize, cudaMemcpyDeviceToHost));
-
- int looseBucketSize=size/100;
-
- unsigned int* hBucketOffsets=new unsigned int[looseBucketSize];
- unsigned int* hBucketSizes=new unsigned int[looseBucketSize];
- unsigned int* hBucketOfSlices=new unsigned int[sliceSize];
- unsigned int* hSliceOffsetInBucket=new unsigned int[sliceSize];
- int bucketsCount=0;
-
- memset(hBucketSizes,0,sizeof(int) * looseBucketSize);
- memset(hSliceOffsetInBucket,0,sizeof(unsigned int) * sliceSize);
-
- bool overflow;
-
- overflow = assignSliceToBuckets(hSliceCounts,sliceSize,hBucketOffsets,hBucketOfSlices,hBucketSizes,hSliceOffsetInBucket,bucketsCount,sliceStep);
-
- unsigned int* dBucketOffsets;
- unsigned int* dBucketSizes;
-
- unsigned int* dBucketOfSlices;
- unsigned int* dSliceOffsetInBucket;
-
- CUDA_SAFE_CALL(cudaMalloc((void**)&dBucketOfSlices, sizeof(unsigned int) * sliceSize));
- CUDA_SAFE_CALL(cudaMalloc((void**)&dSliceOffsetInBucket, sizeof(unsigned int) * sliceSize));
- CUDA_SAFE_CALL(cudaMalloc((void**)&dBucketOffsets, sizeof(unsigned int) * bucketsCount));
- CUDA_SAFE_CALL(cudaMalloc((void**)&dBucketSizes, sizeof(unsigned int) * bucketsCount));
-
-
- CUDA_SAFE_CALL(cudaMemcpy(dBucketOfSlices, hBucketOfSlices, sizeof(unsigned int) * sliceSize, cudaMemcpyHostToDevice));
- CUDA_SAFE_CALL(cudaMemcpy(dSliceOffsetInBucket, hSliceOffsetInBucket, sizeof(unsigned int) * sliceSize, cudaMemcpyHostToDevice));
- CUDA_SAFE_CALL(cudaMemcpy(dBucketOffsets, hBucketOffsets, sizeof(unsigned int) * bucketsCount, cudaMemcpyHostToDevice));
- CUDA_SAFE_CALL(cudaMemcpy(dBucketSizes, hBucketSizes, sizeof(unsigned int) * bucketsCount, cudaMemcpyHostToDevice));
-
- cudaBindTexture(0,tBucketOffsets,dBucketOffsets);
- cudaBindTexture(0,tBucketSizes,dBucketSizes);
- cudaBindTexture(0,tBucketOfSlices,dBucketOfSlices);
- cudaBindTexture(0,tSliceOffsetInBucket,dSliceOffsetInBucket);
-
- assignElementToBucketD<<>>(dData,dTmpData,size,dOffsetInSlice,minValue,sliceStep);
-
- CUDA_SAFE_CALL( cudaThreadSynchronize() );
-
- bitonicSortD<<>>(dTmpData);
-
- CUDA_SAFE_CALL(cudaMemcpy(dData, dTmpData, sizeof(T) * size, cudaMemcpyDeviceToDevice));
-
- if(overflow){
- for(int i=0;i BLOCK_SIZE)
- {
- bbSort(dData + hBucketOffsets[i],hBucketSizes[i],listOrder);
- }
- }
- }
-
- delete hBucketOffsets;
- delete hBucketOfSlices;
- delete hSliceCounts;
- delete hBucketSizes;
- delete hSliceOffsetInBucket;
-
- CUDA_SAFE_CALL(cudaFree(dOffsetInSlice));
- CUDA_SAFE_CALL(cudaFree(dSliceCounts));
- CUDA_SAFE_CALL(cudaFree(dTmpData));
-
- cudaUnbindTexture( tBucketSizes );
- CUDA_SAFE_CALL(cudaFree(dBucketSizes));
-
- cudaUnbindTexture( tBucketOffsets );
- CUDA_SAFE_CALL(cudaFree(dBucketOffsets));
-
- cudaUnbindTexture( tBucketOfSlices );
- CUDA_SAFE_CALL(cudaFree(dBucketOfSlices));
-
- cudaUnbindTexture( tSliceOffsetInBucket );
- CUDA_SAFE_CALL(cudaFree(dSliceOffsetInBucket));
-}
-
-/************************************************************************************
-
-Uncomment your desired function definition here
-
-Please note that, only one type of bbsort() can be used in a program, due to NVCC compiler doesn't support overriding kernel function
-
-float, double, int, uint, short, and ushort are originally supported, if you want to use bbsort() in double
-
-please follow the readme.txt
-
-Also note that you need to use 1.3 capbility (use arch=sm_13 in your compile command) to sort doubles
-
-*************************************************************************************/
-
-template<>
-void OPENMMCUDA_EXPORT bbSort(int2* dData,int size,int listOrder)
-{
-
- bbSortBody(dData,size,listOrder);
-}
-
-//void bbSort(float* dData,int size,int listOrder)
-//{
-//
-// bbSortBody(dData,size,listOrder);
-//}
-
-//void bbSort(int* dData,int size,int listOrder)
-//{
-//
-// bbSortBody(dData,size,listOrder);
-//}
-//
-//void bbSort(unsigned int* dData,int size,int listOrder)
-//{
-//
-// bbSortBody(dData,size,listOrder);
-//}
-//
-//void bbSort(double* dData,int size,int listOrder)
-//{
-//
-// bbSortBody(dData,size,listOrder);
-//}
diff --git a/platforms/cuda-old/src/kernels/bbsort.h b/platforms/cuda-old/src/kernels/bbsort.h
deleted file mode 100644
index 8a63516ac5d5cdc57a97b6d11ee1d718790449bb..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/kernels/bbsort.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Authored by: Chen, Shifu
- *
- * Email: chen@gmtk.org
- *
- * Website: http://www.gmtk.org/gsort
- *
- * The code is distributed under BSD license, you are allowed to use, modify or sell this code, but a statement is required if you used this code any where.
- *
- */
-#ifndef _BBSORT_H_
-#define _BBSORT_H_
-#include "windowsExportCuda.h"
-
-#define BLOCK_SIZE 512
-
-#define DISORDERLY 0
-#define NEARLY_SORTED 1
-#define AUTO_EVALUATE 2
-
-template
-void OPENMMCUDA_EXPORT bbSort(T* dData,int number,int listOrder=AUTO_EVALUATE);
-
-#endif // _BBSORT_H_
diff --git a/platforms/cuda-old/src/kernels/bbsort_kernel.cu b/platforms/cuda-old/src/kernels/bbsort_kernel.cu
deleted file mode 100644
index a906c2b96abe85f17990bbc9c323df3a6bcaab8e..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/kernels/bbsort_kernel.cu
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Authored by: Chen, Shifu
- *
- * Email: chen@gmtk.org
- *
- * Website: http://www.gmtk.org/gsort
- *
- * The code is distributed under BSD license, you are allowed to use, modify or sell this code, but a statement is required if you used this code any where.
- *
- */
-#ifndef _BBSORT_KERNEL_H_
-#define _BBSORT_KERNEL_H_
-
-#include "bbsort.h"
-#include "math_constants.h"
-
-texture tBucketSizes;
-texture tBucketOffsets;
-texture tBucketOfSlices;
-texture tSliceOffsetInBucket;
-
-static __device__ int dGetValue(int2 v){
- return v.y;
-}
-
-template
-static __device__ T dGetValue(T v){
- return v;
-}
-
-
-static __device__ void dPad(int2& v){
- v.x=0x3fffffff;
- v.y=0x4fffffff;
-}
-
-template
-static __device__ void dPad(T & v){
- v=0x7fffffff;
-}
-
-template
-__global__ static void reduceMaxD(T * dData,int step,int length)
-{
- int index = blockIdx.x * blockDim.x + threadIdx.x;
-
- if(index + step >=length)
- return ;
- dData[index] = dGetValue(dData[index])>dGetValue(dData[index+step])?dData[index]:dData[index+step];
-}
-
-template
-__global__ static void reduceMinD(T * dData,int step,int length)
-{
-
- int index = blockIdx.x * blockDim.x + threadIdx.x;
-
- if(index + step >=length)
- return ;
-
- dData[index] = dGetValue(dData[index])=length)
- return ;
-
- dDiffData[index] += dDiffData[index+step];
-}
-
-template
-__global__ static void calDifferenceD(T * dData,float * dDiffData,int size)
-{
- int index = blockIdx.x * blockDim.x + threadIdx.x;
-
- if(index > size-1)
- return ;
-
- const unsigned int tid = threadIdx.x;
-
- extern __shared__ T sData[];
-
- sData[tid]=dData[index];
-
- __syncthreads();
-
- if(tid < blockDim.x -1)
- dDiffData[index] = abs(dGetValue(sData[tid+1]) - dGetValue(sData[tid]));
- else
- dDiffData[index] =0;
-
-}
-
-template
-__device__ inline void dSwap(T & a, T & b)
-{
- T tmp = a;
- a = b;
- b = tmp;
-}
-
-
-template
-__global__ static void bitonicSortD(T * datas)
-{
- extern __shared__ T shared[];
-
- const unsigned int bid=blockIdx.x;
-
- const unsigned int tid = threadIdx.x;
-
- __shared__ unsigned int count;
- __shared__ unsigned int offset;
-
- if(tid == 0)
- {
- count=tex1Dfetch(tBucketSizes,bid);
- offset=tex1Dfetch(tBucketOffsets,bid);
- }
-
- __syncthreads();
-
- if(tid < count)
- shared[tid] = datas[tid+offset];
- else
- {
- dPad(shared[tid]);
- }
-
- __syncthreads();
-
- for (unsigned int k = 2; k <= BLOCK_SIZE; k *= 2)
- {
- for (unsigned int j = k / 2; j>0; j /= 2)
- {
- unsigned int ixj = tid ^ j;
-
-
- if (ixj > tid)
- {
- if ((tid & k) == 0)
- {
- if (dGetValue(shared[tid]) > dGetValue(shared[ixj]))
- {
- dSwap(shared[tid], shared[ixj]);
- }
- }
- else
- {
- if (dGetValue(shared[tid]) < dGetValue(shared[ixj]))
- {
- dSwap(shared[tid], shared[ixj]);
- }
- }
- }
-
- __syncthreads();
- }
- }
- if(tid < count)
- datas[tid+offset] = shared[tid];
-}
-
-template
-
-__global__ void assignElementToSlicesD(T* dDatas,int number,unsigned int* dSliceCounts,unsigned int* dOffsetInSlice,float minValue,float step,int sliceSize)
-{
- unsigned int index= __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
-
- if(index > number-1)
- return ;
-
- unsigned int s=((dGetValue(dDatas[index]) - minValue)/ step);
-
- unsigned int offset=atomicInc(dSliceCounts + s,0xFFFFFFF);
-
- dOffsetInSlice[index] = offset;
-
-}
-
-template
-__global__ void assignElementToSlicesNearlySortedD(T* dDatas,int number,unsigned int* dSliceCounts,unsigned int* dOffsetInSlice,float minValue,float step,int sliceSize,int blockCount)
-{
- unsigned int index= blockIdx.x + blockCount * threadIdx.x;
-
- if(index > number-1)
- return ;
-
- unsigned int s=((dGetValue(dDatas[index]) - minValue)/ step);
-
- unsigned int offset=atomicInc(dSliceCounts + s,0xFFFFFFF);
-
- dOffsetInSlice[index] = offset;
-
-}
-
-template
-__global__ void assignElementToBucketD(T* dDatas,T* dNewDatas,int number,unsigned int* dOffsetInSlice,float minValue,float step)
-{
-
- unsigned int index= __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
-
- if(index > number-1)
- return ;
-
- unsigned int s=((dGetValue(dDatas[index]) - minValue)/ step);
-
- unsigned int b=tex1Dfetch(tBucketOfSlices,s);
-
- unsigned int offset =tex1Dfetch(tBucketOffsets,b) + tex1Dfetch(tSliceOffsetInBucket,s) + dOffsetInSlice[index];
-
- dNewDatas[offset] =dDatas[index];
-
-}
-
-#endif // _BBSORT_KERNEL_H_
diff --git a/platforms/cuda-old/src/kernels/cudaCompact.cu b/platforms/cuda-old/src/kernels/cudaCompact.cu
deleted file mode 100644
index c5efb6ba882be004a78566f7ab6b4fed53656dba..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/kernels/cudaCompact.cu
+++ /dev/null
@@ -1,223 +0,0 @@
-
-/* Code for CUDA stream compaction. Roughly based on:
- Billeter M, Olsson O, Assarsson U. Efficient Stream Compaction on Wide SIMD Many-Core Architectures.
- High Performance Graphics 2009.
-
- Notes:
- - paper recommends 128 threads/block, so this is hard coded.
- - I only implement the prefix-sum based compact primitive, and not the POPC one, as that is more
- complicated and performs poorly on current hardware
- - I only implement the scattered- and staged-write variant of phase III as it they have reasonable
- performance across most of the tested workloads in the paper. The selective variant is not
- implemented.
- - The prefix sum of per-block element counts (phase II) is not done in a particularly efficient
- manner. It is, however, done in a very easy to program manner, and integrated into the top of
- phase III, reducing the number of kernel invocations required. If one wanted to use existing code,
- it'd be easy to take the CUDA SDK scanLargeArray sample, and do a prefix sum over dgBlockCounts in
- a phase II kernel. You could also adapt the existing prescan128 to take an initial value, and scan
- dgBlockCounts in stages.
-
- Date: 23 Aug 2009
- Author: Imran Haque (ihaque@cs.stanford.edu)
- Affiliation: Stanford University
- License: Public Domain
-*/
-
-#include "cudaCompact.h"
-
-typedef unsigned int T;
-
-// Phase 1: Count valid elements per thread block
-// Hard-code 128 thd/blk
-__device__ unsigned int sumReduce128(volatile unsigned int* arr) {
- // Parallel reduce element counts
- // Assumes 128 thd/block
- if (threadIdx.x < 64) arr[threadIdx.x] += arr[threadIdx.x+64];
- __syncthreads();
- if (threadIdx.x < 32) {
- arr[threadIdx.x] += arr[threadIdx.x+32];
- if (threadIdx.x < 16) arr[threadIdx.x] += arr[threadIdx.x+16];
- if (threadIdx.x < 8) arr[threadIdx.x] += arr[threadIdx.x+8];
- if (threadIdx.x < 4) arr[threadIdx.x] += arr[threadIdx.x+4];
- if (threadIdx.x < 2) arr[threadIdx.x] += arr[threadIdx.x+2];
- if (threadIdx.x < 1) arr[threadIdx.x] += arr[threadIdx.x+1];
- }
- __syncthreads();
- return arr[0];
-}
-
-__global__ void countElts(unsigned int* dgBlockCounts,const unsigned int* dgValid,const size_t eltsPerBlock,const size_t len) {
- __shared__ volatile unsigned int dsCount[128];
- dsCount[threadIdx.x] = 0;
- size_t ub;
- ub = (len < (blockIdx.x+1)*eltsPerBlock) ? len : ((blockIdx.x + 1)*eltsPerBlock);
- for (int base = blockIdx.x * eltsPerBlock; base < (blockIdx.x+1)*eltsPerBlock; base += blockDim.x) {
- if ((base + threadIdx.x) < ub && dgValid[base+threadIdx.x])
- dsCount[threadIdx.x]++;
- }
- __syncthreads();
- unsigned int blockCount = sumReduce128(dsCount);
- if (threadIdx.x == 0) dgBlockCounts[blockIdx.x] = blockCount;
- return;
-}
-
-// Phase 2/3: Move valid elements using SIMD compaction (phase 2 is done implicitly at top of __global__ method)
-// Exclusive prefix scan over 128 elements
-// Assumes 128 threads
-// Taken from cuda SDK "scan" sample for naive scan, with small modifications
-__device__ int exclusivePrescan128(const unsigned int* in,unsigned int* outAndTemp) {
- const int n=128;
- //TODO: this temp storage could be reduced since we write to shared memory in out anyway, and n is hardcoded
- //__shared__ int temp[2*n];
- unsigned int* temp = outAndTemp;
- int pout = 1, pin = 0;
-
- // load input into temp
- // This is exclusive scan, so shift right by one and set first elt to 0
- temp[pout*n + threadIdx.x] = (threadIdx.x > 0) ? in[threadIdx.x-1] : 0;
- __syncthreads();
-
- for (int offset = 1; offset < n; offset *= 2)
- {
- pout = 1 - pout; // swap double buffer indices
- pin = 1 - pout;
- __syncthreads();
- temp[pout*n+threadIdx.x] = temp[pin*n+threadIdx.x];
- if (threadIdx.x >= offset)
- temp[pout*n+threadIdx.x] += temp[pin*n+threadIdx.x - offset];
- }
-
- //out[threadIdx.x] = temp[pout*n+threadIdx.x]; // write output
- __syncthreads();
- return outAndTemp[127]+in[127]; // Return sum of all elements
-}
-__device__ int compactSIMDPrefixSum(const T* dsData,const unsigned int* dsValid,T* dsCompact) {
- __shared__ unsigned int dsLocalIndex[256];
- int numValid = exclusivePrescan128(dsValid,dsLocalIndex);
- if (dsValid[threadIdx.x]) dsCompact[dsLocalIndex[threadIdx.x]] = dsData[threadIdx.x];
- return numValid;
-}
-
-__global__ void moveValidElementsStaged(const T* dgData,T* dgCompact,const unsigned int* dgValid,const unsigned int* dgBlockCounts,size_t eltsPerBlock,size_t len,size_t* dNumValidElements) {
- __shared__ T inBlock[128];
- __shared__ unsigned int validBlock[128];
- __shared__ T compactBlock[128];
- int blockOutOffset=0;
- // Sum up the blockCounts before us to find our offset
- // This is totally inefficient - lots of repeated work b/w blocks, and uneven balancing.
- // Paper implements this as a prefix sum kernel in phase II
- // May still be faster than an extra kernel invocation?
- for (int base = 0; base < blockIdx.x; base += blockDim.x) {
- // Load up the count of valid elements for each block before us in batches of 128
- if ((base + threadIdx.x) < blockIdx.x) {
- validBlock[threadIdx.x] = dgBlockCounts[base+threadIdx.x];
- } else {
- validBlock[threadIdx.x] = 0;
- }
- __syncthreads();
- // Parallel reduce these counts
- // Accumulate in the final offset variable
- blockOutOffset += sumReduce128(validBlock);
- }
-
- size_t ub;
- ub = (len < (blockIdx.x+1)*eltsPerBlock) ? len : ((blockIdx.x + 1)*eltsPerBlock);
- for (int base = blockIdx.x * eltsPerBlock; base < (blockIdx.x+1)*eltsPerBlock; base += blockDim.x) {
- if ((base + threadIdx.x) < ub) {
- validBlock[threadIdx.x] = dgValid[base+threadIdx.x];
- inBlock[threadIdx.x] = dgData[base+threadIdx.x];
- } else {
- validBlock[threadIdx.x] = 0;
- }
- __syncthreads();
- int numValidBlock = compactSIMDPrefixSum(inBlock,validBlock,compactBlock);
- __syncthreads();
- if (threadIdx.x < numValidBlock) {
- dgCompact[blockOutOffset + threadIdx.x] = compactBlock[threadIdx.x];
- }
- blockOutOffset += numValidBlock;
- }
- if (blockIdx.x == (gridDim.x-1) && threadIdx.x == 0) {
- *dNumValidElements = blockOutOffset;
- }
-}
-
-__global__ void moveValidElementsScattered(const T* dgData,T* dgCompact,const unsigned int* dgValid,const unsigned int* dgBlockCounts,size_t eltsPerBlock,size_t len,size_t* dNumValidElements) {
- __shared__ T inBlock[128];
- __shared__ unsigned int validBlock[128];
- T* compactBlock=dgCompact;
- size_t blockOutOffset = 0;
- // Sum up the blockCounts before us to find our offset
- // This is totally inefficient - lots of repeated work b/w blocks, and uneven balancing.
- // Paper implements this as a prefix sum kernel in phase II
- // May still be faster than an extra kernel invocation?
- for (int base = 0; base < blockIdx.x; base += blockDim.x) {
- // Load up the count of valid elements for each block before us in batches of 128
- if ((base + threadIdx.x) < blockIdx.x) {
- validBlock[threadIdx.x] = dgBlockCounts[base+threadIdx.x];
- } else {
- validBlock[threadIdx.x] = 0;
- }
- __syncthreads();
- // Parallel reduce these counts
- // Accumulate in the final offset variable
- blockOutOffset += sumReduce128(validBlock);
- }
- compactBlock += blockOutOffset;
- size_t ub;
- ub = (len < (blockIdx.x+1)*eltsPerBlock) ? len : ((blockIdx.x + 1)*eltsPerBlock);
- for (int base = blockIdx.x * eltsPerBlock; base < (blockIdx.x+1)*eltsPerBlock; base += blockDim.x) {
- if ((base + threadIdx.x) < ub) {
- validBlock[threadIdx.x] = dgValid[base+threadIdx.x];
- inBlock[threadIdx.x] = dgData[base+threadIdx.x];
- } else {
- validBlock[threadIdx.x] = 0;
- }
- __syncthreads();
- int numValidBlock = compactSIMDPrefixSum(inBlock,validBlock,compactBlock);
- blockOutOffset += numValidBlock;
- compactBlock += numValidBlock;
- }
- if (blockIdx.x == (gridDim.x-1) && threadIdx.x == 0) {
- *dNumValidElements = blockOutOffset;
- }
-}
-
-void OPENMMCUDA_EXPORT planCompaction(compactionPlan& d,bool stageOutput) {
- int device;
- cudaGetDevice(&device);
- cudaDeviceProp deviceProp;
- cudaGetDeviceProperties(&deviceProp, device);
- d.nThreadBlocks = 16*deviceProp.multiProcessorCount;
- cudaMalloc((void**)&(d.dgBlockCounts), d.nThreadBlocks*sizeof(unsigned int));
- d.stageOutput = stageOutput;
- // TODO: make sure allocation worked
- d.valid = true;
-}
-
-void OPENMMCUDA_EXPORT destroyCompactionPlan(compactionPlan& d) {
- if (d.valid) cudaFree(d.dgBlockCounts);
-}
-
-int OPENMMCUDA_EXPORT compactStream(const compactionPlan& d,T* dOut,const T* dIn,const unsigned int* dValid,size_t len,size_t* dNumValid) {
- if (!d.valid) {
- return -1;
- }
- // Figure out # elements per block
- unsigned int numBlocks = d.nThreadBlocks;
- if (numBlocks*128 > len)
- numBlocks = (len+127)/128;
- const size_t eltsPerBlock = len/numBlocks + ((len % numBlocks) ? 1 : 0);
-
- // TODO: implement loop over blocks of 10M
- // Phase 1: Calculate number of valid elements per thread block
- countElts<<>>(d.dgBlockCounts,dValid,eltsPerBlock,len);
-
- // Phase 2/3: Move valid elements using SIMD compaction
- if (d.stageOutput) {
- moveValidElementsStaged<<>>(dIn,dOut,dValid,d.dgBlockCounts,eltsPerBlock,len,dNumValid);
- } else {
- moveValidElementsScattered<<>>(dIn,dOut,dValid,d.dgBlockCounts,eltsPerBlock,len,dNumValid);
- }
- return 0;
-}
diff --git a/platforms/cuda-old/src/kernels/cudaCompact.h b/platforms/cuda-old/src/kernels/cudaCompact.h
deleted file mode 100644
index ac9fef27afda8e86d9b81e18615edbceb83a90c7..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/kernels/cudaCompact.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef __OPENMM_CUDACOMPACT_H__
-#define __OPENMM_CUDACOMPACT_H__
-
-/* Code for CUDA stream compaction. Roughly based on:
- Billeter M, Olsson O, Assarsson U. Efficient Stream Compaction on Wide SIMD Many-Core Architectures.
- High Performance Graphics 2009.
-
- Notes:
- - paper recommends 128 threads/block, so this is hard coded.
- - I only implement the prefix-sum based compact primitive, and not the POPC one, as that is more
- complicated and performs poorly on current hardware
- - I only implement the scattered- and staged-write variant of phase III as it they have reasonable
- performance across most of the tested workloads in the paper. The selective variant is not
- implemented.
- - The prefix sum of per-block element counts (phase II) is not done in a particularly efficient
- manner. It is, however, done in a very easy to program manner, and integrated into the top of
- phase III, reducing the number of kernel invocations required. If one wanted to use existing code,
- it'd be easy to take the CUDA SDK scanLargeArray sample, and do a prefix sum over dgBlockCounts in
- a phase II kernel. You could also adapt the existing prescan128 to take an initial value, and scan
- dgBlockCounts in stages.
-
- Date: 23 Aug 2009
- Author: Imran Haque (ihaque@cs.stanford.edu)
- Affiliation: Stanford University
- License: Public Domain
-*/
-
-#include "windowsExportCuda.h"
-
-struct compactionPlan {
- bool valid;
- unsigned int* dgBlockCounts;
- unsigned int nThreadBlocks;
- bool stageOutput;
-};
-
-extern "C"
-void OPENMMCUDA_EXPORT planCompaction(compactionPlan& d,bool stageOutput=true);
-
-extern "C"
-void OPENMMCUDA_EXPORT destroyCompactionPlan(compactionPlan& d);
-
-extern "C"
-int OPENMMCUDA_EXPORT compactStream(const compactionPlan& d,unsigned int* dOut,const unsigned int* dIn,const unsigned int* dValid,size_t len,size_t* dNumValid);
-
-#endif // __OPENMM_CUDACOMPACT_H__
diff --git a/platforms/cuda-old/src/kernels/cudaKernels.h b/platforms/cuda-old/src/kernels/cudaKernels.h
deleted file mode 100755
index 611e971e132349311ea4a21150bed4cd84b7cd66..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/kernels/cudaKernels.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/* -------------------------------------------------------------------------- *
- * OpenMM *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from *
- * Simbios, the NIH National Center for Physics-Based Simulation of *
- * Biological Structures at Stanford, funded under the NIH Roadmap for *
- * Medical Research, grant U54 GM072970. See https://simtk.org. *
- * *
- * Portions copyright (c) 2009 Stanford University and the Authors. *
- * Authors: Scott Le Grand, Peter Eastman *
- * Contributors: *
- * *
- * This program is free software: you can redistribute it and/or modify *
- * it under the terms of the GNU Lesser General Public License as published *
- * by the Free Software Foundation, either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU Lesser General Public License for more details. *
- * *
- * You should have received a copy of the GNU Lesser General Public License *
- * along with this program. If not, see . *
- * -------------------------------------------------------------------------- */
-
-#include "gputypes.h"
-
-// Initialization
-extern void OPENMMCUDA_EXPORT kClearForces(gpuContext gpu);
-extern void kClearEnergy(gpuContext gpu);
-extern void kClearBornSumAndForces(gpuContext gpu);
-extern void kClearObcGbsaBornSum(gpuContext gpu);
-extern void OPENMMCUDA_EXPORT kCalculateObcGbsaBornSum(gpuContext gpu);
-extern void OPENMMCUDA_EXPORT kReduceObcGbsaBornSum(gpuContext gpu);
-extern void kCalculateGBVIBornSum(gpuContext gpu);
-extern void kReduceGBVIBornSum(gpuContext gpu);
-extern void kClearGBVIBornSum( gpuContext gpu );
-extern void kGenerateRandoms(gpuContext gpu);
-
-// Main loop
-extern void kCalculateCDLJObcGbsaForces1(gpuContext gpu);
-extern void kCalculateCDLJGBVIForces1(gpuContext gpu);
-extern void kCalculateCDLJForces(gpuContext gpu);
-extern void kCalculateCMAPTorsionForces(gpuContext gpu, CUDAStream& coefficients, CUDAStream& mapPositions, CUDAStream& torsionIndices, CUDAStream& torsionMaps);
-extern void kCalculateCustomBondForces(gpuContext gpu);
-extern void kCalculateCustomAngleForces(gpuContext gpu);
-extern void kCalculateCustomTorsionForces(gpuContext gpu);
-extern void kCalculateCustomExternalForces(gpuContext gpu);
-extern void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid);
-extern void kReduceObcGbsaBornForces(gpuContext gpu);
-extern void OPENMMCUDA_EXPORT kCalculateObcGbsaForces2(gpuContext gpu);
-extern void kCalculateGBVIForces2(gpuContext gpu);
-extern void kCalculateLocalForces(gpuContext gpu);
-extern void kCalculateAndersenThermostat(gpuContext gpu, CUDAStream& atomGroups);
-extern void kReduceBornSumAndForces(gpuContext gpu);
-extern void kApplyShake(gpuContext gpu);
-extern void kApplyCCMA(gpuContext gpu);
-extern void kApplySettle(gpuContext gpu);
-extern void kLangevinUpdatePart1(gpuContext gpu);
-extern void kLangevinUpdatePart2(gpuContext gpu);
-extern void kSelectLangevinStepSize(gpuContext gpu, float maxTimeStep);
-extern void kSetVelocitiesFromPositions(gpuContext gpu);
-extern void kVerletUpdatePart1(gpuContext gpu);
-extern void kVerletUpdatePart2(gpuContext gpu);
-extern void kSelectVerletStepSize(gpuContext gpu, float maxTimeStep);
-extern void kBrownianUpdatePart1(gpuContext gpu);
-extern void kBrownianUpdatePart2(gpuContext gpu);
-extern void kScaleAtomCoordinates(gpuContext gpu, float scale, CUDAStream& moleculeAtoms, CUDAStream& moleculeStartIndex);
-extern void kApplyConstraints(gpuContext gpu);
-
-// Extras
-extern void OPENMMCUDA_EXPORT kReduceForces(gpuContext gpu);
-extern double kReduceEnergy(gpuContext gpu);
-
-// Initializers
-extern void SetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu);
-extern void GetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu);
-extern void SetCalculateCDLJForcesSim(gpuContext gpu);
-extern void GetCalculateCDLJForcesSim(gpuContext gpu);
-extern void SetCalculateCustomBondForcesSim(gpuContext gpu);
-extern void GetCalculateCustomBondForcesSim(gpuContext gpu);
-extern void SetCalculateCustomAngleForcesSim(gpuContext gpu);
-extern void GetCalculateCustomAngleForcesSim(gpuContext gpu);
-extern void SetCalculateCustomTorsionForcesSim(gpuContext gpu);
-extern void GetCalculateCustomTorsionForcesSim(gpuContext gpu);
-extern void SetCalculateCustomExternalForcesSim(gpuContext gpu);
-extern void GetCalculateCustomExternalForcesSim(gpuContext gpu);
-extern void SetCalculateCustomNonbondedForcesSim(gpuContext gpu);
-extern void GetCalculateCustomNonbondedForcesSim(gpuContext gpu);
-extern void SetCalculateLocalForcesSim(gpuContext gpu);
-extern void GetCalculateLocalForcesSim(gpuContext gpu);
-extern void SetCalculateObcGbsaBornSumSim(gpuContext gpu);
-extern void GetCalculateObcGbsaBornSumSim(gpuContext gpu);
-extern void SetCalculateGBVIBornSumSim(gpuContext gpu);
-extern void GetCalculateGBVIBornSumSim(gpuContext gpu);
-extern void OPENMMCUDA_EXPORT SetCalculateObcGbsaForces2Sim(gpuContext gpu);
-extern void GetCalculateObcGbsaForces2Sim(gpuContext gpu);
-extern void SetCalculateGBVIForces2Sim(gpuContext gpu);
-extern void GetCalculateGBVIForces2Sim(gpuContext gpu);
-extern void SetCalculateAndersenThermostatSim(gpuContext gpu);
-extern void GetCalculateAndersenThermostatSim(gpuContext gpu);
-extern void SetCalculatePMESim(gpuContext gpu);
-extern void GetCalculatePMESim(gpuContext gpu);
-extern void OPENMMCUDA_EXPORT SetForcesSim(gpuContext gpu);
-extern void GetForcesSim(gpuContext gpu);
-extern void SetShakeHSim(gpuContext gpu);
-extern void GetShakeHSim(gpuContext gpu);
-extern void SetLangevinUpdateSim(gpuContext gpu);
-extern void GetLangevinUpdateSim(gpuContext gpu);
-extern void SetSettleSim(gpuContext gpu);
-extern void GetSettleSim(gpuContext gpu);
-extern void SetCCMASim(gpuContext gpu);
-extern void GetCCMASim(gpuContext gpu);
-extern void SetVerletUpdateSim(gpuContext gpu);
-extern void GetVerletUpdateSim(gpuContext gpu);
-extern void SetBrownianUpdateSim(gpuContext gpu);
-extern void GetBrownianUpdateSim(gpuContext gpu);
-extern void SetRandomSim(gpuContext gpu);
-extern void GetRandomSim(gpuContext gpu);
-extern void SetCustomBondForceExpression(const Expression<256>& expression);
-extern void SetCustomBondEnergyExpression(const Expression<256>& expression);
-extern void SetCustomBondGlobalParams(const std::vector& paramValues);
-extern void SetCustomAngleForceExpression(const Expression<256>& expression);
-extern void SetCustomAngleEnergyExpression(const Expression<256>& expression);
-extern void SetCustomAngleGlobalParams(const std::vector& paramValues);
-extern void SetCustomTorsionForceExpression(const Expression<256>& expression);
-extern void SetCustomTorsionEnergyExpression(const Expression<256>& expression);
-extern void SetCustomTorsionGlobalParams(const std::vector& paramValues);
-extern void SetCustomExternalForceExpressions(const Expression<256>& expressionX, const Expression<256>& expressionY, const Expression<256>& expressionZ);
-extern void SetCustomExternalEnergyExpression(const Expression<256>& expression);
-extern void SetCustomExternalGlobalParams(const std::vector& paramValues);
-extern void SetCustomNonbondedForceExpression(const Expression<256>& expression);
-extern void SetCustomNonbondedEnergyExpression(const Expression<256>& expression);
-extern void SetCustomNonbondedGlobalParams(const std::vector& paramValues);
-
-extern void kPrintGBVI( gpuContext gpu, std::string callId, int call, FILE* log);
-extern void kPrintObc( gpuContext gpu, std::string callId, int call, FILE* log);
-
diff --git a/platforms/cuda-old/src/kernels/cudatypes.h b/platforms/cuda-old/src/kernels/cudatypes.h
deleted file mode 100755
index 3c65cc61418251ab241ef4782a4354144b0065e4..0000000000000000000000000000000000000000
--- a/platforms/cuda-old/src/kernels/cudatypes.h
+++ /dev/null
@@ -1,512 +0,0 @@
-#ifndef CUDATYPES_H
-#define CUDATYPES_H
-
-/* -------------------------------------------------------------------------- *
- * OpenMM *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from *
- * Simbios, the NIH National Center for Physics-Based Simulation of *
- * Biological Structures at Stanford, funded under the NIH Roadmap for *
- * Medical Research, grant U54 GM072970. See https://simtk.org. *
- * *
- * Portions copyright (c) 2009 Stanford University and the Authors. *
- * Authors: Scott Le Grand, Peter Eastman *
- * Contributors: *
- * *
- * This program is free software: you can redistribute it and/or modify *
- * it under the terms of the GNU Lesser General Public License as published *
- * by the Free Software Foundation, either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU Lesser General Public License for more details. *
- * *
- * You should have received a copy of the GNU Lesser General Public License *
- * along with this program. If not, see . *
- * -------------------------------------------------------------------------- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include "openmm/OpenMMException.h"
-
-#define RTERROR(status, s) \
- if (status != cudaSuccess) { \
- throw OpenMM::OpenMMException(std::string(s) + " " + cudaGetErrorString(status)); \
- }
-
-#define LAUNCHERROR(s) \
- { \
- cudaError_t status = cudaGetLastError(); \
- if (status != cudaSuccess) { \
- throw OpenMM::OpenMMException(std::string("Error: ") + cudaGetErrorString(status) + " launching kernel " + s); \
- } \
- }
-
-// Pure virtual class to define an interface for objects resident both on GPU and CPU
-struct SoADeviceObject {
- virtual void Allocate() = 0;
- virtual void Deallocate() = 0;
- virtual void Upload() = 0;
- virtual void Download() = 0;
-};
-
-template
-struct CUDAStream : public SoADeviceObject
-{
- unsigned int _length;
- unsigned int _subStreams;
- unsigned int _stride;
- T** _pSysStream;
- T** _pDevStream;
- T* _pSysData;
- T* _pDevData;
- std::string _name;
- CUDAStream(int length, int subStreams = 1, std::string name="");
- CUDAStream(unsigned int length, unsigned int subStreams = 1, std::string name="");
- CUDAStream(unsigned int length, int subStreams = 1, std::string name="");
- CUDAStream(int length, unsigned int subStreams = 1, std::string name="");
- virtual ~CUDAStream();
- void Allocate();
- void Deallocate();
- void Upload();
- void Download();
- void CopyFrom(const CUDAStream