Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5a06df78
Commit
5a06df78
authored
Mar 04, 2020
by
tic20
Browse files
Merge
https://github.com/openmm/openmm
parents
8dd60914
a9223eea
Changes
335
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1741 additions
and
9193 deletions
+1741
-9193
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+3
-3
platforms/cuda/include/CudaProgram.h
platforms/cuda/include/CudaProgram.h
+61
-0
platforms/cuda/include/CudaSort.h
platforms/cuda/include/CudaSort.h
+2
-2
platforms/cuda/include/windowsExportCuda.h
platforms/cuda/include/windowsExportCuda.h
+0
-41
platforms/cuda/sharedTarget/CMakeLists.txt
platforms/cuda/sharedTarget/CMakeLists.txt
+6
-5
platforms/cuda/src/CudaArray.cpp
platforms/cuda/src/CudaArray.cpp
+10
-5
platforms/cuda/src/CudaBondedUtilities.cpp
platforms/cuda/src/CudaBondedUtilities.cpp
+6
-1
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+43
-669
platforms/cuda/src/CudaEvent.cpp
platforms/cuda/src/CudaEvent.cpp
+50
-0
platforms/cuda/src/CudaIntegrationUtilities.cpp
platforms/cuda/src/CudaIntegrationUtilities.cpp
+42
-692
platforms/cuda/src/CudaKernel.cpp
platforms/cuda/src/CudaKernel.cpp
+80
-0
platforms/cuda/src/CudaKernelFactory.cpp
platforms/cuda/src/CudaKernelFactory.cpp
+33
-28
platforms/cuda/src/CudaKernelSources.h.in
platforms/cuda/src/CudaKernelSources.h.in
+3
-3
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1292
-7508
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+15
-0
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+52
-52
platforms/cuda/src/CudaParameterSet.cpp
platforms/cuda/src/CudaParameterSet.cpp
+4
-166
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+5
-3
platforms/cuda/src/CudaProgram.cpp
platforms/cuda/src/CudaProgram.cpp
+8
-15
platforms/cuda/src/kernels/common.cu
platforms/cuda/src/kernels/common.cu
+26
-0
No files found.
platforms/cuda/include/CudaPlatform.h
View file @
5a06df78
...
@@ -30,7 +30,7 @@
...
@@ -30,7 +30,7 @@
#include "openmm/Platform.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/ThreadPool.h"
#include "windowsExportC
uda
.h"
#include "
openmm/common/
windowsExportC
ommon
.h"
namespace
OpenMM
{
namespace
OpenMM
{
...
@@ -40,7 +40,7 @@ class CudaContext;
...
@@ -40,7 +40,7 @@ class CudaContext;
* This Platform subclass uses CUDA implementations of the OpenMM kernels.
* This Platform subclass uses CUDA implementations of the OpenMM kernels.
*/
*/
class
OPENMM_EXPORT_C
UDA
CudaPlatform
:
public
Platform
{
class
OPENMM_EXPORT_C
OMMON
CudaPlatform
:
public
Platform
{
public:
public:
class
PlatformData
;
class
PlatformData
;
CudaPlatform
();
CudaPlatform
();
...
@@ -127,7 +127,7 @@ public:
...
@@ -127,7 +127,7 @@ public:
}
}
};
};
class
OPENMM_EXPORT_C
UDA
CudaPlatform
::
PlatformData
{
class
OPENMM_EXPORT_C
OMMON
CudaPlatform
::
PlatformData
{
public:
public:
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
,
const
std
::
string
&
hostCompilerProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
,
const
std
::
string
&
hostCompilerProperty
,
...
...
platforms/cuda/include/CudaProgram.h
0 → 100644
View file @
5a06df78
#ifndef OPENMM_CUDAPROGRAM_H_
#define OPENMM_CUDAPROGRAM_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/common/ComputeProgram.h"
#include "CudaContext.h"
namespace
OpenMM
{
/**
* This is the CUDA implementation of the ComputeProgramImpl interface.
*/
class
CudaProgram
:
public
ComputeProgramImpl
{
public:
/**
* Create a new CudaProgram.
*
* @param context the context this kernel belongs to
* @param module the compiled module
*/
CudaProgram
(
CudaContext
&
context
,
CUmodule
module
);
/**
* Create a ComputeKernel for one of the kernels in this program.
*
* @param name the name of the kernel to get
*/
ComputeKernel
createKernel
(
const
std
::
string
&
name
);
private:
CudaContext
&
context
;
CUmodule
module
;
};
}
// namespace OpenMM
#endif
/*OPENMM_CUDAPROGRAM_H_*/
platforms/cuda/include/CudaSort.h
View file @
5a06df78
...
@@ -28,7 +28,7 @@
...
@@ -28,7 +28,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "CudaArray.h"
#include "CudaArray.h"
#include "windowsExportC
uda
.h"
#include "
openmm/common/
windowsExportC
ommon
.h"
#include "CudaContext.h"
#include "CudaContext.h"
namespace
OpenMM
{
namespace
OpenMM
{
...
@@ -66,7 +66,7 @@ namespace OpenMM {
...
@@ -66,7 +66,7 @@ namespace OpenMM {
* elements).
* elements).
*/
*/
class
OPENMM_EXPORT_C
UDA
CudaSort
{
class
OPENMM_EXPORT_C
OMMON
CudaSort
{
public:
public:
class
SortTrait
;
class
SortTrait
;
/**
/**
...
...
platforms/cuda/include/windowsExportCuda.h
deleted
100644 → 0
View file @
8dd60914
#ifndef OPENMM_WINDOWSEXPORTCUDA_H_
#define OPENMM_WINDOWSEXPORTCUDA_H_
/*
* Shared libraries are messy in Visual Studio. We have to distinguish three
* cases:
* (1) this header is being used to build the OpenMM shared library
* (dllexport)
* (2) this header is being used by a *client* of the OpenMM shared
* library (dllimport)
* (3) we are building the OpenMM static library, or the client is
* being compiled with the expectation of linking with the
* OpenMM static library (nothing special needed)
* In the CMake script for building this library, we define one of the symbols
* OPENMM_CUDA_BUILDING_{SHARED|STATIC}_LIBRARY
* Client code normally has no special symbol defined, in which case we'll
* assume it wants to use the shared library. However, if the client defines
* the symbol OPENMM_USE_STATIC_LIBRARIES we'll suppress the dllimport so
* that the client code can be linked with static libraries. Note that
* the client symbol is not library dependent, while the library symbols
* affect only the OpenMM library, meaning that other libraries can
* be clients of this one. However, we are assuming all-static or all-shared.
*/
#ifdef _MSC_VER
// We don't want to hear about how sprintf is "unsafe".
#pragma warning(disable:4996)
// Keep MS VC++ quiet about lack of dll export of private members.
#pragma warning(disable:4251)
#if defined(OPENMM_CUDA_BUILDING_SHARED_LIBRARY)
#define OPENMM_EXPORT_CUDA __declspec(dllexport)
#elif defined(OPENMM_CUDA_BUILDING_STATIC_LIBRARY) || defined(OPENMM_CUDA_USE_STATIC_LIBRARIES)
#define OPENMM_EXPORT_CUDA
#else
#define OPENMM_EXPORT_CUDA __declspec(dllimport) // i.e., a client of a shared library
#endif
#else
#define OPENMM_EXPORT_CUDA // Linux, Mac
#endif
#endif // OPENMM_WINDOWSEXPORTCUDA_H_
platforms/cuda/sharedTarget/CMakeLists.txt
View file @
5a06df78
...
@@ -4,17 +4,18 @@
...
@@ -4,17 +4,18 @@
INCLUDE
(
FindCUDA
)
INCLUDE
(
FindCUDA
)
INCLUDE_DIRECTORIES
(
${
CUDA_TOOLKIT_INCLUDE
}
)
INCLUDE_DIRECTORIES
(
${
CUDA_TOOLKIT_INCLUDE
}
)
FILE
(
GLOB CUDA_KERNELS
${
CUDA
_SOURCE_DIR
}
/kernels/*.cu
)
FILE
(
GLOB CUDA_KERNELS
${
KERNEL
_SOURCE_DIR
}
/kernels/*.cu
)
ADD_CUSTOM_COMMAND
(
OUTPUT
${
CUDA_
KERNELS_CPP
}
${
CUDA_
KERNELS_H
}
ADD_CUSTOM_COMMAND
(
OUTPUT
${
KERNELS_CPP
}
${
KERNELS_H
}
COMMAND
${
CMAKE_COMMAND
}
COMMAND
${
CMAKE_COMMAND
}
ARGS -D
CUDA
_SOURCE_DIR=
${
CUDA
_SOURCE_DIR
}
-D
CUDA_
KERNELS_CPP=
${
CUDA_
KERNELS_CPP
}
-D
CUDA_
KERNELS_H=
${
CUDA_
KERNELS_H
}
-D
CUDA
_SOURCE_CLASS=
${
CUDA
_SOURCE_CLASS
}
-P
${
CMAKE_
CURRENT_
SOURCE_DIR
}
/
..
/Encode
CUDA
Files.cmake
ARGS -D
KERNEL
_SOURCE_DIR=
${
KERNEL
_SOURCE_DIR
}
-D KERNELS_CPP=
${
KERNELS_CPP
}
-D KERNELS_H=
${
KERNELS_H
}
-D
KERNEL
_SOURCE_CLASS=
${
KERNEL
_SOURCE_CLASS
}
-D KERNEL_FILE_EXTENSION=cu
-P
${
CMAKE_SOURCE_DIR
}
/
cmake_modules
/Encode
Kernel
Files.cmake
DEPENDS
${
CUDA_KERNELS
}
DEPENDS
${
CUDA_KERNELS
}
)
)
SET_SOURCE_FILES_PROPERTIES
(
${
CUDA_
KERNELS_CPP
}
${
CUDA
_KERNELS_
H
}
PROPERTIES GENERATED TRUE
)
SET_SOURCE_FILES_PROPERTIES
(
${
KERNELS_CPP
}
${
KERNELS_H
}
${
COMMON
_KERNELS_
CPP
}
PROPERTIES GENERATED TRUE
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
ADD_DEPENDENCIES
(
${
SHARED_TARGET
}
CommonKernels
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
CUDA_CUDA_LIBRARY
}
${
CUDA_cufft_LIBRARY
}
${
PTHREADS_LIB
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
CUDA_CUDA_LIBRARY
}
${
CUDA_cufft_LIBRARY
}
${
PTHREADS_LIB
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_C
UDA
_BUILDING_SHARED_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_C
OMMON
_BUILDING_SHARED_LIBRARY"
)
IF
(
APPLE
)
IF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
)
ELSE
(
APPLE
)
ELSE
(
APPLE
)
...
...
platforms/cuda/src/CudaArray.cpp
View file @
5a06df78
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2012-201
8
Stanford University and the Authors. *
* Portions copyright (c) 2012-201
9
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -51,10 +51,10 @@ CudaArray::~CudaArray() {
...
@@ -51,10 +51,10 @@ CudaArray::~CudaArray() {
}
}
}
}
void
CudaArray
::
initialize
(
C
uda
Context
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
{
void
CudaArray
::
initialize
(
C
ompute
Context
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
{
if
(
this
->
pointer
!=
0
)
if
(
this
->
pointer
!=
0
)
throw
OpenMMException
(
"CudaArray has already been initialized"
);
throw
OpenMMException
(
"CudaArray has already been initialized"
);
this
->
context
=
&
context
;
this
->
context
=
&
dynamic_cast
<
CudaContext
&>
(
context
)
;
this
->
size
=
size
;
this
->
size
=
size
;
this
->
elementSize
=
elementSize
;
this
->
elementSize
=
elementSize
;
this
->
name
=
name
;
this
->
name
=
name
;
...
@@ -82,6 +82,10 @@ void CudaArray::resize(int size) {
...
@@ -82,6 +82,10 @@ void CudaArray::resize(int size) {
initialize
(
*
context
,
size
,
elementSize
,
name
);
initialize
(
*
context
,
size
,
elementSize
,
name
);
}
}
ComputeContext
&
CudaArray
::
getContext
()
{
return
*
context
;
}
void
CudaArray
::
upload
(
const
void
*
data
,
bool
blocking
)
{
void
CudaArray
::
upload
(
const
void
*
data
,
bool
blocking
)
{
if
(
pointer
==
0
)
if
(
pointer
==
0
)
throw
OpenMMException
(
"CudaArray has not been initialized"
);
throw
OpenMMException
(
"CudaArray has not been initialized"
);
...
@@ -112,12 +116,13 @@ void CudaArray::download(void* data, bool blocking) const {
...
@@ -112,12 +116,13 @@ void CudaArray::download(void* data, bool blocking) const {
}
}
}
}
void
CudaArray
::
copyTo
(
Cuda
Array
&
dest
)
const
{
void
CudaArray
::
copyTo
(
Array
Interface
&
dest
)
const
{
if
(
pointer
==
0
)
if
(
pointer
==
0
)
throw
OpenMMException
(
"CudaArray has not been initialized"
);
throw
OpenMMException
(
"CudaArray has not been initialized"
);
if
(
dest
.
getSize
()
!=
size
||
dest
.
getElementSize
()
!=
elementSize
)
if
(
dest
.
getSize
()
!=
size
||
dest
.
getElementSize
()
!=
elementSize
)
throw
OpenMMException
(
"Error copying array "
+
name
+
" to "
+
dest
.
getName
()
+
": The destination array does not match the size of the array"
);
throw
OpenMMException
(
"Error copying array "
+
name
+
" to "
+
dest
.
getName
()
+
": The destination array does not match the size of the array"
);
CUresult
result
=
cuMemcpyDtoDAsync
(
dest
.
getDevicePointer
(),
pointer
,
size
*
elementSize
,
context
->
getCurrentStream
());
CudaArray
&
cuDest
=
context
->
unwrap
(
dest
);
CUresult
result
=
cuMemcpyDtoDAsync
(
cuDest
.
getDevicePointer
(),
pointer
,
size
*
elementSize
,
context
->
getCurrentStream
());
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
str
<<
"Error copying array "
<<
name
<<
" to "
<<
dest
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
str
<<
"Error copying array "
<<
name
<<
" to "
<<
dest
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
...
platforms/cuda/src/CudaBondedUtilities.cpp
View file @
5a06df78
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
9
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "CudaBondedUtilities.h"
#include "CudaBondedUtilities.h"
#include "CudaContext.h"
#include "CudaExpressionUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaKernelSources.h"
#include "CudaKernelSources.h"
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
...
@@ -52,6 +53,10 @@ string CudaBondedUtilities::addArgument(CUdeviceptr data, const string& type) {
...
@@ -52,6 +53,10 @@ string CudaBondedUtilities::addArgument(CUdeviceptr data, const string& type) {
return
"customArg"
+
context
.
intToString
(
arguments
.
size
());
return
"customArg"
+
context
.
intToString
(
arguments
.
size
());
}
}
string
CudaBondedUtilities
::
addArgument
(
ArrayInterface
&
data
,
const
string
&
type
)
{
return
addArgument
(
context
.
unwrap
(
data
).
getDevicePointer
(),
type
);
}
string
CudaBondedUtilities
::
addEnergyParameterDerivative
(
const
string
&
param
)
{
string
CudaBondedUtilities
::
addEnergyParameterDerivative
(
const
string
&
param
)
{
// See if the parameter has already been added.
// See if the parameter has already been added.
...
...
platforms/cuda/src/CudaContext.cpp
View file @
5a06df78
...
@@ -31,14 +31,14 @@
...
@@ -31,14 +31,14 @@
#include "CudaContext.h"
#include "CudaContext.h"
#include "CudaArray.h"
#include "CudaArray.h"
#include "CudaBondedUtilities.h"
#include "CudaBondedUtilities.h"
#include "Cuda
ForceInfo
.h"
#include "Cuda
Event
.h"
#include "CudaIntegrationUtilities.h"
#include "CudaIntegrationUtilities.h"
#include "CudaKernels.h"
#include "CudaKernels.h"
#include "CudaKernelSources.h"
#include "CudaKernelSources.h"
#include "CudaNonbondedUtilities.h"
#include "CudaNonbondedUtilities.h"
#include "CudaProgram.h"
#include "openmm/common/ComputeArray.h"
#include "SHA1.h"
#include "SHA1.h"
#include "hilbert.h"
#include "openmm/OpenMMException.h"
#include "openmm/Platform.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/System.h"
#include "openmm/VirtualSite.h"
#include "openmm/VirtualSite.h"
...
@@ -106,9 +106,9 @@ static int executeInWindows(const string &command) {
...
@@ -106,9 +106,9 @@ static int executeInWindows(const string &command) {
#endif
#endif
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
,
CudaContext
*
originalContext
)
:
system
(
system
),
currentStream
(
0
),
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
,
CudaContext
*
originalContext
)
:
ComputeContext
(
system
),
currentStream
(
0
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordere
d
(
false
),
hasAssignedPosqCharges
(
false
),
platformData
(
platformData
),
contextIsVali
d
(
false
),
hasAssignedPosqCharges
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
pinnedBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
)
,
thread
(
NULL
)
{
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
pinnedBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
)
{
// Determine what compiler to use.
// Determine what compiler to use.
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
...
@@ -218,7 +218,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -218,7 +218,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
}
}
int
major
,
minor
;
int
major
,
minor
;
CHECK_RESULT
(
cuDeviceComputeCapability
(
&
major
,
&
minor
,
device
));
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
major
,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR
,
device
));
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
minor
,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR
,
device
));
int
numThreadBlocksPerComputeUnit
=
(
major
==
6
?
4
:
6
);
int
numThreadBlocksPerComputeUnit
=
(
major
==
6
?
4
:
6
);
if
(
cudaDriverVersion
<
7000
)
{
if
(
cudaDriverVersion
<
7000
)
{
// This is a workaround to support GTX 980 with CUDA 6.5. It reports
// This is a workaround to support GTX 980 with CUDA 6.5. It reports
...
@@ -257,7 +258,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -257,7 +258,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int
multiprocessors
;
int
multiprocessors
;
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
device
));
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
device
));
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
multiprocessors
;
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
multiprocessors
;
if
(
c
omputeCapability
>=
7.
0
)
{
if
(
c
udaDriverVersion
>=
900
0
)
{
compilationDefines
[
"SYNC_WARPS"
]
=
"__syncwarp();"
;
compilationDefines
[
"SYNC_WARPS"
]
=
"__syncwarp();"
;
compilationDefines
[
"SHFL(var, srcLane)"
]
=
"__shfl_sync(0xffffffff, var, srcLane);"
;
compilationDefines
[
"SHFL(var, srcLane)"
]
=
"__shfl_sync(0xffffffff, var, srcLane);"
;
compilationDefines
[
"BALLOT(var)"
]
=
"__ballot_sync(0xffffffff, var);"
;
compilationDefines
[
"BALLOT(var)"
]
=
"__ballot_sync(0xffffffff, var);"
;
...
@@ -300,7 +301,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -300,7 +301,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"make_mixed3"
]
=
"make_float3"
;
compilationDefines
[
"make_mixed3"
]
=
"make_float3"
;
compilationDefines
[
"make_mixed4"
]
=
"make_float4"
;
compilationDefines
[
"make_mixed4"
]
=
"make_float4"
;
}
}
posCellOffsets
.
resize
(
paddedNumAtoms
,
make_int4
(
0
,
0
,
0
,
0
));
force
.
initialize
<
long
long
>
(
*
this
,
paddedNumAtoms
*
3
,
"force"
);
posCellOffsets
.
resize
(
paddedNumAtoms
,
mm_int4
(
0
,
0
,
0
,
0
));
atomIndexDevice
.
initialize
<
int
>
(
*
this
,
paddedNumAtoms
,
"atomIndex"
);
atomIndexDevice
.
initialize
<
int
>
(
*
this
,
paddedNumAtoms
,
"atomIndex"
);
atomIndex
.
resize
(
paddedNumAtoms
);
atomIndex
.
resize
(
paddedNumAtoms
);
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
...
@@ -396,10 +398,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -396,10 +398,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
"pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}"
;
"pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}"
;
}
}
// Create the work thread used for parallelization when running on multiple devices.
thread
=
new
WorkThread
();
// Create utilities objects.
// Create utilities objects.
bonded
=
new
CudaBondedUtilities
(
*
this
);
bonded
=
new
CudaBondedUtilities
(
*
this
);
...
@@ -428,8 +426,6 @@ CudaContext::~CudaContext() {
...
@@ -428,8 +426,6 @@ CudaContext::~CudaContext() {
delete
bonded
;
delete
bonded
;
if
(
nonbonded
!=
NULL
)
if
(
nonbonded
!=
NULL
)
delete
nonbonded
;
delete
nonbonded
;
if
(
thread
!=
NULL
)
delete
thread
;
string
errorMessage
=
"Error deleting Context"
;
string
errorMessage
=
"Error deleting Context"
;
if
(
contextIsValid
&&
!
isLinkedContext
)
{
if
(
contextIsValid
&&
!
isLinkedContext
)
{
cuProfilerStop
();
cuProfilerStop
();
...
@@ -469,7 +465,6 @@ void CudaContext::initialize() {
...
@@ -469,7 +465,6 @@ void CudaContext::initialize() {
}
}
velm
.
upload
(
pinnedBuffer
);
velm
.
upload
(
pinnedBuffer
);
bonded
->
initialize
(
system
);
bonded
->
initialize
(
system
);
force
.
initialize
<
long
long
>
(
*
this
,
paddedNumAtoms
*
3
,
"force"
);
addAutoclearBuffer
(
force
.
getDevicePointer
(),
force
.
getSize
()
*
force
.
getElementSize
());
addAutoclearBuffer
(
force
.
getDevicePointer
(),
force
.
getSize
()
*
force
.
getElementSize
());
addAutoclearBuffer
(
energyBuffer
.
getDevicePointer
(),
energyBuffer
.
getSize
()
*
energyBuffer
.
getElementSize
());
addAutoclearBuffer
(
energyBuffer
.
getDevicePointer
(),
energyBuffer
.
getSize
()
*
energyBuffer
.
getElementSize
());
int
numEnergyParamDerivs
=
energyParamDerivNames
.
size
();
int
numEnergyParamDerivs
=
energyParamDerivNames
.
size
();
...
@@ -484,12 +479,8 @@ void CudaContext::initialize() {
...
@@ -484,12 +479,8 @@ void CudaContext::initialize() {
nonbonded
->
initialize
(
system
);
nonbonded
->
initialize
(
system
);
}
}
void
CudaContext
::
addForce
(
CudaForceInfo
*
force
)
{
void
CudaContext
::
initializeContexts
()
{
forces
.
push_back
(
force
);
getPlatformData
().
initializeContexts
(
system
);
}
vector
<
CudaForceInfo
*>&
CudaContext
::
getForceInfos
()
{
return
forces
;
}
}
void
CudaContext
::
setAsCurrent
()
{
void
CudaContext
::
setAsCurrent
()
{
...
@@ -497,38 +488,6 @@ void CudaContext::setAsCurrent() {
...
@@ -497,38 +488,6 @@ void CudaContext::setAsCurrent() {
cuCtxSetCurrent
(
context
);
cuCtxSetCurrent
(
context
);
}
}
string
CudaContext
::
replaceStrings
(
const
string
&
input
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
replacements
)
const
{
static
set
<
char
>
symbolChars
;
if
(
symbolChars
.
size
()
==
0
)
{
symbolChars
.
insert
(
'_'
);
for
(
char
c
=
'a'
;
c
<=
'z'
;
c
++
)
symbolChars
.
insert
(
c
);
for
(
char
c
=
'A'
;
c
<=
'Z'
;
c
++
)
symbolChars
.
insert
(
c
);
for
(
char
c
=
'0'
;
c
<=
'9'
;
c
++
)
symbolChars
.
insert
(
c
);
}
string
result
=
input
;
for
(
auto
&
pair
:
replacements
)
{
int
index
=
0
;
int
size
=
pair
.
first
.
size
();
do
{
index
=
result
.
find
(
pair
.
first
,
index
);
if
(
index
!=
result
.
npos
)
{
if
((
index
==
0
||
symbolChars
.
find
(
result
[
index
-
1
])
==
symbolChars
.
end
())
&&
(
index
==
result
.
size
()
-
size
||
symbolChars
.
find
(
result
[
index
+
size
])
==
symbolChars
.
end
()))
{
// We have found a complete symbol, not part of a longer symbol.
result
.
replace
(
index
,
size
,
pair
.
second
);
index
+=
pair
.
second
.
size
();
}
else
index
++
;
}
}
while
(
index
!=
result
.
npos
);
}
return
result
;
}
CUmodule
CudaContext
::
createModule
(
const
string
source
,
const
char
*
optimizationFlags
)
{
CUmodule
CudaContext
::
createModule
(
const
string
source
,
const
char
*
optimizationFlags
)
{
return
createModule
(
source
,
map
<
string
,
string
>
(),
optimizationFlags
);
return
createModule
(
source
,
map
<
string
,
string
>
(),
optimizationFlags
);
}
}
...
@@ -572,6 +531,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
...
@@ -572,6 +531,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
src
<<
"typedef float4 mixed4;
\n
"
;
src
<<
"typedef float4 mixed4;
\n
"
;
}
}
src
<<
"typedef unsigned int tileflags;
\n
"
;
src
<<
"typedef unsigned int tileflags;
\n
"
;
src
<<
CudaKernelSources
::
common
<<
endl
;
for
(
auto
&
pair
:
defines
)
{
for
(
auto
&
pair
:
defines
)
{
src
<<
"#define "
<<
pair
.
first
;
src
<<
"#define "
<<
pair
.
first
;
if
(
!
pair
.
second
.
empty
())
if
(
!
pair
.
second
.
empty
())
...
@@ -716,19 +676,29 @@ void CudaContext::restoreDefaultStream() {
...
@@ -716,19 +676,29 @@ void CudaContext::restoreDefaultStream() {
setCurrentStream
(
0
);
setCurrentStream
(
0
);
}
}
string
CudaContext
::
doubleToString
(
double
value
)
const
{
CudaArray
*
CudaContext
::
createArray
()
{
stringstream
s
;
return
new
CudaArray
();
s
.
precision
(
useDoublePrecision
?
16
:
8
);
s
<<
scientific
<<
value
;
if
(
!
useDoublePrecision
)
s
<<
"f"
;
return
s
.
str
();
}
}
string
CudaContext
::
intToString
(
int
value
)
const
{
ComputeEvent
CudaContext
::
createEvent
()
{
stringstream
s
;
return
shared_ptr
<
ComputeEventImpl
>
(
new
CudaEvent
(
*
this
));
s
<<
value
;
}
return
s
.
str
();
ComputeProgram
CudaContext
::
compileProgram
(
const
std
::
string
source
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
defines
)
{
CUmodule
module
=
createModule
(
CudaKernelSources
::
vectorOps
+
source
,
defines
);
return
shared_ptr
<
ComputeProgramImpl
>
(
new
CudaProgram
(
*
this
,
module
));
}
CudaArray
&
CudaContext
::
unwrap
(
ArrayInterface
&
array
)
const
{
CudaArray
*
cuarray
;
ComputeArray
*
wrapper
=
dynamic_cast
<
ComputeArray
*>
(
&
array
);
if
(
wrapper
!=
NULL
)
cuarray
=
dynamic_cast
<
CudaArray
*>
(
&
wrapper
->
getArray
());
else
cuarray
=
dynamic_cast
<
CudaArray
*>
(
&
array
);
if
(
cuarray
==
NULL
)
throw
OpenMMException
(
"Array argument is not an CudaArray"
);
return
*
cuarray
;
}
}
std
::
string
CudaContext
::
getErrorString
(
CUresult
result
)
{
std
::
string
CudaContext
::
getErrorString
(
CUresult
result
)
{
...
@@ -763,8 +733,8 @@ int CudaContext::computeThreadBlockSize(double memory, bool preferShared) const
...
@@ -763,8 +733,8 @@ int CudaContext::computeThreadBlockSize(double memory, bool preferShared) const
return
threads
;
return
threads
;
}
}
void
CudaContext
::
clearBuffer
(
Cuda
Array
&
array
)
{
void
CudaContext
::
clearBuffer
(
Array
Interface
&
array
)
{
clearBuffer
(
array
.
getDevicePointer
(),
array
.
getSize
()
*
array
.
getElementSize
());
clearBuffer
(
unwrap
(
array
)
.
getDevicePointer
(),
array
.
getSize
()
*
array
.
getElementSize
());
}
}
void
CudaContext
::
clearBuffer
(
CUdeviceptr
memory
,
int
size
)
{
void
CudaContext
::
clearBuffer
(
CUdeviceptr
memory
,
int
size
)
{
...
@@ -773,8 +743,8 @@ void CudaContext::clearBuffer(CUdeviceptr memory, int size) {
...
@@ -773,8 +743,8 @@ void CudaContext::clearBuffer(CUdeviceptr memory, int size) {
executeKernel
(
clearBufferKernel
,
args
,
words
,
128
);
executeKernel
(
clearBufferKernel
,
args
,
words
,
128
);
}
}
void
CudaContext
::
addAutoclearBuffer
(
Cuda
Array
&
array
)
{
void
CudaContext
::
addAutoclearBuffer
(
Array
Interface
&
array
)
{
addAutoclearBuffer
(
array
.
getDevicePointer
(),
array
.
getSize
()
*
array
.
getElementSize
());
addAutoclearBuffer
(
unwrap
(
array
)
.
getDevicePointer
(),
array
.
getSize
()
*
array
.
getElementSize
());
}
}
void
CudaContext
::
addAutoclearBuffer
(
CUdeviceptr
memory
,
int
size
)
{
void
CudaContext
::
addAutoclearBuffer
(
CUdeviceptr
memory
,
int
size
)
{
...
@@ -855,523 +825,6 @@ bool CudaContext::requestPosqCharges() {
...
@@ -855,523 +825,6 @@ bool CudaContext::requestPosqCharges() {
return
allow
;
return
allow
;
}
}
/**
 * This class ensures that atom reordering doesn't break virtual sites.
 *
 * Every virtual site in the System is exposed as one particle group consisting of the
 * site itself followed by the particles it depends on.  Two groups are considered
 * identical only if the sites have the same concrete VirtualSite type and exactly
 * the same weights.
 */
class CudaContext::VirtualSiteInfo : public CudaForceInfo {
public:
    /**
     * Record the type, particles and weights of every virtual site in the System.
     *
     * @param system   the System whose virtual sites should be described
     */
    VirtualSiteInfo(const System& system) {
        for (int i = 0; i < system.getNumParticles(); i++) {
            if (!system.isVirtualSite(i))
                continue;
            const VirtualSite& vsite = system.getVirtualSite(i);
            siteTypes.push_back(&typeid(vsite));

            // The group is the site itself followed by the particles it is computed from.

            vector<int> particles(1, i);
            for (int j = 0; j < vsite.getNumParticles(); j++)
                particles.push_back(vsite.getParticle(j));
            siteParticles.push_back(particles);

            // Record the weights.  Site types not listed here get an empty weight list.

            vector<double> weights;
            if (const TwoParticleAverageSite* twoSite = dynamic_cast<const TwoParticleAverageSite*>(&vsite)) {
                // A two particle average.

                weights.push_back(twoSite->getWeight(0));
                weights.push_back(twoSite->getWeight(1));
            }
            else if (const ThreeParticleAverageSite* threeSite = dynamic_cast<const ThreeParticleAverageSite*>(&vsite)) {
                // A three particle average.

                weights.push_back(threeSite->getWeight(0));
                weights.push_back(threeSite->getWeight(1));
                weights.push_back(threeSite->getWeight(2));
            }
            else if (const OutOfPlaneSite* planeSite = dynamic_cast<const OutOfPlaneSite*>(&vsite)) {
                // An out of plane site.

                weights.push_back(planeSite->getWeight12());
                weights.push_back(planeSite->getWeight13());
                weights.push_back(planeSite->getWeightCross());
            }
            siteWeights.push_back(weights);
        }
    }
    /**
     * Get the number of particle groups, which is one per virtual site.
     */
    int getNumParticleGroups() {
        return siteTypes.size();
    }
    /**
     * Get the particles in a group: the virtual site followed by the particles it depends on.
     *
     * @param index      the index of the group
     * @param particles  on exit, the particle indices of the group
     */
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        particles = siteParticles[index];
    }
    /**
     * Determine whether two virtual sites have the same type and the same weights.
     */
    bool areGroupsIdentical(int group1, int group2) {
        // Compare the type_info objects rather than their addresses: typeid is not guaranteed
        // to return the same object every time it is evaluated for a given type.

        if (*siteTypes[group1] != *siteTypes[group2])
            return false;

        // Vector equality checks both the number of weights and every individual weight.

        return siteWeights[group1] == siteWeights[group2];
    }
private:
    vector<const type_info*> siteTypes;
    vector<vector<int> > siteParticles;
    vector<vector<double> > siteWeights;
};
/**
 * Identify the molecules in the System (first call only) and sort them into groups of
 * identical molecules.  The result is stored in moleculeGroups: for each group, the
 * indices of its member molecules, the first atom of each member, and the atom indices
 * of the molecule relative to its first atom.
 */
void CudaContext::findMoleculeGroups() {
    // The first time this is called, we need to identify all the molecules in the system.

    if (moleculeGroups.size() == 0) {
        // Add a ForceInfo that makes sure reordering doesn't break virtual sites.

        addForce(new VirtualSiteInfo(system));

        // First make a list of every other atom to which each atom is connected by a constraint or force group.

        vector<vector<int> > atomBonds(system.getNumParticles());
        for (int i = 0; i < system.getNumConstraints(); i++) {
            int particle1, particle2;
            double distance;
            system.getConstraintParameters(i, particle1, particle2, distance);
            atomBonds[particle1].push_back(particle2);
            atomBonds[particle2].push_back(particle1);
        }
        for (auto force : forces) {
            for (int j = 0; j < force->getNumParticleGroups(); j++) {
                vector<int> particles;
                force->getParticlesInGroup(j, particles);
                for (int k = 0; k < (int) particles.size(); k++)
                    for (int m = 0; m < (int) particles.size(); m++)
                        if (k != m)
                            atomBonds[particles[k]].push_back(particles[m]);
            }
        }

        // Now identify atoms by which molecule they belong to.

        vector<vector<int> > atomIndices = ContextImpl::findMolecules(numAtoms, atomBonds);
        int numMolecules = atomIndices.size();
        vector<int> atomMolecule(numAtoms);
        for (int i = 0; i < (int) atomIndices.size(); i++)
            for (int j = 0; j < (int) atomIndices[i].size(); j++)
                atomMolecule[atomIndices[i][j]] = i;

        // Construct a description of each molecule.  Constraints and force groups are assigned
        // to the molecule that contains their first particle.

        molecules.resize(numMolecules);
        for (int i = 0; i < numMolecules; i++) {
            molecules[i].atoms = atomIndices[i];
            molecules[i].groups.resize(forces.size());
        }
        for (int i = 0; i < system.getNumConstraints(); i++) {
            int particle1, particle2;
            double distance;
            system.getConstraintParameters(i, particle1, particle2, distance);
            molecules[atomMolecule[particle1]].constraints.push_back(i);
        }
        for (int i = 0; i < (int) forces.size(); i++)
            for (int j = 0; j < forces[i]->getNumParticleGroups(); j++) {
                vector<int> particles;
                forces[i]->getParticlesInGroup(j, particles);
                if (particles.size() > 0)
                    molecules[atomMolecule[particles[0]]].groups[i].push_back(j);
            }
    }

    // Sort them into groups of identical molecules.

    vector<Molecule> uniqueMolecules;
    vector<vector<int> > moleculeInstances;
    vector<vector<int> > moleculeOffsets;
    for (int molIndex = 0; molIndex < (int) molecules.size(); molIndex++) {
        Molecule& mol = molecules[molIndex];

        // See if it is identical to another molecule.

        bool isNew = true;
        for (int j = 0; j < (int) uniqueMolecules.size() && isNew; j++) {
            Molecule& mol2 = uniqueMolecules[j];
            bool identical = (mol.atoms.size() == mol2.atoms.size() && mol.constraints.size() == mol2.constraints.size());

            // See if the atoms are identical.  Corresponding atoms must differ by a constant offset.

            int atomOffset = mol2.atoms[0]-mol.atoms[0];
            for (int i = 0; i < (int) mol.atoms.size() && identical; i++) {
                if (mol.atoms[i] != mol2.atoms[i]-atomOffset || system.getParticleMass(mol.atoms[i]) != system.getParticleMass(mol2.atoms[i]))
                    identical = false;
                for (int k = 0; k < (int) forces.size(); k++)
                    if (!forces[k]->areParticlesIdentical(mol.atoms[i], mol2.atoms[i]))
                        identical = false;
            }

            // See if the constraints are identical.

            for (int i = 0; i < (int) mol.constraints.size() && identical; i++) {
                int c1particle1, c1particle2, c2particle1, c2particle2;
                double distance1, distance2;
                system.getConstraintParameters(mol.constraints[i], c1particle1, c1particle2, distance1);
                system.getConstraintParameters(mol2.constraints[i], c2particle1, c2particle2, distance2);
                if (c1particle1 != c2particle1-atomOffset || c1particle2 != c2particle2-atomOffset || distance1 != distance2)
                    identical = false;
            }

            // See if the force groups are identical.

            for (int i = 0; i < (int) forces.size() && identical; i++) {
                if (mol.groups[i].size() != mol2.groups[i].size())
                    identical = false;
                for (int k = 0; k < (int) mol.groups[i].size() && identical; k++) {
                    if (!forces[i]->areGroupsIdentical(mol.groups[i][k], mol2.groups[i][k])) {
                        identical = false;
                        break;
                    }
                    vector<int> p1, p2;
                    forces[i]->getParticlesInGroup(mol.groups[i][k], p1);
                    forces[i]->getParticlesInGroup(mol2.groups[i][k], p2);

                    // Never index p2 past its end: groups of different sizes cannot match.

                    if (p1.size() != p2.size()) {
                        identical = false;
                        break;
                    }
                    for (int m = 0; m < (int) p1.size() && identical; m++)
                        if (p1[m] != p2[m]-atomOffset)
                            identical = false;
                }
            }
            if (identical) {
                moleculeInstances[j].push_back(molIndex);
                moleculeOffsets[j].push_back(mol.atoms[0]);
                isNew = false;
            }
        }
        if (isNew) {
            // This molecule starts a new group, of which it is the first instance.

            uniqueMolecules.push_back(mol);
            moleculeInstances.push_back(vector<int>());
            moleculeInstances[moleculeInstances.size()-1].push_back(molIndex);
            moleculeOffsets.push_back(vector<int>());
            moleculeOffsets[moleculeOffsets.size()-1].push_back(mol.atoms[0]);
        }
    }

    // Store the groups, with atom indices expressed relative to the first atom of the molecule.

    moleculeGroups.resize(moleculeInstances.size());
    for (int i = 0; i < (int) moleculeInstances.size(); i++) {
        moleculeGroups[i].instances = moleculeInstances[i];
        moleculeGroups[i].offsets = moleculeOffsets[i];
        vector<int>& atoms = uniqueMolecules[i].atoms;
        moleculeGroups[i].atoms.resize(atoms.size());
        for (int j = 0; j < (int) atoms.size(); j++)
            moleculeGroups[i].atoms[j] = atoms[j]-atoms[0];
    }
}
/**
 * Check every registered force, in order, and stop as soon as one of them causes the
 * molecule groups to be rebuilt.
 */
void CudaContext::invalidateMolecules() {
    for (auto info : forces) {
        const bool rebuilt = invalidateMolecules(info);
        if (rebuilt)
            break;
    }
}
/**
 * Check whether the molecules that are currently grouped together are still identical as far
 * as one force is concerned.  If they are not, restore the atoms to their original order,
 * rebuild the molecule groups, notify the reorder listeners and reorder the atoms again.
 *
 * @param force   the force whose particles and groups should be compared
 * @return true if the molecule groups had to be rebuilt, false if nothing was changed
 */
bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
    if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
        return false;
    bool valid = true;
    int forceIndex = -1;
    for (int i = 0; i < forces.size(); i++)
        if (forces[i] == force)
            forceIndex = i;

    // Compare every instance in each group to the first instance of that group.  Each thread
    // handles a slice of the instances; index 0 is the reference, so slices start at 1 or later.
    // NOTE(review): "valid" is captured by reference and cleared by the worker threads with no
    // synchronization visible here.

    getPlatformData().threads.execute([&] (ThreadPool& threads, int threadIndex) {
        for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) {
            MoleculeGroup& mol = moleculeGroups[group];
            vector<int>& instances = mol.instances;
            vector<int>& offsets = mol.offsets;
            vector<int>& atoms = mol.atoms;
            int numMolecules = instances.size();
            Molecule& m1 = molecules[instances[0]];
            int offset1 = offsets[0];
            int numThreads = threads.getNumThreads();
            int start = max(1, threadIndex*numMolecules/numThreads);
            int end = (threadIndex+1)*numMolecules/numThreads;
            for (int j = start; j < end; j++) {
                // See if the atoms are identical.

                Molecule& m2 = molecules[instances[j]];
                int offset2 = offsets[j];
                for (int i = 0; i < (int) atoms.size() && valid; i++) {
                    if (!force->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
                        valid = false;
                }

                // See if the force groups are identical.

                if (valid && forceIndex > -1) {
                    for (int k = 0; k < (int) m1.groups[forceIndex].size() && valid; k++)
                        if (!force->areGroupsIdentical(m1.groups[forceIndex][k], m2.groups[forceIndex][k]))
                            valid = false;
                }
            }
        }
    });
    getPlatformData().threads.waitForThreads();
    if (valid)
        return false;

    // The list of which molecules are identical is no longer valid.  We need to restore the
    // atoms to their original order, rebuild the list of identical molecules, and sort them
    // again.

    vector<int4> newCellOffsets(numAtoms);
    if (useDoublePrecision) {
        vector<double4> oldPosq(paddedNumAtoms);
        vector<double4> newPosq(paddedNumAtoms, make_double4(0,0,0,0));
        vector<double4> oldVelm(paddedNumAtoms);
        vector<double4> newVelm(paddedNumAtoms, make_double4(0,0,0,0));
        posq.download(oldPosq);
        velm.download(oldVelm);
        for (int i = 0; i < numAtoms; i++) {
            int index = atomIndex[i];
            newPosq[index] = oldPosq[i];
            newVelm[index] = oldVelm[i];
            newCellOffsets[index] = posCellOffsets[i];
        }
        posq.upload(newPosq);
        velm.upload(newVelm);
    }
    else if (useMixedPrecision) {
        vector<float4> oldPosq(paddedNumAtoms);
        vector<float4> newPosq(paddedNumAtoms, make_float4(0,0,0,0));
        vector<float4> oldPosqCorrection(paddedNumAtoms);
        vector<float4> newPosqCorrection(paddedNumAtoms, make_float4(0,0,0,0));
        vector<double4> oldVelm(paddedNumAtoms);
        vector<double4> newVelm(paddedNumAtoms, make_double4(0,0,0,0));
        posq.download(oldPosq);

        // The corrections must be fetched too; otherwise zeros would be uploaded below and
        // the existing corrections on the device would be lost.

        posqCorrection.download(oldPosqCorrection);
        velm.download(oldVelm);
        for (int i = 0; i < numAtoms; i++) {
            int index = atomIndex[i];
            newPosq[index] = oldPosq[i];
            newPosqCorrection[index] = oldPosqCorrection[i];
            newVelm[index] = oldVelm[i];
            newCellOffsets[index] = posCellOffsets[i];
        }
        posq.upload(newPosq);
        posqCorrection.upload(newPosqCorrection);
        velm.upload(newVelm);
    }
    else {
        vector<float4> oldPosq(paddedNumAtoms);
        vector<float4> newPosq(paddedNumAtoms, make_float4(0,0,0,0));
        vector<float4> oldVelm(paddedNumAtoms);
        vector<float4> newVelm(paddedNumAtoms, make_float4(0,0,0,0));
        posq.download(oldPosq);
        velm.download(oldVelm);
        for (int i = 0; i < numAtoms; i++) {
            int index = atomIndex[i];
            newPosq[index] = oldPosq[i];
            newVelm[index] = oldVelm[i];
            newCellOffsets[index] = posCellOffsets[i];
        }
        posq.upload(newPosq);
        velm.upload(newVelm);
    }

    // The atoms are back in their original order, so the index map becomes the identity.

    for (int i = 0; i < numAtoms; i++) {
        atomIndex[i] = i;
        posCellOffsets[i] = newCellOffsets[i];
    }
    atomIndexDevice.upload(atomIndex);
    findMoleculeGroups();
    for (auto listener : reorderListeners)
        listener->execute();
    reorderAtoms();
    return true;
}
/**
 * Reorder the atoms if reordering applies to this context and at least 250 calls have been
 * made since the last reorder.  atomsWereReordered records whether this call reordered them.
 */
void CudaContext::reorderAtoms() {
    atomsWereReordered = false;

    // Reordering only applies when there are atoms and the nonbonded utilities use a cutoff.

    const bool reorderingApplies = (numAtoms != 0 && nonbonded != NULL && nonbonded->getUseCutoff());
    if (!reorderingApplies || stepsSinceReorder < 250) {
        stepsSinceReorder++;
        return;
    }
    atomsWereReordered = true;
    stepsSinceReorder = 0;

    // Select the instantiation that matches the precision mode.

    if (useDoublePrecision) {
        reorderAtomsImpl<double, double4, double, double4>();
        return;
    }
    if (useMixedPrecision) {
        reorderAtomsImpl<float, float4, double, double4>();
        return;
    }
    reorderAtomsImpl<float, float4, float, float4>();
}
/**
 * Reorder the atoms so that molecules that are close together in space are also close together
 * in the arrays.  Within each group of identical molecules, the molecules are assigned to
 * spatial bins and sorted by bin.  Positions, position corrections (mixed precision only),
 * velocities, cell offsets and the atom index map are permuted consistently and uploaded, and
 * the reorder listeners are then executed.
 *
 * Real4 is the element type used for posq and posqCorrection, and Mixed4 the element type used
 * for velm.  Real is used for the scalar intermediate values; Mixed is not used in the body.
 *
 * Throws OpenMMException if the x coordinate of a molecule center is NaN.
 */
template <class Real, class Real4, class Mixed, class Mixed4>
void CudaContext::reorderAtomsImpl() {
    // Find the range of positions and the number of bins along each axis.

    Real4 padding = {0, 0, 0, 0};
    vector<Real4> oldPosq(paddedNumAtoms, padding);
    vector<Real4> oldPosqCorrection(paddedNumAtoms, padding);
    Mixed4 paddingMixed = {0, 0, 0, 0};
    vector<Mixed4> oldVelm(paddedNumAtoms, paddingMixed);
    posq.download(oldPosq);
    velm.download(oldVelm);
    if (useMixedPrecision)
        posqCorrection.download(oldPosqCorrection);
    Real minx = oldPosq[0].x, maxx = oldPosq[0].x;
    Real miny = oldPosq[0].y, maxy = oldPosq[0].y;
    Real minz = oldPosq[0].z, maxz = oldPosq[0].z;
    if (nonbonded->getUsePeriodic()) {
        // With periodic boundaries the range is taken from the periodic box size.

        minx = miny = minz = 0.0;
        maxx = periodicBoxSize.x;
        maxy = periodicBoxSize.y;
        maxz = periodicBoxSize.z;
    }
    else {
        // Otherwise use the bounding box of the atom positions.

        for (int i = 1; i < numAtoms; i++) {
            const Real4& pos = oldPosq[i];
            minx = min(minx, pos.x);
            maxx = max(maxx, pos.x);
            miny = min(miny, pos.y);
            maxy = max(maxy, pos.y);
            minz = min(minz, pos.z);
            maxz = max(maxz, pos.z);
        }
    }

    // Loop over each group of identical molecules and reorder them.

    vector<int> originalIndex(numAtoms);
    vector<Real4> newPosq(paddedNumAtoms);
    vector<Real4> newPosqCorrection(paddedNumAtoms);
    vector<Mixed4> newVelm(paddedNumAtoms);
    vector<int4> newCellOffsets(numAtoms);
    for (auto& mol : moleculeGroups) {
        // Find the center of each molecule.

        int numMolecules = mol.offsets.size();
        vector<int>& atoms = mol.atoms;
        vector<Real4> molPos(numMolecules);
        Real invNumAtoms = (Real) (1.0/atoms.size());
        for (int i = 0; i < numMolecules; i++) {
            molPos[i].x = 0.0f;
            molPos[i].y = 0.0f;
            molPos[i].z = 0.0f;
            for (int j = 0; j < (int)atoms.size(); j++) {
                int atom = atoms[j]+mol.offsets[i];
                const Real4& pos = oldPosq[atom];
                molPos[i].x += pos.x;
                molPos[i].y += pos.y;
                molPos[i].z += pos.z;
            }
            molPos[i].x *= invNumAtoms;
            molPos[i].y *= invNumAtoms;
            molPos[i].z *= invNumAtoms;

            // A value that is not equal to itself is NaN.  Only the x component is tested.

            if (molPos[i].x != molPos[i].x)
                throw OpenMMException("Particle coordinate is nan");
        }
        if (nonbonded->getUsePeriodic()) {
            // Move each molecule position into the same box.

            for (int i = 0; i < numMolecules; i++) {
                // Remove whole box vectors one axis at a time, z first, then y, then x.

                Real4 center = molPos[i];
                int zcell = (int) floor(center.z*invPeriodicBoxSize.z);
                center.x -= zcell*periodicBoxVecZ.x;
                center.y -= zcell*periodicBoxVecZ.y;
                center.z -= zcell*periodicBoxVecZ.z;
                int ycell = (int) floor(center.y*invPeriodicBoxSize.y);
                center.x -= ycell*periodicBoxVecY.x;
                center.y -= ycell*periodicBoxVecY.y;
                int xcell = (int) floor(center.x*invPeriodicBoxSize.x);
                center.x -= xcell*periodicBoxVecX.x;
                if (xcell != 0 || ycell != 0 || zcell != 0) {
                    // The center moved, so shift every atom of the molecule by the same amount
                    // and subtract the cell counts from its cell offsets.

                    Real dx = molPos[i].x-center.x;
                    Real dy = molPos[i].y-center.y;
                    Real dz = molPos[i].z-center.z;
                    molPos[i] = center;
                    for (int j = 0; j < (int) atoms.size(); j++) {
                        int atom = atoms[j]+mol.offsets[i];
                        Real4 p = oldPosq[atom];
                        p.x -= dx;
                        p.y -= dy;
                        p.z -= dz;
                        oldPosq[atom] = p;
                        posCellOffsets[atom].x -= xcell;
                        posCellOffsets[atom].y -= ycell;
                        posCellOffsets[atom].z -= zcell;
                    }
                }
            }
        }

        // Select a bin for each molecule, then sort them by bin.

        bool useHilbert = (numMolecules > 5000 || atoms.size() > 8); // For small systems, a simple zigzag curve works better than a Hilbert curve.
        Real binWidth;
        if (useHilbert)
            binWidth = (Real) (max(max(maxx-minx, maxy-miny), maxz-minz)/255.0);
        else
            binWidth = (Real) (0.2*nonbonded->getMaxCutoffDistance());
        Real invBinWidth = (Real) (1.0/binWidth);
        int xbins = 1 + (int) ((maxx-minx)*invBinWidth);
        int ybins = 1 + (int) ((maxy-miny)*invBinWidth);
        vector<pair<int, int> > molBins(numMolecules);
        bitmask_t coords[3];
        for (int i = 0; i < numMolecules; i++) {
            int x = (int) ((molPos[i].x-minx)*invBinWidth);
            int y = (int) ((molPos[i].y-miny)*invBinWidth);
            int z = (int) ((molPos[i].z-minz)*invBinWidth);
            int bin;
            if (useHilbert) {
                // NOTE(review): the arguments 3 and 8 are presumably the number of dimensions and
                // the number of bits per coordinate -- confirm against the hilbert_c2i declaration.

                coords[0] = x;
                coords[1] = y;
                coords[2] = z;
                bin = (int) hilbert_c2i(3, 8, coords);
            }
            else {
                // Zigzag ordering: the direction of travel is reversed on odd rows and odd layers.

                int yodd = y&1;
                int zodd = z&1;
                bin = z*xbins*ybins;
                bin += (zodd ? ybins-y : y)*xbins;
                bin += (yodd ? xbins-x : x);
            }
            molBins[i] = pair<int, int>(bin, i);
        }
        sort(molBins.begin(), molBins.end());

        // Reorder the atoms.  The molecule that sorted into position i is moved into the
        // location of the group's i'th instance.

        for (int i = 0; i < numMolecules; i++) {
            for (int atom : atoms) {
                int oldIndex = mol.offsets[molBins[i].second]+atom;
                int newIndex = mol.offsets[i]+atom;
                originalIndex[newIndex] = atomIndex[oldIndex];
                newPosq[newIndex] = oldPosq[oldIndex];
                if (useMixedPrecision)
                    newPosqCorrection[newIndex] = oldPosqCorrection[oldIndex];
                newVelm[newIndex] = oldVelm[oldIndex];
                newCellOffsets[newIndex] = posCellOffsets[oldIndex];
            }
        }
    }

    // Update the streams.

    for (int i = 0; i < numAtoms; i++) {
        atomIndex[i] = originalIndex[i];
        posCellOffsets[i] = newCellOffsets[i];
    }
    posq.upload(newPosq);
    if (useMixedPrecision)
        posqCorrection.upload(newPosqCorrection);
    velm.upload(newVelm);
    atomIndexDevice.upload(atomIndex);
    for (auto listener : reorderListeners)
        listener->execute();
}
/**
 * Append a listener to the list of listeners that are executed after atoms are reordered.
 */
void CudaContext::addReorderListener(ReorderListener* listener) {
    reorderListeners.emplace_back(listener);
}
/**
 * Append a computation to the list of pre-computations.
 */
void CudaContext::addPreComputation(ForcePreComputation* computation) {
    preComputations.emplace_back(computation);
}
/**
 * Append a computation to the list of post-computations.
 */
void CudaContext::addPostComputation(ForcePostComputation* computation) {
    postComputations.emplace_back(computation);
}
void
CudaContext
::
addEnergyParameterDerivative
(
const
string
&
param
)
{
void
CudaContext
::
addEnergyParameterDerivative
(
const
string
&
param
)
{
// See if this parameter has already been registered.
// See if this parameter has already been registered.
...
@@ -1381,90 +834,10 @@ void CudaContext::addEnergyParameterDerivative(const string& param) {
...
@@ -1381,90 +834,10 @@ void CudaContext::addEnergyParameterDerivative(const string& param) {
energyParamDerivNames
.
push_back
(
param
);
energyParamDerivNames
.
push_back
(
param
);
}
}
struct
CudaContext
::
WorkThread
::
ThreadData
{
void
CudaContext
::
flushQueue
()
{
ThreadData
(
std
::
queue
<
CudaContext
::
WorkTask
*>&
tasks
,
bool
&
waiting
,
bool
&
finished
,
cuStreamSynchronize
(
getCurrentStream
());
pthread_mutex_t
&
queueLock
,
pthread_cond_t
&
waitForTaskCondition
,
pthread_cond_t
&
queueEmptyCondition
)
:
tasks
(
tasks
),
waiting
(
waiting
),
finished
(
finished
),
queueLock
(
queueLock
),
waitForTaskCondition
(
waitForTaskCondition
),
queueEmptyCondition
(
queueEmptyCondition
)
{
}
std
::
queue
<
CudaContext
::
WorkTask
*>&
tasks
;
bool
&
waiting
;
bool
&
finished
;
pthread_mutex_t
&
queueLock
;
pthread_cond_t
&
waitForTaskCondition
;
pthread_cond_t
&
queueEmptyCondition
;
};
/**
 * Entry point of the worker thread.  It repeatedly takes the task at the front of the queue,
 * executes it and deletes it, until the thread has been marked as finished and the queue is empty.
 *
 * @param args   pointer to the ThreadData for this thread.  It is deleted before returning.
 * @return always 0
 */
static void* threadBody(void* args) {
    CudaContext::WorkThread::ThreadData& data = *reinterpret_cast<CudaContext::WorkThread::ThreadData*>(args);
    // NOTE(review): this loop condition reads finished and the queue size without holding queueLock.
    while (!data.finished || data.tasks.size() > 0) {
        pthread_mutex_lock(&data.queueLock);
        while (data.tasks.empty() && !data.finished) {
            // Nothing to do: report that the queue is empty, then sleep until a task is added
            // or the thread is told to finish.

            data.waiting = true;
            pthread_cond_signal(&data.queueEmptyCondition);
            pthread_cond_wait(&data.waitForTaskCondition, &data.queueLock);
        }
        CudaContext::WorkTask* task = NULL;
        if (!data.tasks.empty()) {
            data.waiting = false;
            task = data.tasks.front();
            data.tasks.pop();
        }
        pthread_mutex_unlock(&data.queueLock);

        // Run the task after the lock has been released.

        if (task != NULL) {
            task->execute();
            delete task;
        }
    }
    // NOTE(review): waiting is set and the condition is signaled here without holding queueLock.
    data.waiting = true;
    pthread_cond_signal(&data.queueEmptyCondition);
    delete &data;
    return 0;
}
/**
 * Create the synchronization objects and start the worker thread.  The thread runs threadBody()
 * and is given a heap-allocated ThreadData holding references to this object's queue, flags,
 * mutex and condition variables.
 */
CudaContext::WorkThread::WorkThread() : waiting(true), finished(false) {
    pthread_mutex_init(&queueLock, NULL);
    pthread_cond_init(&waitForTaskCondition, NULL);
    pthread_cond_init(&queueEmptyCondition, NULL);
    // threadBody() deletes this object when the thread exits.
    ThreadData* data = new ThreadData(tasks, waiting, finished, queueLock, waitForTaskCondition, queueEmptyCondition);
    pthread_create(&thread, NULL, threadBody, data);
}
/**
 * Tell the worker thread to finish, wait for it to exit, then destroy the synchronization objects.
 */
CudaContext::WorkThread::~WorkThread() {
    // Set the flag while holding the lock and wake the thread in case it is waiting for a task.
    pthread_mutex_lock(&queueLock);
    finished = true;
    pthread_cond_broadcast(&waitForTaskCondition);
    pthread_mutex_unlock(&queueLock);
    // The mutex and condition variables are only destroyed after the thread has been joined.
    pthread_join(thread, NULL);
    pthread_mutex_destroy(&queueLock);
    pthread_cond_destroy(&waitForTaskCondition);
    pthread_cond_destroy(&queueEmptyCondition);
}
}
/**
 * Add a task to the end of the queue and wake the worker thread.  The worker thread deletes
 * the task after executing it (see threadBody()).
 *
 * @param task   the task to execute
 */
void CudaContext::WorkThread::addTask(CudaContext::WorkTask* task) {
    pthread_mutex_lock(&queueLock);
    tasks.push(task);
    // Clear the flag here, under the lock, so flush() blocks until the thread reports idle again.
    waiting = false;
    pthread_cond_signal(&waitForTaskCondition);
    pthread_mutex_unlock(&queueLock);
}
/**
 * Get the current value of the waiting flag.  The flag is read without taking queueLock.
 */
bool CudaContext::WorkThread::isWaiting() {
    return waiting;
}
/**
 * Get the current value of the finished flag, which the destructor sets to true.  The flag is
 * read without taking queueLock.
 */
bool CudaContext::WorkThread::isFinished() {
    return finished;
}
/**
 * Block until the waiting flag is set, which the worker thread does when it finds the queue empty.
 */
void CudaContext::WorkThread::flush() {
    pthread_mutex_lock(&queueLock);
    // Re-check the flag after every wakeup.
    while (!waiting)
        pthread_cond_wait(&queueEmptyCondition, &queueLock);
    pthread_mutex_unlock(&queueLock);
}
vector
<
int
>
CudaContext
::
getDevicePrecedence
()
{
vector
<
int
>
CudaContext
::
getDevicePrecedence
()
{
int
numDevices
;
int
numDevices
;
CUdevice
thisDevice
;
CUdevice
thisDevice
;
...
@@ -1475,7 +848,8 @@ vector<int> CudaContext::getDevicePrecedence() {
...
@@ -1475,7 +848,8 @@ vector<int> CudaContext::getDevicePrecedence() {
for
(
int
i
=
0
;
i
<
numDevices
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numDevices
;
i
++
)
{
CHECK_RESULT
(
cuDeviceGet
(
&
thisDevice
,
i
));
CHECK_RESULT
(
cuDeviceGet
(
&
thisDevice
,
i
));
int
major
,
minor
,
clock
,
multiprocessors
,
speed
;
int
major
,
minor
,
clock
,
multiprocessors
,
speed
;
CHECK_RESULT
(
cuDeviceComputeCapability
(
&
major
,
&
minor
,
thisDevice
));
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
major
,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR
,
thisDevice
));
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
minor
,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR
,
thisDevice
));
if
(
major
==
1
&&
minor
<
2
)
if
(
major
==
1
&&
minor
<
2
)
continue
;
continue
;
...
...
platforms/cuda/src/CudaEvent.cpp
0 → 100644
View file @
5a06df78
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaEvent.h"
#include "openmm/OpenMMException.h"
using
namespace
OpenMM
;
/**
 * Create a CUDA event with timing disabled.
 *
 * Throws OpenMMException if the event cannot be created.
 */
CudaEvent::CudaEvent(CudaContext& context) : context(context), eventCreated(false) {
    const CUresult status = cuEventCreate(&event, CU_EVENT_DISABLE_TIMING);
    if (status == CUDA_SUCCESS) {
        // Remember that there is an event for the destructor to release.
        eventCreated = true;
        return;
    }
    throw OpenMMException("Error creating CUDA event:"+CudaContext::getErrorString(status));
}
/**
 * Destroy the CUDA event, if one was created.
 */
CudaEvent::~CudaEvent() {
    if (!eventCreated)
        return;
    cuEventDestroy(event);
}
/**
 * Record the event on stream handle 0.  The result code of cuEventRecord() is not checked.
 */
void CudaEvent::enqueue() {
    // NOTE(review): the stream is hard-coded to 0 rather than taken from the context -- confirm
    // that this is intended when the context's current stream is not the default one.
    cuEventRecord(event, 0);
}
/**
 * Block the calling thread until the event has completed.  The result code of
 * cuEventSynchronize() is not checked.
 */
void CudaEvent::wait() {
    cuEventSynchronize(event);
}
platforms/cuda/src/CudaIntegrationUtilities.cpp
View file @
5a06df78
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
9
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -25,18 +25,7 @@
...
@@ -25,18 +25,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "CudaIntegrationUtilities.h"
#include "CudaIntegrationUtilities.h"
#include "CudaArray.h"
#include "CudaContext.h"
#include "CudaKernelSources.h"
#include "openmm/internal/OSRngSeed.h"
#include "openmm/HarmonicAngleForce.h"
#include "openmm/VirtualSite.h"
#include "quern.h"
#include "CudaExpressionUtilities.h"
#include "ReferenceCCMAAlgorithm.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <map>
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
...
@@ -45,539 +34,39 @@ using namespace std;
...
@@ -45,539 +34,39 @@ using namespace std;
#define CHECK_RESULT2(result, prefix) \
#define CHECK_RESULT2(result, prefix) \
if (result != CUDA_SUCCESS) { \
if (result != CUDA_SUCCESS) { \
std::stringstream m; \
std::stringstream m; \
m<<prefix<<": "<<context.getErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
m<<prefix<<": "<<
dynamic_cast<CudaContext&>(
context
)
.getErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
throw OpenMMException(m.str());\
throw OpenMMException(m.str());\
}
}
struct
CudaIntegrationUtilities
::
ShakeCluster
{
CudaIntegrationUtilities
::
CudaIntegrationUtilities
(
CudaContext
&
context
,
const
System
&
system
)
:
IntegrationUtilities
(
context
,
system
),
int
centralID
;
ccmaConvergedMemory
(
NULL
)
{
int
peripheralID
[
3
];
CHECK_RESULT2
(
cuEventCreate
(
&
ccmaEvent
,
CU_EVENT_DISABLE_TIMING
),
"Error creating event for CCMA"
);
int
size
;
bool
valid
;
double
distance
;
double
centralInvMass
,
peripheralInvMass
;
ShakeCluster
()
:
valid
(
true
)
{
}
ShakeCluster
(
int
centralID
,
double
invMass
)
:
centralID
(
centralID
),
centralInvMass
(
invMass
),
size
(
0
),
valid
(
true
)
{
}
void
addAtom
(
int
id
,
double
dist
,
double
invMass
)
{
if
(
size
==
3
||
(
size
>
0
&&
abs
(
dist
-
distance
)
/
distance
>
1e-8
)
||
(
size
>
0
&&
abs
(
invMass
-
peripheralInvMass
)
/
peripheralInvMass
>
1e-8
))
valid
=
false
;
else
{
peripheralID
[
size
++
]
=
id
;
distance
=
dist
;
peripheralInvMass
=
invMass
;
}
}
void
markInvalid
(
map
<
int
,
ShakeCluster
>&
allClusters
,
vector
<
bool
>&
invalidForShake
)
{
valid
=
false
;
invalidForShake
[
centralID
]
=
true
;
for
(
int
i
=
0
;
i
<
size
;
i
++
)
{
invalidForShake
[
peripheralID
[
i
]]
=
true
;
map
<
int
,
ShakeCluster
>::
iterator
otherCluster
=
allClusters
.
find
(
peripheralID
[
i
]);
if
(
otherCluster
!=
allClusters
.
end
()
&&
otherCluster
->
second
.
valid
)
otherCluster
->
second
.
markInvalid
(
allClusters
,
invalidForShake
);
}
}
};
struct
CudaIntegrationUtilities
::
ConstraintOrderer
:
public
binary_function
<
int
,
int
,
bool
>
{
const
vector
<
int
>&
atom1
;
const
vector
<
int
>&
atom2
;
const
vector
<
int
>&
constraints
;
ConstraintOrderer
(
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
int
>&
constraints
)
:
atom1
(
atom1
),
atom2
(
atom2
),
constraints
(
constraints
)
{
}
bool
operator
()(
int
x
,
int
y
)
{
int
ix
=
constraints
[
x
];
int
iy
=
constraints
[
y
];
if
(
atom1
[
ix
]
!=
atom1
[
iy
])
return
atom1
[
ix
]
<
atom1
[
iy
];
return
atom2
[
ix
]
<
atom2
[
iy
];
}
};
CudaIntegrationUtilities
::
CudaIntegrationUtilities
(
CudaContext
&
context
,
const
System
&
system
)
:
context
(
context
),
randomPos
(
0
),
ccmaConvergedMemory
(
NULL
)
{
// Create workspace arrays.
lastStepSize
=
make_double2
(
0.0
,
0.0
);
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
{
posDelta
.
initialize
<
double4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
double4
>
deltas
(
posDelta
.
getSize
(),
make_double4
(
0.0
,
0.0
,
0.0
,
0.0
));
posDelta
.
upload
(
deltas
);
stepSize
.
initialize
<
double2
>
(
context
,
1
,
"stepSize"
);
stepSize
.
upload
(
&
lastStepSize
);
}
else
{
posDelta
.
initialize
<
float4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
float4
>
deltas
(
posDelta
.
getSize
(),
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
));
posDelta
.
upload
(
deltas
);
stepSize
.
initialize
<
float2
>
(
context
,
1
,
"stepSize"
);
float2
lastStepSizeFloat
=
make_float2
(
0.0
f
,
0.0
f
);
stepSize
.
upload
(
&
lastStepSizeFloat
);
}
// Record the set of constraints and how many constraints each atom is involved in.
vector
<
int
>
atom1
;
vector
<
int
>
atom2
;
vector
<
double
>
distance
;
vector
<
int
>
constraintCount
(
context
.
getNumAtoms
(),
0
);
for
(
int
i
=
0
;
i
<
system
.
getNumConstraints
();
i
++
)
{
int
p1
,
p2
;
double
d
;
system
.
getConstraintParameters
(
i
,
p1
,
p2
,
d
);
if
(
system
.
getParticleMass
(
p1
)
!=
0
||
system
.
getParticleMass
(
p2
)
!=
0
)
{
atom1
.
push_back
(
p1
);
atom2
.
push_back
(
p2
);
distance
.
push_back
(
d
);
constraintCount
[
p1
]
++
;
constraintCount
[
p2
]
++
;
}
}
// Identify clusters of three atoms that can be treated with SETTLE. First, for every
// atom that might be part of such a cluster, make a list of the two other atoms it is
// connected to.
int
numAtoms
=
system
.
getNumParticles
();
vector
<
map
<
int
,
float
>
>
settleConstraints
(
numAtoms
);
for
(
int
i
=
0
;
i
<
(
int
)
atom1
.
size
();
i
++
)
{
if
(
constraintCount
[
atom1
[
i
]]
==
2
&&
constraintCount
[
atom2
[
i
]]
==
2
)
{
settleConstraints
[
atom1
[
i
]][
atom2
[
i
]]
=
(
float
)
distance
[
i
];
settleConstraints
[
atom2
[
i
]][
atom1
[
i
]]
=
(
float
)
distance
[
i
];
}
}
// Now remove the ones that don't actually form closed loops of three atoms.
vector
<
int
>
settleClusters
;
for
(
int
i
=
0
;
i
<
(
int
)
settleConstraints
.
size
();
i
++
)
{
if
(
settleConstraints
[
i
].
size
()
==
2
)
{
int
partner1
=
settleConstraints
[
i
].
begin
()
->
first
;
int
partner2
=
(
++
settleConstraints
[
i
].
begin
())
->
first
;
if
(
settleConstraints
[
partner1
].
size
()
!=
2
||
settleConstraints
[
partner2
].
size
()
!=
2
||
settleConstraints
[
partner1
].
find
(
partner2
)
==
settleConstraints
[
partner1
].
end
())
settleConstraints
[
i
].
clear
();
else
if
(
i
<
partner1
&&
i
<
partner2
)
settleClusters
.
push_back
(
i
);
}
else
settleConstraints
[
i
].
clear
();
}
// Record the SETTLE clusters.
vector
<
bool
>
isShakeAtom
(
numAtoms
,
false
);
if
(
settleClusters
.
size
()
>
0
)
{
vector
<
int4
>
atoms
;
vector
<
float2
>
params
;
for
(
int
i
=
0
;
i
<
(
int
)
settleClusters
.
size
();
i
++
)
{
int
atom1
=
settleClusters
[
i
];
int
atom2
=
settleConstraints
[
atom1
].
begin
()
->
first
;
int
atom3
=
(
++
settleConstraints
[
atom1
].
begin
())
->
first
;
float
dist12
=
settleConstraints
[
atom1
].
find
(
atom2
)
->
second
;
float
dist13
=
settleConstraints
[
atom1
].
find
(
atom3
)
->
second
;
float
dist23
=
settleConstraints
[
atom2
].
find
(
atom3
)
->
second
;
if
(
dist12
==
dist13
)
{
// atom1 is the central atom
atoms
.
push_back
(
make_int4
(
atom1
,
atom2
,
atom3
,
0
));
params
.
push_back
(
make_float2
(
dist12
,
dist23
));
}
else
if
(
dist12
==
dist23
)
{
// atom2 is the central atom
atoms
.
push_back
(
make_int4
(
atom2
,
atom1
,
atom3
,
0
));
params
.
push_back
(
make_float2
(
dist12
,
dist13
));
}
else
if
(
dist13
==
dist23
)
{
// atom3 is the central atom
atoms
.
push_back
(
make_int4
(
atom3
,
atom1
,
atom2
,
0
));
params
.
push_back
(
make_float2
(
dist13
,
dist12
));
}
else
continue
;
// We can't handle this with SETTLE
isShakeAtom
[
atom1
]
=
true
;
isShakeAtom
[
atom2
]
=
true
;
isShakeAtom
[
atom3
]
=
true
;
}
if
(
atoms
.
size
()
>
0
)
{
settleAtoms
.
initialize
<
int4
>
(
context
,
atoms
.
size
(),
"settleAtoms"
);
settleParams
.
initialize
<
float2
>
(
context
,
params
.
size
(),
"settleParams"
);
settleAtoms
.
upload
(
atoms
);
settleParams
.
upload
(
params
);
}
}
// Find clusters consisting of a central atom with up to three peripheral atoms.
map
<
int
,
ShakeCluster
>
clusters
;
vector
<
bool
>
invalidForShake
(
numAtoms
,
false
);
for
(
int
i
=
0
;
i
<
(
int
)
atom1
.
size
();
i
++
)
{
if
(
isShakeAtom
[
atom1
[
i
]])
continue
;
// This is being taken care of with SETTLE.
// Determine which is the central atom.
bool
firstIsCentral
;
if
(
constraintCount
[
atom1
[
i
]]
>
1
)
firstIsCentral
=
true
;
else
if
(
constraintCount
[
atom2
[
i
]]
>
1
)
firstIsCentral
=
false
;
else
if
(
atom1
[
i
]
<
atom2
[
i
])
firstIsCentral
=
true
;
else
firstIsCentral
=
false
;
int
centralID
,
peripheralID
;
if
(
firstIsCentral
)
{
centralID
=
atom1
[
i
];
peripheralID
=
atom2
[
i
];
}
else
{
centralID
=
atom2
[
i
];
peripheralID
=
atom1
[
i
];
}
// Add it to the cluster.
if
(
clusters
.
find
(
centralID
)
==
clusters
.
end
())
{
clusters
[
centralID
]
=
ShakeCluster
(
centralID
,
1.0
/
system
.
getParticleMass
(
centralID
));
}
ShakeCluster
&
cluster
=
clusters
[
centralID
];
cluster
.
addAtom
(
peripheralID
,
distance
[
i
],
1.0
/
system
.
getParticleMass
(
peripheralID
));
if
(
constraintCount
[
peripheralID
]
!=
1
||
invalidForShake
[
atom1
[
i
]]
||
invalidForShake
[
atom2
[
i
]])
{
cluster
.
markInvalid
(
clusters
,
invalidForShake
);
map
<
int
,
ShakeCluster
>::
iterator
otherCluster
=
clusters
.
find
(
peripheralID
);
if
(
otherCluster
!=
clusters
.
end
()
&&
otherCluster
->
second
.
valid
)
otherCluster
->
second
.
markInvalid
(
clusters
,
invalidForShake
);
}
}
int
validShakeClusters
=
0
;
for
(
map
<
int
,
ShakeCluster
>::
iterator
iter
=
clusters
.
begin
();
iter
!=
clusters
.
end
();
++
iter
)
{
ShakeCluster
&
cluster
=
iter
->
second
;
if
(
cluster
.
valid
)
{
cluster
.
valid
=
!
invalidForShake
[
cluster
.
centralID
]
&&
cluster
.
size
==
constraintCount
[
cluster
.
centralID
];
for
(
int
i
=
0
;
i
<
cluster
.
size
;
i
++
)
if
(
invalidForShake
[
cluster
.
peripheralID
[
i
]])
cluster
.
valid
=
false
;
if
(
cluster
.
valid
)
++
validShakeClusters
;
}
}
// Record the SHAKE clusters.
if
(
validShakeClusters
>
0
)
{
vector
<
int4
>
atoms
;
vector
<
float4
>
params
;
int
index
=
0
;
for
(
map
<
int
,
ShakeCluster
>::
const_iterator
iter
=
clusters
.
begin
();
iter
!=
clusters
.
end
();
++
iter
)
{
const
ShakeCluster
&
cluster
=
iter
->
second
;
if
(
!
cluster
.
valid
)
continue
;
atoms
.
push_back
(
make_int4
(
cluster
.
centralID
,
cluster
.
peripheralID
[
0
],
(
cluster
.
size
>
1
?
cluster
.
peripheralID
[
1
]
:
-
1
),
(
cluster
.
size
>
2
?
cluster
.
peripheralID
[
2
]
:
-
1
)));
params
.
push_back
(
make_float4
((
float
)
cluster
.
centralInvMass
,
(
float
)
(
0.5
/
(
cluster
.
centralInvMass
+
cluster
.
peripheralInvMass
)),
(
float
)
(
cluster
.
distance
*
cluster
.
distance
),
(
float
)
cluster
.
peripheralInvMass
));
isShakeAtom
[
cluster
.
centralID
]
=
true
;
isShakeAtom
[
cluster
.
peripheralID
[
0
]]
=
true
;
if
(
cluster
.
size
>
1
)
isShakeAtom
[
cluster
.
peripheralID
[
1
]]
=
true
;
if
(
cluster
.
size
>
2
)
isShakeAtom
[
cluster
.
peripheralID
[
2
]]
=
true
;
++
index
;
}
shakeAtoms
.
initialize
<
int4
>
(
context
,
atoms
.
size
(),
"shakeAtoms"
);
shakeParams
.
initialize
<
float4
>
(
context
,
params
.
size
(),
"shakeParams"
);
shakeAtoms
.
upload
(
atoms
);
shakeParams
.
upload
(
params
);
}
// Find connected constraints for CCMA.
vector
<
int
>
ccmaConstraints
;
for
(
unsigned
i
=
0
;
i
<
atom1
.
size
();
i
++
)
if
(
!
isShakeAtom
[
atom1
[
i
]])
ccmaConstraints
.
push_back
(
i
);
// Record the connections between constraints.
int
numCCMA
=
(
int
)
ccmaConstraints
.
size
();
if
(
numCCMA
>
0
)
{
// Record information needed by ReferenceCCMAAlgorithm.
vector
<
pair
<
int
,
int
>
>
refIndices
(
numCCMA
);
vector
<
double
>
refDistance
(
numCCMA
);
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
int
index
=
ccmaConstraints
[
i
];
refIndices
[
i
]
=
make_pair
(
atom1
[
index
],
atom2
[
index
]);
refDistance
[
i
]
=
distance
[
index
];
}
vector
<
double
>
refMasses
(
numAtoms
);
for
(
int
i
=
0
;
i
<
numAtoms
;
++
i
)
refMasses
[
i
]
=
system
.
getParticleMass
(
i
);
// Look up angles for CCMA.
vector
<
ReferenceCCMAAlgorithm
::
AngleInfo
>
angles
;
for
(
int
i
=
0
;
i
<
system
.
getNumForces
();
i
++
)
{
const
HarmonicAngleForce
*
force
=
dynamic_cast
<
const
HarmonicAngleForce
*>
(
&
system
.
getForce
(
i
));
if
(
force
!=
NULL
)
{
for
(
int
j
=
0
;
j
<
force
->
getNumAngles
();
j
++
)
{
int
atom1
,
atom2
,
atom3
;
double
angle
,
k
;
force
->
getAngleParameters
(
j
,
atom1
,
atom2
,
atom3
,
angle
,
k
);
angles
.
push_back
(
ReferenceCCMAAlgorithm
::
AngleInfo
(
atom1
,
atom2
,
atom3
,
angle
));
}
}
}
// Create a ReferenceCCMAAlgorithm. It will build and invert the constraint matrix for us.
ReferenceCCMAAlgorithm
ccma
(
numAtoms
,
numCCMA
,
refIndices
,
refDistance
,
refMasses
,
angles
,
0.1
);
vector
<
vector
<
pair
<
int
,
double
>
>
>
matrix
=
ccma
.
getMatrix
();
int
maxRowElements
=
0
;
for
(
unsigned
i
=
0
;
i
<
matrix
.
size
();
i
++
)
maxRowElements
=
max
(
maxRowElements
,
(
int
)
matrix
[
i
].
size
());
maxRowElements
++
;
// Build the list of constraints for each atom.
vector
<
vector
<
int
>
>
atomConstraints
(
context
.
getNumAtoms
());
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
atomConstraints
[
atom1
[
ccmaConstraints
[
i
]]].
push_back
(
i
);
atomConstraints
[
atom2
[
ccmaConstraints
[
i
]]].
push_back
(
i
);
}
int
maxAtomConstraints
=
0
;
for
(
unsigned
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
// Sort the constraints.
vector
<
int
>
constraintOrder
(
numCCMA
);
for
(
int
i
=
0
;
i
<
numCCMA
;
++
i
)
constraintOrder
[
i
]
=
i
;
sort
(
constraintOrder
.
begin
(),
constraintOrder
.
end
(),
ConstraintOrderer
(
atom1
,
atom2
,
ccmaConstraints
));
vector
<
int
>
inverseOrder
(
numCCMA
);
for
(
int
i
=
0
;
i
<
numCCMA
;
++
i
)
inverseOrder
[
constraintOrder
[
i
]]
=
i
;
for
(
int
i
=
0
;
i
<
(
int
)
matrix
.
size
();
++
i
)
for
(
int
j
=
0
;
j
<
(
int
)
matrix
[
i
].
size
();
++
j
)
matrix
[
i
][
j
].
first
=
inverseOrder
[
matrix
[
i
][
j
].
first
];
// Record the CCMA data structures.
ccmaAtoms
.
initialize
<
int2
>
(
context
,
numCCMA
,
"CcmaAtoms"
);
ccmaAtomConstraints
.
initialize
<
int
>
(
context
,
numAtoms
*
maxAtomConstraints
,
"CcmaAtomConstraints"
);
ccmaNumAtomConstraints
.
initialize
<
int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaConstraintMatrixColumn
.
initialize
<
int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConverged
.
initialize
<
int
>
(
context
,
2
,
"ccmaConverged"
);
CHECK_RESULT2
(
cuMemHostAlloc
((
void
**
)
&
ccmaConvergedMemory
,
sizeof
(
int
),
CU_MEMHOSTALLOC_DEVICEMAP
),
"Error allocating pinned memory"
);
CHECK_RESULT2
(
cuMemHostAlloc
((
void
**
)
&
ccmaConvergedMemory
,
sizeof
(
int
),
CU_MEMHOSTALLOC_DEVICEMAP
),
"Error allocating pinned memory"
);
CHECK_RESULT2
(
cuMemHostGetDevicePointer
(
&
ccmaConvergedDeviceMemory
,
ccmaConvergedMemory
,
0
),
"Error getting device address for pinned memory"
);
CHECK_RESULT2
(
cuMemHostGetDevicePointer
(
&
ccmaConvergedDeviceMemory
,
ccmaConvergedMemory
,
0
),
"Error getting device address for pinned memory"
);
vector
<
int2
>
atomsVec
(
ccmaAtoms
.
getSize
());
vector
<
int
>
atomConstraintsVec
(
ccmaAtomConstraints
.
getSize
());
vector
<
int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
.
getSize
());
vector
<
int
>
constraintMatrixColumnVec
(
ccmaConstraintMatrixColumn
.
getSize
());
int
elementSize
=
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
ccmaDistance
.
initialize
(
context
,
numCCMA
,
4
*
elementSize
,
"CcmaDistance"
);
ccmaDelta1
.
initialize
(
context
,
numCCMA
,
elementSize
,
"CcmaDelta1"
);
ccmaDelta2
.
initialize
(
context
,
numCCMA
,
elementSize
,
"CcmaDelta2"
);
ccmaReducedMass
.
initialize
(
context
,
numCCMA
,
elementSize
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
.
initialize
(
context
,
numCCMA
*
maxRowElements
,
elementSize
,
"ConstraintMatrixValue"
);
vector
<
double4
>
distanceVec
(
ccmaDistance
.
getSize
());
vector
<
double
>
reducedMassVec
(
ccmaReducedMass
.
getSize
());
vector
<
double
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
.
getSize
());
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
int
index
=
constraintOrder
[
i
];
int
c
=
ccmaConstraints
[
index
];
atomsVec
[
i
].
x
=
atom1
[
c
];
atomsVec
[
i
].
y
=
atom2
[
c
];
distanceVec
[
i
].
w
=
distance
[
c
];
reducedMassVec
[
i
]
=
(
0.5
/
(
1.0
/
system
.
getParticleMass
(
atom1
[
c
])
+
1.0
/
system
.
getParticleMass
(
atom2
[
c
])));
for
(
unsigned
int
j
=
0
;
j
<
matrix
[
index
].
size
();
j
++
)
{
constraintMatrixColumnVec
[
i
+
j
*
numCCMA
]
=
matrix
[
index
][
j
].
first
;
constraintMatrixValueVec
[
i
+
j
*
numCCMA
]
=
matrix
[
index
][
j
].
second
;
}
constraintMatrixColumnVec
[
i
+
matrix
[
index
].
size
()
*
numCCMA
]
=
numCCMA
;
}
ccmaDistance
.
upload
(
distanceVec
,
true
);
ccmaReducedMass
.
upload
(
reducedMassVec
,
true
);
ccmaConstraintMatrixValue
.
upload
(
constraintMatrixValueVec
,
true
);
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
{
numAtomConstraintsVec
[
i
]
=
atomConstraints
[
i
].
size
();
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
{
bool
forward
=
(
atom1
[
ccmaConstraints
[
atomConstraints
[
i
][
j
]]]
==
i
);
atomConstraintsVec
[
i
+
j
*
numAtoms
]
=
(
forward
?
inverseOrder
[
atomConstraints
[
i
][
j
]]
+
1
:
-
inverseOrder
[
atomConstraints
[
i
][
j
]]
-
1
);
}
}
ccmaAtoms
.
upload
(
atomsVec
);
ccmaAtomConstraints
.
upload
(
atomConstraintsVec
);
ccmaNumAtomConstraints
.
upload
(
numAtomConstraintsVec
);
ccmaConstraintMatrixColumn
.
upload
(
constraintMatrixColumnVec
);
}
// Build the list of virtual sites.
vector
<
int4
>
vsite2AvgAtomVec
;
vector
<
double2
>
vsite2AvgWeightVec
;
vector
<
int4
>
vsite3AvgAtomVec
;
vector
<
double4
>
vsite3AvgWeightVec
;
vector
<
int4
>
vsiteOutOfPlaneAtomVec
;
vector
<
double4
>
vsiteOutOfPlaneWeightVec
;
vector
<
int
>
vsiteLocalCoordsIndexVec
;
vector
<
int
>
vsiteLocalCoordsAtomVec
;
vector
<
int
>
vsiteLocalCoordsStartVec
;
vector
<
double
>
vsiteLocalCoordsWeightVec
;
vector
<
double4
>
vsiteLocalCoordsPosVec
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
if
(
system
.
isVirtualSite
(
i
))
{
if
(
dynamic_cast
<
const
TwoParticleAverageSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
// A two particle average.
const
TwoParticleAverageSite
&
site
=
dynamic_cast
<
const
TwoParticleAverageSite
&>
(
system
.
getVirtualSite
(
i
));
vsite2AvgAtomVec
.
push_back
(
make_int4
(
i
,
site
.
getParticle
(
0
),
site
.
getParticle
(
1
),
0
));
vsite2AvgWeightVec
.
push_back
(
make_double2
(
site
.
getWeight
(
0
),
site
.
getWeight
(
1
)));
}
else
if
(
dynamic_cast
<
const
ThreeParticleAverageSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
// A three particle average.
const
ThreeParticleAverageSite
&
site
=
dynamic_cast
<
const
ThreeParticleAverageSite
&>
(
system
.
getVirtualSite
(
i
));
vsite3AvgAtomVec
.
push_back
(
make_int4
(
i
,
site
.
getParticle
(
0
),
site
.
getParticle
(
1
),
site
.
getParticle
(
2
)));
vsite3AvgWeightVec
.
push_back
(
make_double4
(
site
.
getWeight
(
0
),
site
.
getWeight
(
1
),
site
.
getWeight
(
2
),
0.0
));
}
else
if
(
dynamic_cast
<
const
OutOfPlaneSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
// An out of plane site.
const
OutOfPlaneSite
&
site
=
dynamic_cast
<
const
OutOfPlaneSite
&>
(
system
.
getVirtualSite
(
i
));
vsiteOutOfPlaneAtomVec
.
push_back
(
make_int4
(
i
,
site
.
getParticle
(
0
),
site
.
getParticle
(
1
),
site
.
getParticle
(
2
)));
vsiteOutOfPlaneWeightVec
.
push_back
(
make_double4
(
site
.
getWeight12
(),
site
.
getWeight13
(),
site
.
getWeightCross
(),
0.0
));
}
else
if
(
dynamic_cast
<
const
LocalCoordinatesSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
// A local coordinates site.
const
LocalCoordinatesSite
&
site
=
dynamic_cast
<
const
LocalCoordinatesSite
&>
(
system
.
getVirtualSite
(
i
));
int
numParticles
=
site
.
getNumParticles
();
vector
<
double
>
origin
,
x
,
y
;
site
.
getOriginWeights
(
origin
);
site
.
getXWeights
(
x
);
site
.
getYWeights
(
y
);
vsiteLocalCoordsIndexVec
.
push_back
(
i
);
vsiteLocalCoordsStartVec
.
push_back
(
vsiteLocalCoordsAtomVec
.
size
());
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
{
vsiteLocalCoordsAtomVec
.
push_back
(
site
.
getParticle
(
j
));
vsiteLocalCoordsWeightVec
.
push_back
(
origin
[
j
]);
vsiteLocalCoordsWeightVec
.
push_back
(
x
[
j
]);
vsiteLocalCoordsWeightVec
.
push_back
(
y
[
j
]);
}
Vec3
pos
=
site
.
getLocalPosition
();
vsiteLocalCoordsPosVec
.
push_back
(
make_double4
(
pos
[
0
],
pos
[
1
],
pos
[
2
],
0.0
));
}
}
}
vsiteLocalCoordsStartVec
.
push_back
(
vsiteLocalCoordsAtomVec
.
size
());
int
num2Avg
=
vsite2AvgAtomVec
.
size
();
int
num3Avg
=
vsite3AvgAtomVec
.
size
();
int
numOutOfPlane
=
vsiteOutOfPlaneAtomVec
.
size
();
int
numLocalCoords
=
vsiteLocalCoordsPosVec
.
size
();
vsite2AvgAtoms
.
initialize
<
int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite3AvgAtoms
.
initialize
<
int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsiteOutOfPlaneAtoms
.
initialize
<
int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteLocalCoordsIndex
.
initialize
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsIndexVec
.
size
()),
"vsiteLocalCoordsIndex"
);
vsiteLocalCoordsAtoms
.
initialize
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsAtomVec
.
size
()),
"vsiteLocalCoordsAtoms"
);
vsiteLocalCoordsStartIndex
.
initialize
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsStartVec
.
size
()),
"vsiteLocalCoordsStartIndex"
);
if
(
num2Avg
>
0
)
vsite2AvgAtoms
.
upload
(
vsite2AvgAtomVec
);
if
(
num3Avg
>
0
)
vsite3AvgAtoms
.
upload
(
vsite3AvgAtomVec
);
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneAtoms
.
upload
(
vsiteOutOfPlaneAtomVec
);
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsIndex
.
upload
(
vsiteLocalCoordsIndexVec
);
vsiteLocalCoordsAtoms
.
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsStartIndex
.
upload
(
vsiteLocalCoordsStartVec
);
}
int
elementSize
=
(
context
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
vsite2AvgWeights
.
initialize
(
context
,
max
(
1
,
num2Avg
),
2
*
elementSize
,
"vsite2AvgWeights"
);
vsite3AvgWeights
.
initialize
(
context
,
max
(
1
,
num3Avg
),
4
*
elementSize
,
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
.
initialize
(
context
,
max
(
1
,
numOutOfPlane
),
4
*
elementSize
,
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsWeights
.
initialize
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
elementSize
,
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
.
initialize
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
4
*
elementSize
,
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
vsite2AvgWeights
.
upload
(
vsite2AvgWeightVec
,
true
);
if
(
num3Avg
>
0
)
vsite3AvgWeights
.
upload
(
vsite3AvgWeightVec
,
true
);
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneWeights
.
upload
(
vsiteOutOfPlaneWeightVec
,
true
);
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsWeights
.
upload
(
vsiteLocalCoordsWeightVec
,
true
);
vsiteLocalCoordsPos
.
upload
(
vsiteLocalCoordsPosVec
,
true
);
}
// Create the kernels used by this class.
map
<
string
,
string
>
defines
;
defines
[
"NUM_CCMA_CONSTRAINTS"
]
=
context
.
intToString
(
numCCMA
);
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
numAtoms
);
defines
[
"NUM_2_AVERAGE"
]
=
context
.
intToString
(
num2Avg
);
defines
[
"NUM_3_AVERAGE"
]
=
context
.
intToString
(
num3Avg
);
defines
[
"NUM_OUT_OF_PLANE"
]
=
context
.
intToString
(
numOutOfPlane
);
defines
[
"NUM_LOCAL_COORDS"
]
=
context
.
intToString
(
numLocalCoords
);
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
CUmodule
module
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
integrationUtilities
,
defines
);
settlePosKernel
=
context
.
getKernel
(
module
,
"applySettleToPositions"
);
settleVelKernel
=
context
.
getKernel
(
module
,
"applySettleToVelocities"
);
shakePosKernel
=
context
.
getKernel
(
module
,
"applyShakeToPositions"
);
shakeVelKernel
=
context
.
getKernel
(
module
,
"applyShakeToVelocities"
);
ccmaDirectionsKernel
=
context
.
getKernel
(
module
,
"computeCCMAConstraintDirections"
);
ccmaPosForceKernel
=
context
.
getKernel
(
module
,
"computeCCMAPositionConstraintForce"
);
ccmaVelForceKernel
=
context
.
getKernel
(
module
,
"computeCCMAVelocityConstraintForce"
);
ccmaMultiplyKernel
=
context
.
getKernel
(
module
,
"multiplyByCCMAConstraintMatrix"
);
ccmaUpdateKernel
=
context
.
getKernel
(
module
,
"updateCCMAAtomPositions"
);
CHECK_RESULT2
(
cuEventCreate
(
&
ccmaEvent
,
CU_EVENT_DISABLE_TIMING
),
"Error creating event for CCMA"
);
vsitePositionKernel
=
context
.
getKernel
(
module
,
"computeVirtualSites"
);
vsiteForceKernel
=
context
.
getKernel
(
module
,
"distributeVirtualSiteForces"
);
numVsites
=
num2Avg
+
num3Avg
+
numOutOfPlane
+
numLocalCoords
;
randomKernel
=
context
.
getKernel
(
module
,
"generateRandomNumbers"
);
timeShiftKernel
=
context
.
getKernel
(
module
,
"timeShiftVelocities"
);
}
}
CudaIntegrationUtilities
::~
CudaIntegrationUtilities
()
{
CudaIntegrationUtilities
::~
CudaIntegrationUtilities
()
{
context
.
setAsCurrent
();
context
.
setAsCurrent
();
if
(
ccmaConvergedMemory
!=
NULL
)
if
(
ccmaConvergedMemory
!=
NULL
)
{
cuMemFreeHost
(
ccmaConvergedMemory
);
cuMemFreeHost
(
ccmaConvergedMemory
);
}
cuEventDestroy
(
ccmaEvent
);
void
CudaIntegrationUtilities
::
setNextStepSize
(
double
size
)
{
if
(
size
!=
lastStepSize
.
x
||
size
!=
lastStepSize
.
y
)
{
lastStepSize
=
make_double2
(
size
,
size
);
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
stepSize
.
upload
(
&
lastStepSize
);
else
{
float2
lastStepSizeFloat
=
make_float2
((
float
)
size
,
(
float
)
size
);
stepSize
.
upload
(
&
lastStepSizeFloat
);
}
}
}
}
}
double
CudaIntegrationUtilities
::
getLastStepSize
()
{
CudaArray
&
CudaIntegrationUtilities
::
getPosDelta
()
{
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
return
dynamic_cast
<
CudaContext
&>
(
context
).
unwrap
(
posDelta
);
stepSize
.
download
(
&
lastStepSize
);
else
{
float2
lastStepSizeFloat
;
stepSize
.
download
(
&
lastStepSizeFloat
);
lastStepSize
=
make_double2
(
lastStepSizeFloat
.
x
,
lastStepSizeFloat
.
y
);
}
return
lastStepSize
.
y
;
}
}
void
CudaIntegrationUtilities
::
applyConstraints
(
double
tol
)
{
CudaArray
&
CudaIntegrationUtilities
::
getRandom
(
)
{
applyConstraints
(
false
,
tol
);
return
dynamic_cast
<
CudaContext
&>
(
context
).
unwrap
(
random
);
}
}
void
CudaIntegrationUtilities
::
applyVelocityConstraints
(
double
tol
)
{
CudaArray
&
CudaIntegrationUtilities
::
getStepSize
(
)
{
applyConstraints
(
true
,
tol
);
return
dynamic_cast
<
CudaContext
&>
(
context
).
unwrap
(
stepSize
);
}
}
void
CudaIntegrationUtilities
::
applyConstraints
(
bool
constrainVelocities
,
double
tol
)
{
void
CudaIntegrationUtilities
::
applyConstraints
Impl
(
bool
constrainVelocities
,
double
tol
)
{
C
Ufunction
settleKernel
,
shakeKernel
,
ccmaForceKernel
;
C
omputeKernel
settleKernel
,
shakeKernel
,
ccmaForceKernel
;
if
(
constrainVelocities
)
{
if
(
constrainVelocities
)
{
settleKernel
=
settleVelKernel
;
settleKernel
=
settleVelKernel
;
shakeKernel
=
shakeVelKernel
;
shakeKernel
=
shakeVelKernel
;
...
@@ -588,45 +77,39 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
...
@@ -588,45 +77,39 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
shakeKernel
=
shakePosKernel
;
shakeKernel
=
shakePosKernel
;
ccmaForceKernel
=
ccmaPosForceKernel
;
ccmaForceKernel
=
ccmaPosForceKernel
;
}
}
float
floatTol
=
(
float
)
tol
;
void
*
tolPointer
=
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
()
?
(
void
*
)
&
tol
:
(
void
*
)
&
floatTol
);
CUdeviceptr
posCorrection
=
(
context
.
getUseMixedPrecision
()
?
context
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
if
(
settleAtoms
.
isInitialized
())
{
if
(
settleAtoms
.
isInitialized
())
{
i
nt
numClusters
=
settleAtoms
.
getSize
()
;
i
f
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
()
)
void
*
args
[]
=
{
&
numClusters
,
tolPointer
,
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
settleKernel
->
setArg
(
1
,
tol
);
&
posDelta
.
getDevicePointer
(),
&
context
.
getVelm
().
getDevicePointer
(),
else
&
settle
Atoms
.
getDevicePointer
(),
&
settleParams
.
getDevicePointer
()}
;
settle
Kernel
->
setArg
(
1
,
(
float
)
tol
)
;
context
.
executeKernel
(
settleKernel
,
args
,
settleAtoms
.
getSize
());
settleKernel
->
execute
(
settleAtoms
.
getSize
());
}
}
if
(
shakeAtoms
.
isInitialized
())
{
if
(
shakeAtoms
.
isInitialized
())
{
i
nt
numClusters
=
shakeAtoms
.
getSize
()
;
i
f
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
()
)
void
*
args
[]
=
{
&
numClusters
,
tolPointer
,
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
shakeKernel
->
setArg
(
1
,
tol
);
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
.
getDevicePointer
(),
else
&
shakeAtoms
.
getDevicePointer
(),
&
shakeParams
.
getDevicePointer
()}
;
shakeKernel
->
setArg
(
1
,
(
float
)
tol
)
;
context
.
executeKernel
(
shakeKernel
,
args
,
shakeAtoms
.
getSize
());
shakeKernel
->
execute
(
shakeAtoms
.
getSize
());
}
}
if
(
ccmaAtoms
.
isInitialized
())
{
if
(
ccmaAtoms
.
isInitialized
())
{
void
*
directionsArgs
[]
=
{
&
ccmaAtoms
.
getDevicePointer
(),
&
ccmaDistance
.
getDevicePointer
(),
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
ccmaConverged
.
getDevicePointer
()};
ccmaForceKernel
->
setArg
(
6
,
ccmaConvergedDeviceMemory
);
context
.
executeKernel
(
ccmaDirectionsKernel
,
directionsArgs
,
ccmaAtoms
.
getSize
());
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
int
i
;
ccmaForceKernel
->
setArg
(
7
,
tol
);
void
*
forceArgs
[]
=
{
&
ccmaAtoms
.
getDevicePointer
(),
&
ccmaDistance
.
getDevicePointer
(),
else
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
.
getDevicePointer
(),
ccmaForceKernel
->
setArg
(
7
,
(
float
)
tol
);
&
ccmaReducedMass
.
getDevicePointer
(),
&
ccmaDelta1
.
getDevicePointer
(),
&
ccmaConverged
.
getDevicePointer
(),
ccmaDirectionsKernel
->
execute
(
ccmaAtoms
.
getSize
());
&
ccmaConvergedDeviceMemory
,
tolPointer
,
&
i
};
void
*
multiplyArgs
[]
=
{
&
ccmaDelta1
.
getDevicePointer
(),
&
ccmaDelta2
.
getDevicePointer
(),
&
ccmaConstraintMatrixColumn
.
getDevicePointer
(),
&
ccmaConstraintMatrixValue
.
getDevicePointer
(),
&
ccmaConverged
.
getDevicePointer
(),
&
i
};
void
*
updateArgs
[]
=
{
&
ccmaNumAtomConstraints
.
getDevicePointer
(),
&
ccmaAtomConstraints
.
getDevicePointer
(),
&
ccmaDistance
.
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
.
getDevicePointer
(),
&
context
.
getVelm
().
getDevicePointer
(),
&
ccmaDelta1
.
getDevicePointer
(),
&
ccmaDelta2
.
getDevicePointer
(),
&
ccmaConverged
.
getDevicePointer
(),
&
i
};
const
int
checkInterval
=
4
;
const
int
checkInterval
=
4
;
ccmaConvergedMemory
[
0
]
=
0
;
ccmaConvergedMemory
[
0
]
=
0
;
for
(
i
=
0
;
i
<
150
;
i
++
)
{
ccmaUpdateKernel
->
setArg
(
3
,
constrainVelocities
?
context
.
getVelm
()
:
posDelta
);
context
.
executeKernel
(
ccmaForceKernel
,
forceArgs
,
ccmaAtoms
.
getSize
());
for
(
int
i
=
0
;
i
<
150
;
i
++
)
{
ccmaForceKernel
->
setArg
(
8
,
i
);
ccmaForceKernel
->
execute
(
ccmaAtoms
.
getSize
());
if
((
i
+
1
)
%
checkInterval
==
0
)
if
((
i
+
1
)
%
checkInterval
==
0
)
CHECK_RESULT2
(
cuEventRecord
(
ccmaEvent
,
0
),
"Error recording event for CCMA"
);
CHECK_RESULT2
(
cuEventRecord
(
ccmaEvent
,
0
),
"Error recording event for CCMA"
);
context
.
executeKernel
(
ccmaMultiplyKernel
,
multiplyArgs
,
ccmaAtoms
.
getSize
());
ccmaMultiplyKernel
->
setArg
(
5
,
i
);
context
.
executeKernel
(
ccmaUpdateKernel
,
updateArgs
,
context
.
getNumAtoms
());
ccmaMultiplyKernel
->
execute
(
ccmaAtoms
.
getSize
());
ccmaUpdateKernel
->
setArg
(
8
,
i
);
ccmaUpdateKernel
->
execute
(
context
.
getNumAtoms
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
if
((
i
+
1
)
%
checkInterval
==
0
)
{
CHECK_RESULT2
(
cuEventSynchronize
(
ccmaEvent
),
"Error synchronizing on event for CCMA"
);
CHECK_RESULT2
(
cuEventSynchronize
(
ccmaEvent
),
"Error synchronizing on event for CCMA"
);
if
(
ccmaConvergedMemory
[
0
])
if
(
ccmaConvergedMemory
[
0
])
...
@@ -636,142 +119,9 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
...
@@ -636,142 +119,9 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
}
}
}
}
/**
 * Launch the kernel that computes virtual site positions.
 *
 * Does nothing when numVsites is zero. Otherwise the kernel is given the
 * position array, the position correction pointer (0 unless the context uses
 * mixed precision), and the atom/weight arrays for each kind of virtual site.
 */
void CudaIntegrationUtilities::computeVirtualSites() {
    if (numVsites <= 0)
        return;

    // The correction array is only queried in mixed precision mode; otherwise a null device pointer is passed.
    CUdeviceptr posCorrection = 0;
    if (context.getUseMixedPrecision())
        posCorrection = context.getPosqCorrection().getDevicePointer();

    // The order of these entries must match the kernel's parameter list.
    void* kernelArgs[] = {
        &context.getPosq().getDevicePointer(),
        &posCorrection,
        &vsite2AvgAtoms.getDevicePointer(),
        &vsite2AvgWeights.getDevicePointer(),
        &vsite3AvgAtoms.getDevicePointer(),
        &vsite3AvgWeights.getDevicePointer(),
        &vsiteOutOfPlaneAtoms.getDevicePointer(),
        &vsiteOutOfPlaneWeights.getDevicePointer(),
        &vsiteLocalCoordsIndex.getDevicePointer(),
        &vsiteLocalCoordsAtoms.getDevicePointer(),
        &vsiteLocalCoordsWeights.getDevicePointer(),
        &vsiteLocalCoordsPos.getDevicePointer(),
        &vsiteLocalCoordsStartIndex.getDevicePointer()
    };
    context.executeKernel(vsitePositionKernel, kernelArgs, numVsites);
}
void
CudaIntegrationUtilities
::
distributeForcesFromVirtualSites
()
{
void
CudaIntegrationUtilities
::
distributeForcesFromVirtualSites
()
{
if
(
numVsites
>
0
)
{
if
(
numVsites
>
0
)
{
CUdeviceptr
posCorrection
=
(
context
.
getUseMixedPrecision
()
?
context
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
vsiteForceKernel
->
setArg
(
2
,
context
.
getLongForceBuffer
());
void
*
args
[]
=
{
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
context
.
getForce
().
getDevicePointer
(),
vsiteForceKernel
->
execute
(
numVsites
);
&
vsite2AvgAtoms
.
getDevicePointer
(),
&
vsite2AvgWeights
.
getDevicePointer
(),
&
vsite3AvgAtoms
.
getDevicePointer
(),
&
vsite3AvgWeights
.
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
.
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
.
getDevicePointer
(),
&
vsiteLocalCoordsIndex
.
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
.
getDevicePointer
(),
&
vsiteLocalCoordsWeights
.
getDevicePointer
(),
&
vsiteLocalCoordsPos
.
getDevicePointer
(),
&
vsiteLocalCoordsStartIndex
.
getDevicePointer
()};
context
.
executeKernel
(
vsiteForceKernel
,
args
,
numVsites
);
}
}
void
CudaIntegrationUtilities
::
initRandomNumberGenerator
(
unsigned
int
randomNumberSeed
)
{
if
(
random
.
isInitialized
())
{
if
(
randomNumberSeed
!=
lastSeed
)
throw
OpenMMException
(
"CudaIntegrationUtilities::initRandomNumberGenerator(): Requested two different values for the random number seed"
);
return
;
}
// Create the random number arrays.
lastSeed
=
randomNumberSeed
;
random
.
initialize
<
float4
>
(
context
,
4
*
context
.
getPaddedNumAtoms
(),
"random"
);
randomSeed
.
initialize
<
int4
>
(
context
,
context
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
randomPos
=
random
.
getSize
();
// Use a quick and dirty RNG to pick seeds for the real random number generator.
vector
<
int4
>
seed
(
randomSeed
.
getSize
());
unsigned
int
r
=
randomNumberSeed
;
if
(
r
==
0
)
r
=
(
unsigned
int
)
osrngseed
();
for
(
int
i
=
0
;
i
<
randomSeed
.
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
.
upload
(
seed
);
}
/**
 * Reserve a run of values in the random array, regenerating the array if needed.
 *
 * If enough unused values remain after randomPos, they are reserved and the
 * old position is returned. Otherwise the array is enlarged if it is smaller
 * than numValues, randomKernel is run to refill it, and the reservation starts
 * at index 0.
 *
 * @param numValues  the number of values to reserve
 * @return the index in the random array at which the reserved values start
 */
int CudaIntegrationUtilities::prepareRandomNumbers(int numValues) {
    const bool mustRegenerate = !(randomPos+numValues <= random.getSize());
    if (mustRegenerate) {
        // Grow the array first if a single request would not fit in it.
        if (numValues > random.getSize())
            random.resize(numValues);
        int bufferLength = random.getSize();
        void* kernelArgs[] = {&bufferLength, &random.getDevicePointer(), &randomSeed.getDevicePointer()};
        context.executeKernel(randomKernel, kernelArgs, random.getSize());
        randomPos = numValues;
        return 0;
    }

    // Enough values are still available: hand out the next run.
    int start = randomPos;
    randomPos += numValues;
    return start;
}
/**
 * Write the random number generator state to a checkpoint stream.
 *
 * Nothing is written if the random array has not been initialized. Otherwise
 * the raw bytes of randomPos, the contents of the random array, and the
 * contents of the randomSeed array are written, in that order.
 *
 * @param stream  the stream to write to
 */
void CudaIntegrationUtilities::createCheckpoint(ostream& stream) {
    if (random.isInitialized()) {
        stream.write(reinterpret_cast<const char*>(&randomPos), sizeof(int));

        // Copy the generated values back from the device and append them.
        vector<float4> hostRandom;
        random.download(hostRandom);
        stream.write(reinterpret_cast<const char*>(&hostRandom[0]), sizeof(float4)*random.getSize());

        // Then do the same for the per-thread seeds.
        vector<int4> hostSeeds;
        randomSeed.download(hostSeeds);
        stream.write(reinterpret_cast<const char*>(&hostSeeds[0]), sizeof(int4)*randomSeed.getSize());
    }
}
/**
 * Restore the random number generator state from a checkpoint stream.
 *
 * Nothing is read if the random array has not been initialized. Otherwise the
 * raw bytes of randomPos, the contents of the random array, and the contents
 * of the randomSeed array are read, in that order, and the arrays are uploaded.
 * The number of bytes read is determined by the current sizes of the arrays.
 *
 * @param stream  the stream to read from
 */
void CudaIntegrationUtilities::loadCheckpoint(istream& stream) {
    if (random.isInitialized()) {
        stream.read(reinterpret_cast<char*>(&randomPos), sizeof(int));

        // Read the generated values and send them to the device.
        vector<float4> hostRandom(random.getSize());
        stream.read(reinterpret_cast<char*>(&hostRandom[0]), sizeof(float4)*random.getSize());
        random.upload(hostRandom);

        // Then do the same for the per-thread seeds.
        vector<int4> hostSeeds(randomSeed.getSize());
        stream.read(reinterpret_cast<char*>(&hostSeeds[0]), sizeof(int4)*randomSeed.getSize());
        randomSeed.upload(hostSeeds);
    }
}
double
CudaIntegrationUtilities
::
computeKineticEnergy
(
double
timeShift
)
{
int
numParticles
=
context
.
getNumAtoms
();
if
(
timeShift
!=
0
)
{
float
timeShiftFloat
=
(
float
)
timeShift
;
void
*
timeShiftPtr
=
(
context
.
getUseDoublePrecision
()
?
(
void
*
)
&
timeShift
:
(
void
*
)
&
timeShiftFloat
);
// Copy the velocities into the posDelta array while we temporarily modify them.
context
.
getVelm
().
copyTo
(
posDelta
);
// Apply the time shift.
void
*
args
[]
=
{
&
context
.
getVelm
().
getDevicePointer
(),
&
context
.
getForce
().
getDevicePointer
(),
timeShiftPtr
};
context
.
executeKernel
(
timeShiftKernel
,
args
,
numParticles
);
applyConstraints
(
true
,
1e-4
);
}
// Compute the kinetic energy.
double
energy
=
0.0
;
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
{
vector
<
double4
>
velm
;
context
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double4
v
=
velm
[
i
];
if
(
v
.
w
!=
0
)
energy
+=
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
+
v
.
z
*
v
.
z
)
/
v
.
w
;
}
}
else
{
vector
<
float4
>
velm
;
context
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
float4
v
=
velm
[
i
];
if
(
v
.
w
!=
0
)
energy
+=
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
+
v
.
z
*
v
.
z
)
/
v
.
w
;
}
}
}
// Restore the velocities.
if
(
timeShift
!=
0
)
posDelta
.
copyTo
(
context
.
getVelm
());
return
0.5
*
energy
;
}
}
platforms/cuda/src/CudaKernel.cpp
0 → 100644
View file @
5a06df78
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaKernel.h"
#include "openmm/common/ComputeArray.h"
#include <cstring>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
/**
 * Create a CudaKernel wrapping a CUfunction.
 *
 * @param context  the CudaContext the kernel is launched through (see execute())
 * @param kernel   the CUfunction handle that execute() passes to the context
 * @param name     the name returned by getName()
 */
CudaKernel::CudaKernel(CudaContext& context, CUfunction kernel, const string& name)
        : context(context), kernel(kernel), name(name) {
    // Nothing else to set up: arguments are added later through the add*Arg() methods.
}
/**
 * Get the name this kernel was constructed with.
 *
 * @return a copy of the stored kernel name
 */
string CudaKernel::getName() const {
    return this->name;
}
/**
 * Launch the kernel using the arguments that are currently set.
 *
 * For every argument slot a pointer to the argument's storage is written into
 * argPointers: slots holding an array use the address of that array's device
 * pointer, all other slots use the address of the corresponding entry of
 * primitiveArgs.  The pointer list is then handed to the context's
 * executeKernel().
 *
 * @param threads    forwarded unchanged to executeKernel()
 * @param blockSize  forwarded unchanged to executeKernel()
 */
void CudaKernel::execute(int threads, int blockSize) {
    const int argCount = arrayArgs.size();
    argPointers.resize(argCount);
    for (int slot = 0; slot < argCount; ++slot) {
        if (arrayArgs[slot] == NULL) {
            // No array registered for this slot, so it holds a primitive value.
            argPointers[slot] = &primitiveArgs[slot];
        }
        else {
            argPointers[slot] = &arrayArgs[slot]->getDevicePointer();
        }
    }
    context.executeKernel(kernel, argPointers.data(), threads, blockSize);
}
/**
 * Append a new argument slot and bind an array to it.
 *
 * @param value  the array to bind; it is resolved through setArrayArg()
 */
void CudaKernel::addArrayArg(ArrayInterface& value) {
    // The new slot's index is the argument count before the slot is appended.
    const int newSlot = arrayArgs.size();
    addEmptyArg();
    setArrayArg(newSlot, value);
}
/**
 * Append a new argument slot and store a primitive value in it.
 *
 * @param value  pointer to the bytes of the value to store
 * @param size   number of bytes to copy from value; validated by setPrimitiveArg()
 */
void CudaKernel::addPrimitiveArg(const void* value, int size) {
    // The new slot's index is the argument count before the slot is appended.
    const int newSlot = arrayArgs.size();
    addEmptyArg();
    setPrimitiveArg(newSlot, value, size);
}
void
CudaKernel
::
addEmptyArg
()
{
primitiveArgs
.
push_back
(
make_double4
(
0
,
0
,
0
,
0
));
arrayArgs
.
push_back
(
NULL
);
}
void
CudaKernel
::
setArrayArg
(
int
index
,
ArrayInterface
&
value
)
{
arrayArgs
[
index
]
=
&
context
.
unwrap
(
value
);
}
/**
 * Store a primitive value in an existing argument slot.
 *
 * The value's bytes are copied into the slot's entry of primitiveArgs, and the
 * slot's arrayArgs entry is reset to NULL so that execute() passes the
 * primitive storage rather than an array.
 *
 * @param index  the slot to set; must refer to a slot previously created with
 *               one of the add*Arg() methods
 * @param value  pointer to the bytes of the value to store
 * @param size   number of bytes to copy; must not exceed sizeof(double4),
 *               which is the storage available per slot
 * @throws OpenMMException if index does not refer to an existing slot, or if
 *         size is negative or larger than sizeof(double4)
 */
void CudaKernel::setPrimitiveArg(int index, const void* value, int size) {
    // operator[] performs no bounds checking, so reject bad indices explicitly
    // instead of copying to memory outside the vector.
    if (index < 0 || index >= static_cast<int>(arrayArgs.size()))
        throw OpenMMException("Kernel argument index out of range");
    // A negative size was previously rejected only through implicit conversion
    // to an unsigned type; make that check explicit.
    if (size < 0 || static_cast<size_t>(size) > sizeof(double4))
        throw OpenMMException("Unsupported value type for kernel argument");
    memcpy(&primitiveArgs[index], value, size);
    arrayArgs[index] = NULL;
}
platforms/cuda/src/CudaKernelFactory.cpp
View file @
5a06df78
...
@@ -28,6 +28,7 @@
...
@@ -28,6 +28,7 @@
#include "CudaKernels.h"
#include "CudaKernels.h"
#include "CudaParallelKernels.h"
#include "CudaParallelKernels.h"
#include "CudaPlatform.h"
#include "CudaPlatform.h"
#include "openmm/common/CommonKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
...
@@ -77,64 +78,68 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
...
@@ -77,64 +78,68 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
if
(
name
==
VirtualSitesKernel
::
Name
())
if
(
name
==
VirtualSitesKernel
::
Name
())
return
new
CudaVirtualSitesKernel
(
name
,
platform
,
cu
);
return
new
CudaVirtualSitesKernel
(
name
,
platform
,
cu
);
if
(
name
==
CalcHarmonicBondForceKernel
::
Name
())
if
(
name
==
CalcHarmonicBondForceKernel
::
Name
())
return
new
C
uda
CalcHarmonicBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcHarmonicBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomBondForceKernel
::
Name
())
if
(
name
==
CalcCustomBondForceKernel
::
Name
())
return
new
C
uda
CalcCustomBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCustomBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcHarmonicAngleForceKernel
::
Name
())
if
(
name
==
CalcHarmonicAngleForceKernel
::
Name
())
return
new
C
uda
CalcHarmonicAngleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcHarmonicAngleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomAngleForceKernel
::
Name
())
if
(
name
==
CalcCustomAngleForceKernel
::
Name
())
return
new
C
uda
CalcCustomAngleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCustomAngleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcPeriodicTorsionForceKernel
::
Name
())
if
(
name
==
CalcPeriodicTorsionForceKernel
::
Name
())
return
new
C
uda
CalcPeriodicTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcPeriodicTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcRBTorsionForceKernel
::
Name
())
if
(
name
==
CalcRBTorsionForceKernel
::
Name
())
return
new
C
uda
CalcRBTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcRBTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCMAPTorsionForceKernel
::
Name
())
if
(
name
==
CalcCMAPTorsionForceKernel
::
Name
())
return
new
C
uda
CalcCMAPTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCMAPTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomTorsionForceKernel
::
Name
())
if
(
name
==
CalcCustomTorsionForceKernel
::
Name
())
return
new
C
uda
CalcCustomTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCustomTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcNonbondedForceKernel
::
Name
())
if
(
name
==
CalcNonbondedForceKernel
::
Name
())
return
new
CudaCalcNonbondedForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcNonbondedForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomNonbondedForceKernel
::
Name
())
if
(
name
==
CalcCustomNonbondedForceKernel
::
Name
())
return
new
C
uda
CalcCustomNonbondedForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCustomNonbondedForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcGBSAOBCForceKernel
::
Name
())
if
(
name
==
CalcGBSAOBCForceKernel
::
Name
())
return
new
C
uda
CalcGBSAOBCForceKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
CalcGBSAOBCForceKernel
(
name
,
platform
,
cu
);
if
(
name
==
CalcCustomGBForceKernel
::
Name
())
if
(
name
==
CalcCustomGBForceKernel
::
Name
())
return
new
C
uda
CalcCustomGBForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCustomGBForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomExternalForceKernel
::
Name
())
if
(
name
==
CalcCustomExternalForceKernel
::
Name
())
return
new
C
uda
CalcCustomExternalForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCustomExternalForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomHbondForceKernel
::
Name
())
if
(
name
==
CalcCustomHbondForceKernel
::
Name
())
return
new
C
uda
CalcCustomHbondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCustomHbondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomCentroidBondForceKernel
::
Name
())
if
(
name
==
CalcCustomCentroidBondForceKernel
::
Name
())
return
new
C
uda
CalcCustomCentroidBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCustomCentroidBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
return
new
C
uda
CalcCustomCompoundBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCustomCompoundBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomCVForceKernel
::
Name
())
if
(
name
==
CalcCustomCVForceKernel
::
Name
())
return
new
CudaCalcCustomCVForceKernel
(
name
,
platform
,
cu
);
return
new
CudaCalcCustomCVForceKernel
(
name
,
platform
,
cu
);
if
(
name
==
CalcRMSDForceKernel
::
Name
())
if
(
name
==
CalcRMSDForceKernel
::
Name
())
return
new
C
uda
CalcRMSDForceKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
CalcRMSDForceKernel
(
name
,
platform
,
cu
);
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
return
new
C
uda
CalcCustomManyParticleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
C
ommon
CalcCustomManyParticleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
return
new
C
uda
CalcGayBerneForceKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
CalcGayBerneForceKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
return
new
C
uda
IntegrateVerletStepKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
IntegrateVerletStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateLangevinStepKernel
::
Name
())
if
(
name
==
IntegrateLangevinStepKernel
::
Name
())
return
new
C
uda
IntegrateLangevinStepKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
IntegrateLangevinStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
Integrate
BAOAB
StepKernel
::
Name
())
if
(
name
==
Integrate
LangevinMiddle
StepKernel
::
Name
())
return
new
C
uda
Integrate
BAOAB
StepKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
Integrate
LangevinMiddle
StepKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateBrownianStepKernel
::
Name
())
if
(
name
==
IntegrateBrownianStepKernel
::
Name
())
return
new
C
uda
IntegrateBrownianStepKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
IntegrateBrownianStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateVariableVerletStepKernel
::
Name
())
if
(
name
==
IntegrateVariableVerletStepKernel
::
Name
())
return
new
C
uda
IntegrateVariableVerletStepKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
IntegrateVariableVerletStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateVariableLangevinStepKernel
::
Name
())
if
(
name
==
IntegrateVariableLangevinStepKernel
::
Name
())
return
new
C
uda
IntegrateVariableLangevinStepKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
IntegrateVariableLangevinStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateCustomStepKernel
::
Name
())
if
(
name
==
IntegrateCustomStepKernel
::
Name
())
return
new
C
uda
IntegrateCustomStepKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
IntegrateCustomStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
return
new
CudaApplyAndersenThermostatKernel
(
name
,
platform
,
cu
);
return
new
CommonApplyAndersenThermostatKernel
(
name
,
platform
,
cu
);
if
(
name
==
NoseHooverChainKernel
::
Name
())
return
new
CudaNoseHooverChainKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateVelocityVerletStepKernel
::
Name
())
return
new
CudaIntegrateVelocityVerletStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
return
new
CudaApplyMonteCarloBarostatKernel
(
name
,
platform
,
cu
);
return
new
CudaApplyMonteCarloBarostatKernel
(
name
,
platform
,
cu
);
if
(
name
==
RemoveCMMotionKernel
::
Name
())
if
(
name
==
RemoveCMMotionKernel
::
Name
())
return
new
C
uda
RemoveCMMotionKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
RemoveCMMotionKernel
(
name
,
platform
,
cu
);
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
}
}
platforms/cuda/src/CudaKernelSources.h.in
View file @
5a06df78
...
@@ -27,7 +27,7 @@
...
@@ -27,7 +27,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "windowsExportC
uda
.h"
#include "
openmm/common/
windowsExportC
ommon
.h"
#include <string>
#include <string>
namespace OpenMM {
namespace OpenMM {
...
@@ -38,9 +38,9 @@ namespace OpenMM {
...
@@ -38,9 +38,9 @@ namespace OpenMM {
* kernels subfolder.
* kernels subfolder.
*/
*/
class OPENMM_EXPORT_C
UDA
CudaKernelSources {
class OPENMM_EXPORT_C
OMMON
CudaKernelSources {
public:
public:
@
CUDA
_FILE_DECLARATIONS@
@
KERNEL
_FILE_DECLARATIONS@
};
};
} // namespace OpenMM
} // namespace OpenMM
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
5a06df78
This source diff could not be displayed because it is too large. You can
view the blob
instead.
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
5a06df78
...
@@ -27,6 +27,7 @@
...
@@ -27,6 +27,7 @@
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
#include "CudaNonbondedUtilities.h"
#include "CudaNonbondedUtilities.h"
#include "CudaArray.h"
#include "CudaArray.h"
#include "CudaContext.h"
#include "CudaKernelSources.h"
#include "CudaKernelSources.h"
#include "CudaExpressionUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaSort.h"
#include "CudaSort.h"
...
@@ -84,6 +85,10 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
...
@@ -84,6 +85,10 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
cuEventDestroy
(
downloadCountEvent
);
cuEventDestroy
(
downloadCountEvent
);
}
}
/**
 * Convenience overload of addInteraction() without the supportsPairList flag.
 *
 * All arguments are forwarded unchanged to the eight-argument overload, with
 * supportsPairList set to false.
 */
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
    const bool supportsPairList = false;
    addInteraction(usesCutoff, usesPeriodic, usesExclusions, cutoffDistance, exclusionList, kernel, forceGroup, supportsPairList);
}
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
,
bool
supportsPairList
)
{
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
,
bool
supportsPairList
)
{
if
(
groupCutoff
.
size
()
>
0
)
{
if
(
groupCutoff
.
size
()
>
0
)
{
if
(
usesCutoff
!=
useCutoff
)
if
(
usesCutoff
!=
useCutoff
)
...
@@ -110,10 +115,20 @@ void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic,
...
@@ -110,10 +115,20 @@ void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic,
}
}
}
}
/**
 * Register a per-particle parameter described by a ComputeParameterInfo.
 *
 * The description is converted to a ParameterInfo (name, component type,
 * number of components, size, and the device pointer of the parameter's array
 * after unwrapping it through the context) and appended to parameters.
 */
void CudaNonbondedUtilities::addParameter(ComputeParameterInfo parameter) {
    ParameterInfo converted(parameter.getName(), parameter.getComponentType(), parameter.getNumComponents(),
            parameter.getSize(), context.unwrap(parameter.getArray()).getDevicePointer());
    parameters.push_back(converted);
}
void
CudaNonbondedUtilities
::
addParameter
(
const
ParameterInfo
&
parameter
)
{
void
CudaNonbondedUtilities
::
addParameter
(
const
ParameterInfo
&
parameter
)
{
parameters
.
push_back
(
parameter
);
parameters
.
push_back
(
parameter
);
}
}
/**
 * Register an extra kernel argument described by a ComputeParameterInfo.
 *
 * The description is converted to a ParameterInfo (name, component type,
 * number of components, size, and the device pointer of the argument's array
 * after unwrapping it through the context) and appended to arguments.
 */
void CudaNonbondedUtilities::addArgument(ComputeParameterInfo parameter) {
    ParameterInfo converted(parameter.getName(), parameter.getComponentType(), parameter.getNumComponents(),
            parameter.getSize(), context.unwrap(parameter.getArray()).getDevicePointer());
    arguments.push_back(converted);
}
void
CudaNonbondedUtilities
::
addArgument
(
const
ParameterInfo
&
parameter
)
{
void
CudaNonbondedUtilities
::
addArgument
(
const
ParameterInfo
&
parameter
)
{
arguments
.
push_back
(
parameter
);
arguments
.
push_back
(
parameter
);
}
}
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
5a06df78
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
9
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -200,7 +200,7 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
...
@@ -200,7 +200,7 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
data
.
contextEnergy
[
i
]
=
0.0
;
data
.
contextEnergy
[
i
]
=
0.0
;
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
BeginComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
pinnedPositionBuffer
,
event
,
interactionCounts
[
i
]));
thread
.
addTask
(
new
BeginComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
pinnedPositionBuffer
,
event
,
interactionCounts
[
i
]));
}
}
}
}
...
@@ -208,7 +208,7 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
...
@@ -208,7 +208,7 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
double
CudaParallelCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
double
CudaParallelCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
contextForces
,
valid
,
interactionCounts
[
i
]));
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
contextForces
,
valid
,
interactionCounts
[
i
]));
}
}
data
.
syncContexts
();
data
.
syncContexts
();
...
@@ -255,7 +255,7 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
...
@@ -255,7 +255,7 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
class
CudaParallelCalcHarmonicBondForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcHarmonicBondForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcHarmonicBondForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcHarmonicBondForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -264,7 +264,7 @@ public:
...
@@ -264,7 +264,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcHarmonicBondForceKernel
&
kernel
;
C
ommon
CalcHarmonicBondForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -272,7 +272,7 @@ private:
...
@@ -272,7 +272,7 @@ private:
CudaParallelCalcHarmonicBondForceKernel
::
CudaParallelCalcHarmonicBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcHarmonicBondForceKernel
::
CudaParallelCalcHarmonicBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcHarmonicBondForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcHarmonicBondForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcHarmonicBondForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcHarmonicBondForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcHarmonicBondForceKernel
::
initialize
(
const
System
&
system
,
const
HarmonicBondForce
&
force
)
{
void
CudaParallelCalcHarmonicBondForceKernel
::
initialize
(
const
System
&
system
,
const
HarmonicBondForce
&
force
)
{
...
@@ -283,7 +283,7 @@ void CudaParallelCalcHarmonicBondForceKernel::initialize(const System& system, c
...
@@ -283,7 +283,7 @@ void CudaParallelCalcHarmonicBondForceKernel::initialize(const System& system, c
double
CudaParallelCalcHarmonicBondForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcHarmonicBondForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -296,7 +296,7 @@ void CudaParallelCalcHarmonicBondForceKernel::copyParametersToContext(ContextImp
...
@@ -296,7 +296,7 @@ void CudaParallelCalcHarmonicBondForceKernel::copyParametersToContext(ContextImp
class
CudaParallelCalcCustomBondForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcCustomBondForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcCustomBondForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcCustomBondForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -305,7 +305,7 @@ public:
...
@@ -305,7 +305,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcCustomBondForceKernel
&
kernel
;
C
ommon
CalcCustomBondForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -313,7 +313,7 @@ private:
...
@@ -313,7 +313,7 @@ private:
CudaParallelCalcCustomBondForceKernel
::
CudaParallelCalcCustomBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcCustomBondForceKernel
::
CudaParallelCalcCustomBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcCustomBondForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcCustomBondForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcCustomBondForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcCustomBondForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcCustomBondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomBondForce
&
force
)
{
void
CudaParallelCalcCustomBondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomBondForce
&
force
)
{
...
@@ -324,7 +324,7 @@ void CudaParallelCalcCustomBondForceKernel::initialize(const System& system, con
...
@@ -324,7 +324,7 @@ void CudaParallelCalcCustomBondForceKernel::initialize(const System& system, con
double
CudaParallelCalcCustomBondForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcCustomBondForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -337,7 +337,7 @@ void CudaParallelCalcCustomBondForceKernel::copyParametersToContext(ContextImpl&
...
@@ -337,7 +337,7 @@ void CudaParallelCalcCustomBondForceKernel::copyParametersToContext(ContextImpl&
class
CudaParallelCalcHarmonicAngleForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcHarmonicAngleForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcHarmonicAngleForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcHarmonicAngleForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -346,7 +346,7 @@ public:
...
@@ -346,7 +346,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcHarmonicAngleForceKernel
&
kernel
;
C
ommon
CalcHarmonicAngleForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -354,7 +354,7 @@ private:
...
@@ -354,7 +354,7 @@ private:
CudaParallelCalcHarmonicAngleForceKernel
::
CudaParallelCalcHarmonicAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcHarmonicAngleForceKernel
::
CudaParallelCalcHarmonicAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcHarmonicAngleForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcHarmonicAngleForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcHarmonicAngleForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcHarmonicAngleForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcHarmonicAngleForceKernel
::
initialize
(
const
System
&
system
,
const
HarmonicAngleForce
&
force
)
{
void
CudaParallelCalcHarmonicAngleForceKernel
::
initialize
(
const
System
&
system
,
const
HarmonicAngleForce
&
force
)
{
...
@@ -365,7 +365,7 @@ void CudaParallelCalcHarmonicAngleForceKernel::initialize(const System& system,
...
@@ -365,7 +365,7 @@ void CudaParallelCalcHarmonicAngleForceKernel::initialize(const System& system,
double
CudaParallelCalcHarmonicAngleForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcHarmonicAngleForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -378,7 +378,7 @@ void CudaParallelCalcHarmonicAngleForceKernel::copyParametersToContext(ContextIm
...
@@ -378,7 +378,7 @@ void CudaParallelCalcHarmonicAngleForceKernel::copyParametersToContext(ContextIm
class
CudaParallelCalcCustomAngleForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcCustomAngleForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcCustomAngleForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcCustomAngleForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -387,7 +387,7 @@ public:
...
@@ -387,7 +387,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcCustomAngleForceKernel
&
kernel
;
C
ommon
CalcCustomAngleForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -395,7 +395,7 @@ private:
...
@@ -395,7 +395,7 @@ private:
CudaParallelCalcCustomAngleForceKernel
::
CudaParallelCalcCustomAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcCustomAngleForceKernel
::
CudaParallelCalcCustomAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcCustomAngleForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcCustomAngleForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcCustomAngleForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcCustomAngleForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcCustomAngleForceKernel
::
initialize
(
const
System
&
system
,
const
CustomAngleForce
&
force
)
{
void
CudaParallelCalcCustomAngleForceKernel
::
initialize
(
const
System
&
system
,
const
CustomAngleForce
&
force
)
{
...
@@ -406,7 +406,7 @@ void CudaParallelCalcCustomAngleForceKernel::initialize(const System& system, co
...
@@ -406,7 +406,7 @@ void CudaParallelCalcCustomAngleForceKernel::initialize(const System& system, co
double
CudaParallelCalcCustomAngleForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcCustomAngleForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -419,7 +419,7 @@ void CudaParallelCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl
...
@@ -419,7 +419,7 @@ void CudaParallelCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl
class
CudaParallelCalcPeriodicTorsionForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcPeriodicTorsionForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcPeriodicTorsionForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcPeriodicTorsionForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -428,7 +428,7 @@ public:
...
@@ -428,7 +428,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcPeriodicTorsionForceKernel
&
kernel
;
C
ommon
CalcPeriodicTorsionForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -436,7 +436,7 @@ private:
...
@@ -436,7 +436,7 @@ private:
CudaParallelCalcPeriodicTorsionForceKernel
::
CudaParallelCalcPeriodicTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcPeriodicTorsionForceKernel
::
CudaParallelCalcPeriodicTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcPeriodicTorsionForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcPeriodicTorsionForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcPeriodicTorsionForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcPeriodicTorsionForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcPeriodicTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
PeriodicTorsionForce
&
force
)
{
void
CudaParallelCalcPeriodicTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
PeriodicTorsionForce
&
force
)
{
...
@@ -447,7 +447,7 @@ void CudaParallelCalcPeriodicTorsionForceKernel::initialize(const System& system
...
@@ -447,7 +447,7 @@ void CudaParallelCalcPeriodicTorsionForceKernel::initialize(const System& system
double
CudaParallelCalcPeriodicTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcPeriodicTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -460,7 +460,7 @@ void CudaParallelCalcPeriodicTorsionForceKernel::copyParametersToContext(Context
...
@@ -460,7 +460,7 @@ void CudaParallelCalcPeriodicTorsionForceKernel::copyParametersToContext(Context
class
CudaParallelCalcRBTorsionForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcRBTorsionForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcRBTorsionForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcRBTorsionForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -469,7 +469,7 @@ public:
...
@@ -469,7 +469,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcRBTorsionForceKernel
&
kernel
;
C
ommon
CalcRBTorsionForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -477,7 +477,7 @@ private:
...
@@ -477,7 +477,7 @@ private:
CudaParallelCalcRBTorsionForceKernel
::
CudaParallelCalcRBTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcRBTorsionForceKernel
::
CudaParallelCalcRBTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcRBTorsionForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcRBTorsionForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcRBTorsionForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcRBTorsionForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcRBTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
RBTorsionForce
&
force
)
{
void
CudaParallelCalcRBTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
RBTorsionForce
&
force
)
{
...
@@ -488,7 +488,7 @@ void CudaParallelCalcRBTorsionForceKernel::initialize(const System& system, cons
...
@@ -488,7 +488,7 @@ void CudaParallelCalcRBTorsionForceKernel::initialize(const System& system, cons
double
CudaParallelCalcRBTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcRBTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -501,7 +501,7 @@ void CudaParallelCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl&
...
@@ -501,7 +501,7 @@ void CudaParallelCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl&
class
CudaParallelCalcCMAPTorsionForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcCMAPTorsionForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcCMAPTorsionForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcCMAPTorsionForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -510,7 +510,7 @@ public:
...
@@ -510,7 +510,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcCMAPTorsionForceKernel
&
kernel
;
C
ommon
CalcCMAPTorsionForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -518,7 +518,7 @@ private:
...
@@ -518,7 +518,7 @@ private:
CudaParallelCalcCMAPTorsionForceKernel
::
CudaParallelCalcCMAPTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcCMAPTorsionForceKernel
::
CudaParallelCalcCMAPTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcCMAPTorsionForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcCMAPTorsionForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcCMAPTorsionForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcCMAPTorsionForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcCMAPTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
CMAPTorsionForce
&
force
)
{
void
CudaParallelCalcCMAPTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
CMAPTorsionForce
&
force
)
{
...
@@ -529,7 +529,7 @@ void CudaParallelCalcCMAPTorsionForceKernel::initialize(const System& system, co
...
@@ -529,7 +529,7 @@ void CudaParallelCalcCMAPTorsionForceKernel::initialize(const System& system, co
double
CudaParallelCalcCMAPTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcCMAPTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -542,7 +542,7 @@ void CudaParallelCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl
...
@@ -542,7 +542,7 @@ void CudaParallelCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl
class
CudaParallelCalcCustomTorsionForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcCustomTorsionForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcCustomTorsionForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcCustomTorsionForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -551,7 +551,7 @@ public:
...
@@ -551,7 +551,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcCustomTorsionForceKernel
&
kernel
;
C
ommon
CalcCustomTorsionForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -559,7 +559,7 @@ private:
...
@@ -559,7 +559,7 @@ private:
CudaParallelCalcCustomTorsionForceKernel
::
CudaParallelCalcCustomTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcCustomTorsionForceKernel
::
CudaParallelCalcCustomTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcCustomTorsionForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcCustomTorsionForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcCustomTorsionForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcCustomTorsionForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcCustomTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
CustomTorsionForce
&
force
)
{
void
CudaParallelCalcCustomTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
CustomTorsionForce
&
force
)
{
...
@@ -570,7 +570,7 @@ void CudaParallelCalcCustomTorsionForceKernel::initialize(const System& system,
...
@@ -570,7 +570,7 @@ void CudaParallelCalcCustomTorsionForceKernel::initialize(const System& system,
double
CudaParallelCalcCustomTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcCustomTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -611,7 +611,7 @@ void CudaParallelCalcNonbondedForceKernel::initialize(const System& system, cons
...
@@ -611,7 +611,7 @@ void CudaParallelCalcNonbondedForceKernel::initialize(const System& system, cons
double
CudaParallelCalcNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
bool
includeDirect
,
bool
includeReciprocal
)
{
double
CudaParallelCalcNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
bool
includeDirect
,
bool
includeReciprocal
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
includeDirect
,
includeReciprocal
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
includeDirect
,
includeReciprocal
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -632,7 +632,7 @@ void CudaParallelCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int
...
@@ -632,7 +632,7 @@ void CudaParallelCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int
class
CudaParallelCalcCustomNonbondedForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcCustomNonbondedForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -641,7 +641,7 @@ public:
...
@@ -641,7 +641,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcCustomNonbondedForceKernel
&
kernel
;
C
ommon
CalcCustomNonbondedForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -649,7 +649,7 @@ private:
...
@@ -649,7 +649,7 @@ private:
CudaParallelCalcCustomNonbondedForceKernel
::
CudaParallelCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcCustomNonbondedForceKernel
::
CudaParallelCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcCustomNonbondedForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcCustomNonbondedForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcCustomNonbondedForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcCustomNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
CustomNonbondedForce
&
force
)
{
void
CudaParallelCalcCustomNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
CustomNonbondedForce
&
force
)
{
...
@@ -660,7 +660,7 @@ void CudaParallelCalcCustomNonbondedForceKernel::initialize(const System& system
...
@@ -660,7 +660,7 @@ void CudaParallelCalcCustomNonbondedForceKernel::initialize(const System& system
double
CudaParallelCalcCustomNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcCustomNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -673,7 +673,7 @@ void CudaParallelCalcCustomNonbondedForceKernel::copyParametersToContext(Context
...
@@ -673,7 +673,7 @@ void CudaParallelCalcCustomNonbondedForceKernel::copyParametersToContext(Context
class
CudaParallelCalcCustomExternalForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcCustomExternalForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcCustomExternalForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcCustomExternalForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -682,7 +682,7 @@ public:
...
@@ -682,7 +682,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcCustomExternalForceKernel
&
kernel
;
C
ommon
CalcCustomExternalForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -690,7 +690,7 @@ private:
...
@@ -690,7 +690,7 @@ private:
CudaParallelCalcCustomExternalForceKernel
::
CudaParallelCalcCustomExternalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcCustomExternalForceKernel
::
CudaParallelCalcCustomExternalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcCustomExternalForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcCustomExternalForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcCustomExternalForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcCustomExternalForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcCustomExternalForceKernel
::
initialize
(
const
System
&
system
,
const
CustomExternalForce
&
force
)
{
void
CudaParallelCalcCustomExternalForceKernel
::
initialize
(
const
System
&
system
,
const
CustomExternalForce
&
force
)
{
...
@@ -701,7 +701,7 @@ void CudaParallelCalcCustomExternalForceKernel::initialize(const System& system,
...
@@ -701,7 +701,7 @@ void CudaParallelCalcCustomExternalForceKernel::initialize(const System& system,
double
CudaParallelCalcCustomExternalForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcCustomExternalForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -714,7 +714,7 @@ void CudaParallelCalcCustomExternalForceKernel::copyParametersToContext(ContextI
...
@@ -714,7 +714,7 @@ void CudaParallelCalcCustomExternalForceKernel::copyParametersToContext(ContextI
class
CudaParallelCalcCustomHbondForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcCustomHbondForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcCustomHbondForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcCustomHbondForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -723,7 +723,7 @@ public:
...
@@ -723,7 +723,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcCustomHbondForceKernel
&
kernel
;
C
ommon
CalcCustomHbondForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -731,7 +731,7 @@ private:
...
@@ -731,7 +731,7 @@ private:
CudaParallelCalcCustomHbondForceKernel
::
CudaParallelCalcCustomHbondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcCustomHbondForceKernel
::
CudaParallelCalcCustomHbondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcCustomHbondForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcCustomHbondForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcCustomHbondForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcCustomHbondForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcCustomHbondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomHbondForce
&
force
)
{
void
CudaParallelCalcCustomHbondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomHbondForce
&
force
)
{
...
@@ -742,7 +742,7 @@ void CudaParallelCalcCustomHbondForceKernel::initialize(const System& system, co
...
@@ -742,7 +742,7 @@ void CudaParallelCalcCustomHbondForceKernel::initialize(const System& system, co
double
CudaParallelCalcCustomHbondForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcCustomHbondForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
@@ -755,7 +755,7 @@ void CudaParallelCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl
...
@@ -755,7 +755,7 @@ void CudaParallelCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl
class
CudaParallelCalcCustomCompoundBondForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcCustomCompoundBondForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
C
uda
CalcCustomCompoundBondForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
C
ommon
CalcCustomCompoundBondForceKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
}
}
...
@@ -764,7 +764,7 @@ public:
...
@@ -764,7 +764,7 @@ public:
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
C
uda
CalcCustomCompoundBondForceKernel
&
kernel
;
C
ommon
CalcCustomCompoundBondForceKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
double
&
energy
;
};
};
...
@@ -772,7 +772,7 @@ private:
...
@@ -772,7 +772,7 @@ private:
CudaParallelCalcCustomCompoundBondForceKernel
::
CudaParallelCalcCustomCompoundBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CudaParallelCalcCustomCompoundBondForceKernel
::
CudaParallelCalcCustomCompoundBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
const
System
&
system
)
:
CalcCustomCompoundBondForceKernel
(
name
,
platform
),
data
(
data
)
{
CalcCustomCompoundBondForceKernel
(
name
,
platform
),
data
(
data
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
C
uda
CalcCustomCompoundBondForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
kernels
.
push_back
(
Kernel
(
new
C
ommon
CalcCustomCompoundBondForceKernel
(
name
,
platform
,
*
data
.
contexts
[
i
],
system
)));
}
}
void
CudaParallelCalcCustomCompoundBondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomCompoundBondForce
&
force
)
{
void
CudaParallelCalcCustomCompoundBondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomCompoundBondForce
&
force
)
{
...
@@ -783,7 +783,7 @@ void CudaParallelCalcCustomCompoundBondForceKernel::initialize(const System& sys
...
@@ -783,7 +783,7 @@ void CudaParallelCalcCustomCompoundBondForceKernel::initialize(const System& sys
double
CudaParallelCalcCustomCompoundBondForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaParallelCalcCustomCompoundBondForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
C
uda
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
C
ompute
Context
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
Task
(
context
,
getKernel
(
i
),
includeForces
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
}
}
return
0.0
;
return
0.0
;
...
...
platforms/cuda/src/CudaParameterSet.cpp
View file @
5a06df78
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
9
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -25,174 +25,12 @@
...
@@ -25,174 +25,12 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "CudaParameterSet.h"
#include "CudaParameterSet.h"
#include "openmm/OpenMMException.h"
#include <cmath>
#include <sstream>
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
/**
 * Evaluate a CUDA driver call and throw an OpenMMException if it did not return CUDA_SUCCESS.
 *
 * The argument is evaluated exactly once and its status is stored in a local, so a failing
 * call is not executed a second time while the error message is being built.  The body is
 * wrapped in do { } while (false) so the macro behaves as a single statement (it can be the
 * unbraced body of a for loop or an if/else branch); callers terminate it with a semicolon.
 *
 * The expansion refers to two names that must be visible where the macro is used:
 * `errorMessage` (text used as the message prefix) and `context` (an object providing
 * getErrorString()).
 */
#define CHECK_RESULT(result) \
    do { \
        const CUresult checkResultStatus = (result); \
        if (checkResultStatus != CUDA_SUCCESS) { \
            std::stringstream m; \
            m<<errorMessage<<": "<<context.getErrorString(checkResultStatus)<<" ("<<checkResultStatus<<")"; \
            throw OpenMMException(m.str());\
        } \
    } while (false)
CudaParameterSet
::
CudaParameterSet
(
CudaContext
&
context
,
int
numParameters
,
int
numObjects
,
const
string
&
name
,
bool
bufferPerParameter
,
bool
useDoublePrecision
)
:
CudaParameterSet
::
CudaParameterSet
(
CudaContext
&
context
,
int
numParameters
,
int
numObjects
,
const
string
&
name
,
bool
bufferPerParameter
,
bool
useDoublePrecision
)
:
context
(
context
),
numParameters
(
numParameters
),
numObjects
(
numObjects
),
name
(
name
)
{
ComputeParameterSet
(
context
,
numParameters
,
numObjects
,
name
,
bufferPerParameter
,
useDoublePrecision
)
{
int
params
=
numParameters
;
for
(
auto
&
info
:
getParameterInfos
())
int
bufferCount
=
0
;
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
info
.
getName
(),
info
.
getComponentType
(),
info
.
getNumComponents
(),
info
.
getSize
(),
context
.
unwrap
(
info
.
getArray
()).
getDevicePointer
()));
elementSize
=
(
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
));
string
elementType
=
(
useDoublePrecision
?
"double"
:
"float"
);
CUdeviceptr
pointer
;
string
errorMessage
=
"Error creating parameter set "
+
name
;
if
(
!
bufferPerParameter
)
{
while
(
params
>
2
)
{
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
*
4
));
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
4
,
elementSize
*
4
,
pointer
));
params
-=
4
;
}
if
(
params
>
1
)
{
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
*
2
));
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
2
,
elementSize
*
2
,
pointer
));
params
-=
2
;
}
}
while
(
params
>
0
)
{
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
));
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
1
,
elementSize
,
pointer
));
params
--
;
}
}
/**
 * Free the device memory referenced by every entry in `buffers`.
 *
 * Nothing is freed when the context reports that it is no longer valid.
 * NOTE(review): CHECK_RESULT throws on failure, so this destructor can throw -- confirm
 * that the class declaration permits that.
 */
CudaParameterSet::~CudaParameterSet() {
    if (!context.getContextIsValid())
        return;
    // CHECK_RESULT reads this name to build its exception text.
    string errorMessage = "Error freeing device memory";
    const int numBuffers = (int) buffers.size();
    for (int bufferIndex = 0; bufferIndex < numBuffers; bufferIndex++)
        CHECK_RESULT(cuMemFree(buffers[bufferIndex].getMemory()));
}
/**
 * Download every parameter value from the device into host memory.
 *
 * @param values  resized to numObjects rows of numParameters entries; on return
 *                values[object][parameter] holds the downloaded value
 * @throws OpenMMException if sizeof(T) differs from the set's element size, if a buffer's
 *         per-object size is not 1, 2 or 4 elements, or if a CUDA copy fails
 */
template <class T>
void CudaParameterSet::getParameterValues(vector<vector<T> >& values) {
    if (sizeof(T) != elementSize)
        throw OpenMMException("Called getParameterValues() with vector of wrong type");
    values.resize(numObjects);
    for (int object = 0; object < numObjects; object++)
        values[object].resize(numParameters);
    // CHECK_RESULT reads this name to build its exception text.
    string errorMessage = "Error downloading parameter set "+name;
    int firstParam = 0; // index of the parameter stored in component 0 of the current buffer
    const int numBuffers = (int) buffers.size();
    for (int bufferIndex = 0; bufferIndex < numBuffers; bufferIndex++) {
        // Work out how many elements each object occupies in this buffer.
        int width;
        if (buffers[bufferIndex].getSize() == 4*elementSize)
            width = 4;
        else if (buffers[bufferIndex].getSize() == 2*elementSize)
            width = 2;
        else if (buffers[bufferIndex].getSize() == elementSize)
            width = 1;
        else
            throw OpenMMException("Internal error: Unknown buffer type in CudaParameterSet");
        vector<T> data(width*numObjects);
        CHECK_RESULT(cuMemcpyDtoH(&data[0], buffers[bufferIndex].getMemory(), numObjects*buffers[bufferIndex].getSize()));
        for (int object = 0; object < numObjects; object++) {
            // Component 0 is always copied; later components only when they map to a real parameter.
            values[object][firstParam] = data[width*object];
            for (int component = 1; component < width; component++)
                if (firstParam+component < numParameters)
                    values[object][firstParam+component] = data[width*object+component];
        }
        firstParam += width;
    }
}
}
template
<
class
T
>
void
CudaParameterSet
::
setParameterValues
(
const
vector
<
vector
<
T
>
>&
values
)
{
if
(
sizeof
(
T
)
!=
elementSize
)
throw
OpenMMException
(
"Called setParameterValues() with vector of wrong type"
);
int
base
=
0
;
string
errorMessage
=
"Error uploading parameter set "
+
name
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
getSize
()
==
4
*
elementSize
)
{
vector
<
T
>
data
(
4
*
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
data
[
4
*
j
]
=
values
[
j
][
base
];
if
(
base
+
1
<
numParameters
)
data
[
4
*
j
+
1
]
=
values
[
j
][
base
+
1
];
if
(
base
+
2
<
numParameters
)
data
[
4
*
j
+
2
]
=
values
[
j
][
base
+
2
];
if
(
base
+
3
<
numParameters
)
data
[
4
*
j
+
3
]
=
values
[
j
][
base
+
3
];
}
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
base
+=
4
;
}
else
if
(
buffers
[
i
].
getSize
()
==
2
*
elementSize
)
{
vector
<
T
>
data
(
2
*
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
data
[
2
*
j
]
=
values
[
j
][
base
];
if
(
base
+
1
<
numParameters
)
data
[
2
*
j
+
1
]
=
values
[
j
][
base
+
1
];
}
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
base
+=
2
;
}
else
if
(
buffers
[
i
].
getSize
()
==
elementSize
)
{
vector
<
T
>
data
(
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
data
[
j
]
=
values
[
j
][
base
];
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
base
++
;
}
else
throw
OpenMMException
(
"Internal error: Unknown buffer type in CudaParameterSet"
);
}
}
/**
 * Build the suffix that identifies one parameter: the 1-based position of the buffer that
 * holds it, followed by extraSuffix, followed by a component selector (".x", ".y", ".z" or
 * ".w") when that buffer stores more than one element per object.
 *
 * @param index        zero-based parameter index
 * @param extraSuffix  text inserted between the buffer number and the component selector
 * @return the assembled suffix
 * @throws OpenMMException if index is negative or lies beyond the parameters covered by
 *         the buffers
 */
string CudaParameterSet::getParameterSuffix(int index, const std::string& extraSuffix) const {
    // A negative index would match the first buffer and then index the component table out of bounds.
    if (index < 0)
        throw OpenMMException("Internal error: Illegal argument to CudaParameterSet::getParameterSuffix() ("+name+")");
    const char* const suffixes[] = {".x", ".y", ".z", ".w"};
    int buffer = -1;
    const int numBuffers = (int) buffers.size();
    for (int i = 0; i < numBuffers; i++) {
        if (index*elementSize < buffers[i].getSize()) {
            buffer = i;
            break;
        }
        // Skip past the parameters held by this buffer; index becomes relative to the next one.
        index -= buffers[i].getSize()/elementSize;
    }
    if (buffer == -1)
        throw OpenMMException("Internal error: Illegal argument to CudaParameterSet::getParameterSuffix() ("+name+")");
    stringstream suffix;
    suffix << (buffer+1) << extraSuffix;
    // Single-element buffers are addressed directly and get no component selector.
    if (buffers[buffer].getSize() != elementSize)
        suffix << suffixes[index];
    return suffix.str();
}
/**
 * Explicit instantiations of the getParameterValues() and setParameterValues() member
 * templates for the float and double element types.
 */
namespace OpenMM {
// Host <- device
template OPENMM_EXPORT_CUDA void CudaParameterSet::getParameterValues<float>(vector<vector<float> >& values);
template OPENMM_EXPORT_CUDA void CudaParameterSet::getParameterValues<double>(vector<vector<double> >& values);
// Host -> device
template OPENMM_EXPORT_CUDA void CudaParameterSet::setParameterValues<float>(const vector<vector<float> >& values);
template OPENMM_EXPORT_CUDA void CudaParameterSet::setParameterValues<double>(const vector<vector<double> >& values);
}
\ No newline at end of file
platforms/cuda/src/CudaPlatform.cpp
View file @
5a06df78
...
@@ -51,12 +51,12 @@ using namespace std;
...
@@ -51,12 +51,12 @@ using namespace std;
}
}
#ifdef OPENMM_C
UDA
_BUILDING_STATIC_LIBRARY
#ifdef OPENMM_C
OMMON
_BUILDING_STATIC_LIBRARY
extern
"C"
void
registerCudaPlatform
()
{
extern
"C"
void
registerCudaPlatform
()
{
Platform
::
registerPlatform
(
new
CudaPlatform
());
Platform
::
registerPlatform
(
new
CudaPlatform
());
}
}
#else
#else
extern
"C"
OPENMM_EXPORT_C
UDA
void
registerPlatforms
()
{
extern
"C"
OPENMM_EXPORT_C
OMMON
void
registerPlatforms
()
{
Platform
::
registerPlatform
(
new
CudaPlatform
());
Platform
::
registerPlatform
(
new
CudaPlatform
());
}
}
#endif
#endif
...
@@ -96,13 +96,15 @@ CudaPlatform::CudaPlatform() {
...
@@ -96,13 +96,15 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVelocityVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
Integrate
BAOAB
StepKernel
::
Name
(),
factory
);
registerKernelFactory
(
Integrate
LangevinMiddle
StepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateBrownianStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateBrownianStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVariableVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVariableVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVariableLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVariableLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateCustomStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateCustomStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
ApplyAndersenThermostatKernel
::
Name
(),
factory
);
registerKernelFactory
(
ApplyAndersenThermostatKernel
::
Name
(),
factory
);
registerKernelFactory
(
NoseHooverChainKernel
::
Name
(),
factory
);
registerKernelFactory
(
ApplyMonteCarloBarostatKernel
::
Name
(),
factory
);
registerKernelFactory
(
ApplyMonteCarloBarostatKernel
::
Name
(),
factory
);
registerKernelFactory
(
RemoveCMMotionKernel
::
Name
(),
factory
);
registerKernelFactory
(
RemoveCMMotionKernel
::
Name
(),
factory
);
platformProperties
.
push_back
(
CudaDeviceIndex
());
platformProperties
.
push_back
(
CudaDeviceIndex
());
...
...
platforms/cuda/src/Cuda
ForceInfo
.cpp
→
platforms/cuda/src/Cuda
Program
.cpp
View file @
5a06df78
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 201
2
Stanford University and the Authors. *
* Portions copyright (c) 201
9
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -24,23 +24,16 @@
...
@@ -24,23 +24,16 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "CudaForceInfo.h"
#include "CudaProgram.h"
#include "CudaKernel.h"
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
bool
CudaForceInfo
::
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
CudaProgram
::
CudaProgram
(
CudaContext
&
context
,
CUmodule
module
)
:
context
(
context
),
module
(
module
)
{
return
true
;
}
}
int
CudaForceInfo
::
getNumParticleGroups
()
{
ComputeKernel
CudaProgram
::
createKernel
(
const
string
&
name
)
{
return
0
;
CUfunction
kernel
=
context
.
getKernel
(
module
,
name
.
c_str
());
}
return
shared_ptr
<
ComputeKernelImpl
>
(
new
CudaKernel
(
context
,
kernel
,
name
));
}
void
CudaForceInfo
::
getParticlesInGroup
(
int
index
,
vector
<
int
>&
particles
)
{
\ No newline at end of file
return
;
}
bool
CudaForceInfo
::
areGroupsIdentical
(
int
group1
,
int
group2
)
{
return
true
;
}
platforms/cuda/src/kernels/common.cu
0 → 100644
View file @
5a06df78
/**
 * CUDA definitions of the macros and types that the common compute framework's kernel
 * sources are written against.
 */

// Function and memory qualifiers.  LOCAL_ARG and GLOBAL expand to nothing here.
#define KERNEL extern "C" __global__
#define DEVICE __device__
#define LOCAL __shared__
#define LOCAL_ARG
#define GLOBAL
#define RESTRICT __restrict__

// Thread and block indexing, all along the x dimension.
#define LOCAL_ID threadIdx.x
#define LOCAL_SIZE blockDim.x
#define GLOBAL_ID (blockIdx.x*blockDim.x+threadIdx.x)
#define GLOBAL_SIZE (blockDim.x*gridDim.x)
#define GROUP_ID blockIdx.x
#define NUM_GROUPS gridDim.x

// Synchronization helpers.  Both expansions already end in a semicolon.
#define SYNC_THREADS __syncthreads();
#define MEM_FENCE __threadfence_block();

// Atomic addition, forwarded to atomicAdd().
#define ATOMIC_ADD(dest, value) atomicAdd(dest, value)

// Names for the long long integer types.
typedef long long mm_long;
typedef unsigned long long mm_ulong;

// Feature flags, both defined as 1 in this file.
#define SUPPORTS_64_BIT_ATOMICS 1
#define SUPPORTS_DOUBLE_PRECISION 1
Prev
1
…
3
4
5
6
7
8
9
10
11
…
17
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment