Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
abb8cb4b
Commit
abb8cb4b
authored
Jun 04, 2012
by
Peter Eastman
Browse files
Very early beginnings of new CUDA platform
parent
18501459
Changes
21
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
4779 additions
and
0 deletions
+4779
-0
platforms/cuda2/CMakeLists.txt
platforms/cuda2/CMakeLists.txt
+102
-0
platforms/cuda2/EncodeCUDAFiles.cmake
platforms/cuda2/EncodeCUDAFiles.cmake
+27
-0
platforms/cuda2/include/CudaKernelFactory.h
platforms/cuda2/include/CudaKernelFactory.h
+45
-0
platforms/cuda2/include/CudaPlatform.h
platforms/cuda2/include/CudaPlatform.h
+112
-0
platforms/cuda2/sharedTarget/CMakeLists.txt
platforms/cuda2/sharedTarget/CMakeLists.txt
+24
-0
platforms/cuda2/src/CudaArray.cpp
platforms/cuda2/src/CudaArray.cpp
+79
-0
platforms/cuda2/src/CudaArray.h
platforms/cuda2/src/CudaArray.h
+134
-0
platforms/cuda2/src/CudaContext.cpp
platforms/cuda2/src/CudaContext.cpp
+972
-0
platforms/cuda2/src/CudaContext.h
platforms/cuda2/src/CudaContext.h
+531
-0
platforms/cuda2/src/CudaExpressionUtilities.cpp
platforms/cuda2/src/CudaExpressionUtilities.cpp
+369
-0
platforms/cuda2/src/CudaExpressionUtilities.h
platforms/cuda2/src/CudaExpressionUtilities.h
+126
-0
platforms/cuda2/src/CudaForceInfo.cpp
platforms/cuda2/src/CudaForceInfo.cpp
+46
-0
platforms/cuda2/src/CudaForceInfo.h
platforms/cuda2/src/CudaForceInfo.h
+72
-0
platforms/cuda2/src/CudaKernelFactory.cpp
platforms/cuda2/src/CudaKernelFactory.cpp
+129
-0
platforms/cuda2/src/CudaKernelSources.cpp.in
platforms/cuda2/src/CudaKernelSources.cpp.in
+31
-0
platforms/cuda2/src/CudaKernelSources.h.in
platforms/cuda2/src/CudaKernelSources.h.in
+48
-0
platforms/cuda2/src/CudaKernels.h
platforms/cuda2/src/CudaKernels.h
+1197
-0
platforms/cuda2/src/CudaPlatform.cpp
platforms/cuda2/src/CudaPlatform.cpp
+176
-0
platforms/cuda2/src/kernels/utilities.cu
platforms/cuda2/src/kernels/utilities.cu
+102
-0
platforms/cuda2/src/kernels/vectorOps.cu
platforms/cuda2/src/kernels/vectorOps.cu
+457
-0
No files found.
platforms/cuda2/CMakeLists.txt
0 → 100644
View file @
abb8cb4b
#---------------------------------------------------
# OpenMM CUDA Platform
#
# Creates OpenMM library, base name=OpenMMCUDA.
# Default libraries are shared & optimized. Variants
# are created for static (_static) and debug (_d).
#
# Windows:
# OpenMMCUDA[_d].dll
# OpenMMCUDA[_d].lib
# OpenMMCUDA_static[_d].lib
# Unix:
# libOpenMMCUDA[_d].so
# libOpenMMCUDA_static[_d].a
#----------------------------------------------------
# Pin the macOS deployment target and SDK used when building on Apple hosts.
if(APPLE)
    set(CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
    set(CMAKE_OSX_SYSROOT "/Developer/SDKs/MacOSX10.6.sdk")
endif()

# Cache switch controlling whether the CUDA test directory is built.
set(OPENMM_BUILD_CUDA_TESTS TRUE CACHE BOOL "Whether to build CUDA test cases")
if(OPENMM_BUILD_CUDA_TESTS)
    # NOTE(review): subdirs() is kept instead of add_subdirectory() because the
    # two commands may differ in when the subdirectory is processed relative to
    # the variables defined further down -- confirm before modernizing.
    subdirs(tests)
endif()

# All source subdirectories are handled directly from this file; only "." is
# listed at the moment.
set(OPENMM_SOURCE_SUBDIRS .)

# Base name of the library and the target names derived from it.
set(OPENMMCUDA_LIBRARY_NAME OpenMMCUDA)
set(SHARED_TARGET ${OPENMMCUDA_LIBRARY_NAME})
set(STATIC_TARGET ${OPENMMCUDA_LIBRARY_NAME}_static)

# Debug libraries carry a "_d" suffix.  Visual Studio generators get it through
# CMAKE_DEBUG_POSTFIX ...
if(CMAKE_GENERATOR MATCHES "Visual Studio")
    set(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
endif()

# ... while on Unix (including Cygwin) the suffix is appended to the target
# names by hand.
if(UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
    set(SHARED_TARGET ${SHARED_TARGET}_d)
    set(STATIC_TARGET ${STATIC_TARGET}_d)
endif()

# Directories searched for the header files that make up the API.
set(API_INCLUDE_DIRS)
foreach(subdir ${OPENMM_SOURCE_SUBDIRS})
    list(APPEND API_INCLUDE_DIRS
        ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
        ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
endforeach()

# Gather the API headers twice: once as globbed (full) path names and once as
# "<include dir>/<file name>" entries.
set(API_REL_INCLUDE_FILES)
set(API_ABS_INCLUDE_FILES)
foreach(dir ${API_INCLUDE_DIRS})
    file(GLOB fullpaths ${dir}/*.h)
    list(APPEND API_ABS_INCLUDE_FILES ${fullpaths})
    foreach(pathname ${fullpaths})
        get_filename_component(filename ${pathname} NAME)
        list(APPEND API_REL_INCLUDE_FILES ${dir}/${filename})
    endforeach()
endforeach()

# Gather implementation sources and private headers, and put each public
# include directory at the front of the include path.
set(SOURCE_FILES)
set(SOURCE_INCLUDE_FILES)
foreach(subdir ${OPENMM_SOURCE_SUBDIRS})
    file(GLOB_RECURSE src_files
        ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
    file(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
    list(APPEND SOURCE_FILES ${src_files})
    list(APPEND SOURCE_INCLUDE_FILES ${incl_files})
    include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
endforeach()
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)

# Variables describing the generated C++ class that embeds the kernel sources.
# The generated files live in the build tree and are compiled with the rest.
set(CUDA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
set(CUDA_SOURCE_CLASS CudaKernelSources)
set(CUDA_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.cpp)
set(CUDA_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.h)
list(APPEND SOURCE_FILES ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H})
include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)

# The shared library target itself is defined in its own subdirectory.
subdirs(sharedTarget)
platforms/cuda2/EncodeCUDAFiles.cmake
0 → 100644
View file @
abb8cb4b
# Encode every CUDA kernel source file as a C++ string constant.
#
# This script is meant to be run in script mode (cmake -P) with the following
# variables supplied through -D:
#   CUDA_SOURCE_DIR   - directory containing kernels/*.cu and the
#                       <class>.cpp.in / <class>.h.in templates
#   CUDA_SOURCE_CLASS - name of the generated C++ class
#   CUDA_KERNELS_CPP  - path of the .cpp file to generate
#   CUDA_KERNELS_H    - path of the .h file to generate

# Fail loudly when a required input is missing; otherwise the script would
# silently glob "/kernels/*.cu" or write to an empty file name.
foreach(required_var CUDA_SOURCE_DIR CUDA_SOURCE_CLASS CUDA_KERNELS_CPP CUDA_KERNELS_H)
    string(LENGTH "${${required_var}}" required_length)
    if(required_length EQUAL 0)
        message(FATAL_ERROR "EncodeCUDAFiles.cmake: ${required_var} must be set (pass -D ${required_var}=<value>)")
    endif()
endforeach()

file(GLOB CUDA_KERNELS "${CUDA_SOURCE_DIR}/kernels/*.cu")
set(CUDA_FILE_DECLARATIONS)
set(CUDA_FILE_DEFINITIONS)

# Start the .cpp file from its template; one definition per kernel file is
# appended to it below.
configure_file("${CUDA_SOURCE_DIR}/${CUDA_SOURCE_CLASS}.cpp.in" "${CUDA_KERNELS_CPP}")

foreach(file ${CUDA_KERNELS})
    # Load the file contents and process it.
    file(STRINGS "${file}" file_content NEWLINE_CONSUME)

    # Replace all backslashes by double backslashes as they are being put in a C string.
    # Be careful not to replace the backslash before a semicolon as that is the CMake
    # internal escaping of a semicolon to prevent it from acting as a list separator.
    string(REGEX REPLACE "\\\\([^;])" "\\\\\\\\\\1" file_content "${file_content}")

    # Escape double quotes as being put in a C string.
    string(REPLACE "\"" "\\\"" file_content "${file_content}")

    # Split in separate C strings for each line.
    string(REPLACE "\n" "\\n\"\n\"" file_content "${file_content}")

    # Determine a name for the variable that will contain this file's contents:
    # the file name relative to kernels/ with its last three characters (".cu")
    # removed.
    file(RELATIVE_PATH filename "${CUDA_SOURCE_DIR}/kernels" "${file}")
    string(LENGTH "${filename}" filename_length)
    math(EXPR filename_length "${filename_length} - 3")
    string(SUBSTRING "${filename}" 0 ${filename_length} variable_name)

    # Record the variable declaration and definition.  These two arguments are
    # intentionally left unquoted: they rely on CMake's backslash escapes and on
    # the "\;" escaping described above.
    set(CUDA_FILE_DECLARATIONS ${CUDA_FILE_DECLARATIONS}static\ const\ std::string\ ${variable_name};\n)
    file(APPEND "${CUDA_KERNELS_CPP}" const\ string\ ${CUDA_SOURCE_CLASS}::${variable_name}\ =\ \"${file_content}\"\;\n)
endforeach()

# Generate the header last, once CUDA_FILE_DECLARATIONS has been accumulated.
configure_file("${CUDA_SOURCE_DIR}/${CUDA_SOURCE_CLASS}.h.in" "${CUDA_KERNELS_H}")
platforms/cuda2/include/CudaKernelFactory.h
0 → 100644
View file @
abb8cb4b
#ifndef OPENMM_CUDAKERNELFACTORY_H_
#define OPENMM_CUDAKERNELFACTORY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/KernelFactory.h"
namespace OpenMM {

/**
 * This KernelFactory creates all kernels for CudaPlatform.
 */
class CudaKernelFactory : public KernelFactory {
public:
    /**
     * Create a KernelImpl.  Only the declaration is visible in this header.
     *
     * @param name      the name identifying which kernel to create
     *                  (presumably one of the kernel names defined by OpenMM -- confirm against the implementation)
     * @param platform  the Platform the kernel is being created for
     * @param context   the ContextImpl the kernel is being created for
     * @return a pointer to the newly created KernelImpl
     */
    KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
};

} // namespace OpenMM
#endif
/*OPENMM_CUDAKERNELFACTORY_H_*/
platforms/cuda2/include/CudaPlatform.h
0 → 100644
View file @
abb8cb4b
#ifndef OPENMM_CUDAPLATFORM_H_
#define OPENMM_CUDAPLATFORM_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/Platform.h"
#include "openmm/System.h"
namespace OpenMM {

class CudaContext;

/**
 * A Platform subclass whose kernels are implemented with CUDA.
 */
class OPENMM_EXPORT CudaPlatform : public Platform {
public:
    class PlatformData;
    CudaPlatform();
    /**
     * Get the name of this platform, which is always "CUDA".
     */
    const std::string& getName() const {
        static const std::string platformName("CUDA");
        return platformName;
    }
    /**
     * Get the speed estimate reported for this platform (the constant 100).
     */
    double getSpeed() const {
        return 100.0;
    }
    bool supportsDoublePrecision() const;
    /**
     * Get the value of a named property for a Context.
     */
    const std::string& getPropertyValue(const Context& context, const std::string& property) const;
    /**
     * Set the value of a named property for a Context.
     */
    void setPropertyValue(Context& context, const std::string& property, const std::string& value) const;
    /**
     * NOTE(review): presumably invoked when a context is created, with the properties
     * specified for it -- confirm against openmm/Platform.h.
     */
    void contextCreated(ContextImpl& context, const std::map<std::string, std::string>& properties) const;
    /**
     * NOTE(review): presumably invoked when a context is destroyed -- confirm against
     * openmm/Platform.h.
     */
    void contextDestroyed(ContextImpl& context) const;
    /**
     * The name of the property that selects which CUDA device or devices to use.
     */
    static const std::string& CudaDeviceIndex() {
        static const std::string propertyName("CudaDeviceIndex");
        return propertyName;
    }
    /**
     * The name of the property that selects whether CUDA should sync or spin loop
     * while waiting for results.
     */
    static const std::string& CudaUseBlockingSync() {
        static const std::string propertyName("CudaUseBlockingSync");
        return propertyName;
    }
    /**
     * The name of the property that selects the numerical precision to use.
     */
    static const std::string& CudaPrecision() {
        static const std::string propertyName("CudaPrecision");
        return propertyName;
    }
    /**
     * The name of the property that gives the path to the CUDA compiler.
     */
    static const std::string& CudaCompiler() {
        static const std::string propertyName("CudaCompiler");
        return propertyName;
    }
    /**
     * The name of the property that gives the directory in which temporary files
     * are created.
     */
    static const std::string& CudaTempDirectory() {
        static const std::string propertyName("CudaTempDirectory");
        return propertyName;
    }
};

/**
 * The data CudaPlatform keeps for a context.  The constructor receives the System
 * and one string per platform property (device index, blocking sync, precision,
 * compiler and temporary directory).
 */
class OPENMM_EXPORT CudaPlatform::PlatformData {
public:
    PlatformData(const System& system,
                 const std::string& deviceIndexProperty,
                 const std::string& blockingProperty,
                 const std::string& precisionProperty,
                 const std::string& compilerProperty,
                 const std::string& tempProperty);
    ~PlatformData();
    void initializeContexts(const System& system);
    void syncContexts();
    // The CudaContext objects belonging to this PlatformData.
    std::vector<CudaContext*> contexts;
    // NOTE(review): presumably one energy value per entry in contexts -- confirm.
    std::vector<double> contextEnergy;
    bool removeCM;
    int cmMotionFrequency;
    int stepCount;
    int computeForceCount;
    double time;
    // Property values stored as (name, value) string pairs.
    std::map<std::string, std::string> propertyValues;
};

} // namespace OpenMM
#endif
/*OPENMM_CUDAPLATFORM_H_*/
platforms/cuda2/sharedTarget/CMakeLists.txt
0 → 100644
View file @
abb8cb4b
#
# Build the shared CUDA platform library.  The variables CUDA_SOURCE_DIR,
# CUDA_SOURCE_CLASS, CUDA_KERNELS_CPP, CUDA_KERNELS_H, SHARED_TARGET,
# SOURCE_FILES, SOURCE_INCLUDE_FILES and API_ABS_INCLUDE_FILES are expected to
# be defined before this file is processed.
#

#
# Include CUDA related files.
#
include(FindCUDA)
include_directories(${CUDA_INCLUDE_DIRS})

# Generate the C++ class that embeds the kernel sources.  Besides the kernels
# themselves, the rule depends on the encoder script and on the two templates it
# reads, so that editing any of them regenerates the outputs.
file(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
set(CUDA_ENCODE_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../EncodeCUDAFiles.cmake)
add_custom_command(
    OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
    COMMAND ${CMAKE_COMMAND}
        -D CUDA_SOURCE_DIR=${CUDA_SOURCE_DIR}
        -D CUDA_KERNELS_CPP=${CUDA_KERNELS_CPP}
        -D CUDA_KERNELS_H=${CUDA_KERNELS_H}
        -D CUDA_SOURCE_CLASS=${CUDA_SOURCE_CLASS}
        -P ${CUDA_ENCODE_SCRIPT}
    DEPENDS
        ${CUDA_KERNELS}
        ${CUDA_ENCODE_SCRIPT}
        ${CUDA_SOURCE_DIR}/${CUDA_SOURCE_CLASS}.cpp.in
        ${CUDA_SOURCE_DIR}/${CUDA_SOURCE_CLASS}.h.in
    # VERBATIM makes argument escaping identical on every platform.
    VERBATIM
)
set_source_files_properties(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GENERATED TRUE)

add_library(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})

# Link against the main OpenMM library, using its "_d" name for Unix debug
# builds.
if(UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
    set(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
else()
    set(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
endif()
target_link_libraries(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUDA_LIBRARIES} ${PTHREADS_LIB})
set_target_properties(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_BUILDING_SHARED_LIBRARY")

install_targets(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
platforms/cuda2/src/CudaArray.cpp
0 → 100644
View file @
abb8cb4b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaArray.h"
#include <iostream>
#include <sstream>
#include <vector>
using
namespace
OpenMM
;
/**
 * Allocate size*elementSize bytes of device memory for a new array.
 *
 * @param size         the number of elements in the array
 * @param elementSize  the size of each element in bytes
 * @param name         the name of the array, used in error messages
 * @throws OpenMMException if cuMemAlloc() does not return CUDA_SUCCESS
 */
CudaArray::CudaArray(int size, int elementSize, const std::string& name) :
        // Initializers are listed in member declaration order, and pointer gets a
        // defined value even if the allocation below fails.
        pointer(0), size(size), elementSize(elementSize), ownsMemory(true), name(name) {
    // Compute the byte count as size_t so that a large array cannot overflow int.
    CUresult result = cuMemAlloc(&pointer, static_cast<size_t>(size)*elementSize);
    if (result != CUDA_SUCCESS) {
        std::stringstream str;
        str<<"Error creating array "<<name<<": "<<result;
        throw OpenMMException(str.str());
    }
}
/**
 * Release the device memory if this object owns it.
 *
 * A failure of cuMemFree() is reported on std::cerr instead of being thrown:
 * an exception escaping a destructor terminates the program when it happens
 * during stack unwinding (and always does so when the destructor is noexcept).
 */
CudaArray::~CudaArray() {
    if (!ownsMemory)
        return;
    CUresult result = cuMemFree(pointer);
    if (result != CUDA_SUCCESS)
        std::cerr<<"Error deleting array "<<name<<": "<<result<<std::endl;
}
void
CudaArray
::
upload
(
void
*
data
,
bool
blocking
)
{
CUresult
result
;
if
(
blocking
)
result
=
cuMemcpyHtoD
(
pointer
,
data
,
size
*
elementSize
);
else
result
=
cuMemcpyHtoDAsync
(
pointer
,
data
,
size
*
elementSize
,
0
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
name
<<
": "
<<
result
;
throw
OpenMMException
(
str
.
str
());
}
}
void
CudaArray
::
download
(
void
*
data
,
bool
blocking
)
const
{
CUresult
result
;
if
(
blocking
)
result
=
cuMemcpyDtoH
(
data
,
pointer
,
size
*
elementSize
);
else
result
=
cuMemcpyDtoHAsync
(
data
,
pointer
,
size
*
elementSize
,
0
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error downloading array "
<<
name
<<
": "
<<
result
;
throw
OpenMMException
(
str
.
str
());
}
}
platforms/cuda2/src/CudaArray.h
0 → 100644
View file @
abb8cb4b
#ifndef OPENMM_CUDAARRAY_H_
#define OPENMM_CUDAARRAY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/OpenMMException.h"
#include <cuda.h>
#include <iostream>
#include <sstream>
#include <vector>
namespace
OpenMM
{
/**
* This class encapsulates a block of CUDA device memory. It provides a simplified API
* for working with it and for copying data to and from device memory.
*/
class
CudaArray
{
public:
/**
* Create a CudaArray object. The object is allocated on the heap with the "new" operator.
* The template argument is the data type of each array element.
*
* @param size the number of elements in the array
* @param name the name of the array
*/
template
<
class
T
>
static
CudaArray
*
create
(
int
size
,
const
std
::
string
&
name
)
{
return
new
CudaArray
(
size
,
sizeof
(
T
),
name
);
}
/**
* Create a CudaArray object.
*
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
CudaArray
(
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
~
CudaArray
();
/**
* Get the number of elements in the array.
*/
int
getSize
()
const
{
return
size
;
}
/**
* Get the size of each element in bytes.
*/
int
getElementSize
()
const
{
return
elementSize
;
}
/**
* Get the name of the array.
*/
const
std
::
string
&
getName
()
const
{
return
name
;
}
/**
* Get a pointer to the device memory.
*/
CUdeviceptr
getDevicePointer
()
{
return
pointer
;
}
/**
* Copy the values in a vector to the device memory.
*/
template
<
class
T
>
void
upload
(
std
::
vector
<
T
>&
data
)
{
if
(
sizeof
(
T
)
!=
elementSize
||
data
.
size
()
!=
size
)
throw
OpenMMException
(
"Error uploading array "
+
name
+
": The specified vector does not match the size of the array"
);
upload
(
&
data
[
0
],
true
);
}
/**
* Copy the values in the Buffer to a vector.
*/
template
<
class
T
>
void
download
(
std
::
vector
<
T
>&
data
)
const
{
if
(
sizeof
(
T
)
!=
elementSize
)
throw
OpenMMException
(
"Error downloading array "
+
name
+
": The specified vector has the wrong element size"
);
if
(
data
.
size
()
!=
size
)
data
.
resize
(
size
);
download
(
&
data
[
0
],
true
);
}
/**
* Copy the values in an array to the device memory.
*
* @param data the data to copy
* @param blocking if true, this call will block until the transfer is complete. If false,
* the source array must be in page-locked memory.
*/
void
upload
(
void
*
data
,
bool
blocking
=
true
);
/**
* Copy the values in the device memory to an array.
*
* @param data the array to copy the memory to
* @param blocking if true, this call will block until the transfer is complete. If false,
* the destination array must be in page-locked memory.
*/
void
download
(
void
*
data
,
bool
blocking
=
true
)
const
;
private:
CUdeviceptr
pointer
;
int
size
,
elementSize
;
bool
ownsMemory
;
std
::
string
name
;
};
}
// namespace OpenMM
#endif
/*OPENMM_CUDAARRAY_H_*/
platforms/cuda2/src/CudaContext.cpp
0 → 100644
View file @
abb8cb4b
This diff is collapsed.
Click to expand it.
platforms/cuda2/src/CudaContext.h
0 → 100644
View file @
abb8cb4b
#ifndef OPENMM_CUDACONTEXT_H_
#define OPENMM_CUDACONTEXT_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <map>
#include <queue>
#include <string>
#include <pthread.h>
#define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER
// Prevent Windows from defining macros that interfere with other code.
#define NOMINMAX
#endif
#include <cuda.h>
#include <builtin_types.h>
#include <vector_functions.h>
#include "openmm/internal/windowsExport.h"
#include "CudaPlatform.h"
namespace
OpenMM
{
class
CudaArray
;
class
CudaForceInfo
;
class
CudaIntegrationUtilities
;
class
CudaBondedUtilities
;
class
CudaNonbondedUtilities
;
class
System
;
/**
* This class contains the information associated with a Context by the CUDA Platform. Each CudaContext is
* specific to a particular device, and manages data structures and kernels for that device. When running a simulation
* in parallel on multiple devices, there is a separate CudaContext for each one. The list of all contexts is
* stored in the CudaPlatform::PlatformData.
* <p>
* In addition, a worker thread is created for each CudaContext. This is used for parallel computations, so that
* blocking calls to one device will not block other devices. When only a single device is being used, the worker
* thread is not used and calculations are performed on the main application thread.
*/
class
OPENMM_EXPORT
CudaContext
{
public:
class
WorkTask
;
class
WorkThread
;
class
ReorderListener
;
static
const
int
ThreadBlockSize
;
static
const
int
TileSize
;
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
std
::
string
&
precision
,
const
std
::
string
&
compiler
,
const
std
::
string
&
tempDir
,
CudaPlatform
::
PlatformData
&
platformData
);
~
CudaContext
();
// /**
// * This is called to initialize internal data structures after all Forces in the system
// * have been initialized.
// */
// void initialize();
/**
* Add a CudaForce to this context.
*/
void
addForce
(
CudaForceInfo
*
force
);
/**
* Get the CUcontext associated with this object.
*/
CUcontext
getContext
()
{
return
context
;
}
/**
* Get the CUdevice associated with this object.
*/
CUdevice
getDevice
()
{
return
device
;
}
/**
* Get the index of the CUdevice associated with this object.
*/
int
getDeviceIndex
()
{
return
deviceIndex
;
}
/**
* Get the PlatformData object this context is part of.
*/
CudaPlatform
::
PlatformData
&
getPlatformData
()
{
return
platformData
;
}
/**
* Get the index of this context in the list stored in the PlatformData.
*/
int
getContextIndex
()
const
{
return
contextIndex
;
}
/**
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/
CudaArray
&
getPosq
()
{
return
*
posq
;
}
/**
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
*/
CudaArray
&
getVelm
()
{
return
*
velm
;
}
// /**
// * Get the array which contains the force on each atom.
// */
// CudaArray<mm_float4>& getForce() {
// return *force;
// }
// /**
// * Get the array which contains the buffers in which forces are computed.
// */
// CudaArray<mm_float4>& getForceBuffers() {
// return *forceBuffers;
// }
// /**
// * Get the array which contains a contribution to each force represented as 64 bit fixed point.
// */
// CudaArray<cl_long>& getLongForceBuffer() {
// return *longForceBuffer;
// }
// /**
// * Get the array which contains the buffer in which energy is computed.
// */
// CudaArray<cl_float>& getEnergyBuffer() {
// return *energyBuffer;
// }
// /**
// * Get the array which contains the index of each atom.
// */
// CudaArray<cl_int>& getAtomIndex() {
// return *atomIndex;
// }
// /**
// * Get the number of cells by which the positions are offset.
// */
// std::vector<mm_int4>& getPosCellOffsets() {
// return posCellOffsets;
// }
/**
* Replace all occurrences of a list of substrings.
*
* @param input a string to process
* @param replacements a set of strings that should be replaced with new strings wherever they appear in the input string
* @return a new string produced by performing the replacements
*/
std
::
string
replaceStrings
(
const
std
::
string
&
input
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
replacements
)
const
;
/**
* Create a CUDA module from source code.
*
* @param source the source code of the module
* @param optimizationFlags the optimization flags to pass to the CUDA compiler. If this is
* omitted, a default set of options will be used
*/
CUmodule
createModule
(
const
std
::
string
source
,
const
char
*
optimizationFlags
=
NULL
);
/**
* Create a CUDA module from source code.
*
* @param source the source code of the module
* @param defines a set of preprocessor definitions (name, value) to define when compiling the program
* @param optimizationFlags the optimization flags to pass to the CUDA compiler. If this is
* omitted, a default set of options will be used
*/
CUmodule
createModule
(
const
std
::
string
source
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
defines
,
const
char
*
optimizationFlags
=
NULL
);
// /**
// * Execute a kernel.
// *
// * @param kernel the kernel to execute
// * @param workUnits the maximum number of work units that should be used
// * @param blockSize the size of each thread block to use
// */
// void executeKernel(cl::Kernel& kernel, int workUnits, int blockSize = -1);
// /**
// * Set all elements of an array to 0.
// */
// void clearBuffer(CudaArray<float>& array);
// /**
// * Set all elements of an array to 0.
// */
// void clearBuffer(CudaArray<mm_float4>& array);
// /**
// * Set all elements of an array to 0.
// *
// * @param memory the Memory to clear
// * @param size the number of float elements in the buffer
// */
// void clearBuffer(cl::Memory& memory, int size);
// /**
// * Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
// *
// * @param memory the Memory to clear
// * @param size the number of float elements in the buffer
// */
// void addAutoclearBuffer(cl::Memory& memory, int size);
// /**
// * Clear all buffers that have been registered with addAutoclearBuffer().
// */
// void clearAutoclearBuffers();
// /**
// * Given a collection of buffers packed into an array, sum them and store
// * the sum in the first buffer.
// *
// * @param array the array containing the buffers to reduce
// * @param numBuffers the number of buffers packed into the array
// */
// void reduceBuffer(CudaArray<mm_float4>& array, int numBuffers);
// /**
// * Sum the buffers containing forces.
// */
// void reduceForces();
// /**
// * Get the current simulation time.
// */
// double getTime() {
// return time;
// }
// /**
// * Set the current simulation time.
// */
// void setTime(double t) {
// time = t;
// }
// /**
// * Get the number of integration steps that have been taken.
// */
// int getStepCount() {
// return stepCount;
// }
// /**
// * Set the number of integration steps that have been taken.
// */
// void setStepCount(int steps) {
// stepCount = steps;
// }
// /**
// * Get the number of times forces or energy has been computed.
// */
// int getComputeForceCount() {
// return computeForceCount;
// }
// /**
// * Set the number of times forces or energy has been computed.
// */
// void setComputeForceCount(int count) {
// computeForceCount = count;
// }
// /**
// * Get the number of atoms.
// */
// int getNumAtoms() const {
// return numAtoms;
// }
// /**
// * Get the number of atoms, rounded up to a multiple of TileSize. This is the actual size of
// * most arrays with one element per atom.
// */
// int getPaddedNumAtoms() const {
// return paddedNumAtoms;
// }
// /**
// * Get the number of blocks of TileSize atoms.
// */
// int getNumAtomBlocks() const {
// return numAtomBlocks;
// }
// /**
// * Get the standard number of thread blocks to use when executing kernels.
// */
// int getNumThreadBlocks() const {
// return numThreadBlocks;
// }
// /**
// * Get the number of force buffers.
// */
// int getNumForceBuffers() const {
// return numForceBuffers;
// }
// /**
// * Get the SIMD width of the device being used.
// */
// int getSIMDWidth() const {
// return simdWidth;
// }
// /**
// * Get whether the device being used supports 64 bit atomic operations on global memory.
// */
// bool getSupports64BitGlobalAtomics() {
// return supports64BitGlobalAtomics;
// }
// /**
// * Get whether the device being used supports double precision math.
// */
// bool getSupportsDoublePrecision() {
// return supportsDoublePrecision;
// }
// /**
// * Get the size of the periodic box.
// */
// mm_float4 getPeriodicBoxSize() const {
// return periodicBoxSize;
// }
// /**
// * Set the size of the periodic box.
// */
// void setPeriodicBoxSize(double xsize, double ysize, double zsize) {
// periodicBoxSize = mm_float4((float) xsize, (float) ysize, (float) zsize, 0);
// invPeriodicBoxSize = mm_float4((float) (1.0/xsize), (float) (1.0/ysize), (float) (1.0/zsize), 0);
// }
// /**
// * Get the inverse of the size of the periodic box.
// */
// mm_float4 getInvPeriodicBoxSize() const {
// return invPeriodicBoxSize;
// }
// /**
// * Get the CudaIntegrationUtilities for this context.
// */
// CudaIntegrationUtilities& getIntegrationUtilities() {
// return *integration;
// }
// /**
// * Get the CudaBondedUtilities for this context.
// */
// CudaBondedUtilities& getBondedUtilities() {
// return *bonded;
// }
// /**
// * Get the CudaNonbondedUtilities for this context.
// */
// CudaNonbondedUtilities& getNonbondedUtilities() {
// return *nonbonded;
// }
// /**
// * Get the thread used by this context for executing parallel computations.
// */
// WorkThread& getWorkThread() {
// return *thread;
// }
// /**
// * Get whether atoms were reordered during the most recent force/energy computation.
// */
// bool getAtomsWereReordered() const {
// return atomsWereReordered;
// }
// /**
// * Set whether atoms were reordered during the most recent force/energy computation.
// */
// void setAtomsWereReordered(bool wereReordered) {
// atomsWereReordered = wereReordered;
// }
// /**
// * Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
// * together in the arrays.
// *
// * @param enforcePeriodic if true, the atom positions may be altered to enforce periodic boundary conditions
// */
// void reorderAtoms(bool enforcePeriodic);
// /**
// * Add a listener that should be called whenever atoms get reordered. The CudaContext
// * assumes ownership of the object, and deletes it when the context itself is deleted.
// */
// void addReorderListener(ReorderListener* listener);
// /**
// * Get the list of ReorderListeners.
// */
// std::vector<ReorderListener*>& getReorderListeners() {
// return reorderListeners;
// }
// /**
// * Mark that the current molecule definitions (and hence the atom order) may be invalid.
// * This should be called whenever force field parameters change. It will cause the definitions
// * and order to be revalidated the next time reorderAtoms() is called.
// */
// void invalidateMolecules();
// /**
// * Get whether the current molecule definitions are valid.
// */
// bool getMoleculesAreInvalid() {
// return moleculesInvalid;
// }
private:
struct
Molecule
;
struct
MoleculeGroup
;
class
VirtualSiteInfo
;
// void findMoleculeGroups();
// static void tagAtomsInMolecule(int atom, int molecule, std::vector<int>& atomMolecule, std::vector<std::vector<int> >& atomBonds);
// /**
// * Ensure that all molecules marked as "identical" really are identical. This should be
// * called whenever force field parameters change. If necessary, it will rebuild the list
// * of molecules and resort the atoms.
// */
// void validateMolecules();
static
bool
hasInitializedCuda
;
const
System
&
system
;
double
time
;
CudaPlatform
::
PlatformData
&
platformData
;
int
deviceIndex
;
int
contextIndex
;
int
stepCount
;
int
computeForceCount
;
int
numAtoms
;
int
paddedNumAtoms
;
int
numAtomBlocks
;
int
numThreadBlocks
;
// int numForceBuffers;
// int simdWidth;
bool
useBlockingSync
,
useDoublePrecision
,
accumulateInDouble
,
contextIsValid
,
atomsWereReordered
,
moleculesInvalid
;
std
::
string
compiler
,
tempDir
,
gpuArchitecture
;
float4
periodicBoxSize
;
float4
invPeriodicBoxSize
;
std
::
string
defaultOptimizationOptions
;
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
CUcontext
context
;
CUdevice
device
;
CUfunction
clearBufferKernel
;
CUfunction
clearTwoBuffersKernel
;
CUfunction
clearThreeBuffersKernel
;
CUfunction
clearFourBuffersKernel
;
CUfunction
clearFiveBuffersKernel
;
CUfunction
clearSixBuffersKernel
;
CUfunction
reduceFloat4Kernel
;
CUfunction
reduceForcesKernel
;
std
::
vector
<
CudaForceInfo
*>
forces
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
std
::
vector
<
int4
>
posCellOffsets
;
CudaArray
*
posq
;
CudaArray
*
velm
;
// CudaArray<mm_float4>* force;
// CudaArray<mm_float4>* forceBuffers;
// CudaArray<cl_long>* longForceBuffer;
// CudaArray<cl_float>* energyBuffer;
// CudaArray<cl_int>* atomIndex;
// std::vector<cl::Memory*> autoclearBuffers;
// std::vector<int> autoclearBufferSizes;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
// CudaIntegrationUtilities* integration;
// CudaBondedUtilities* bonded;
// CudaNonbondedUtilities* nonbonded;
WorkThread
*
thread
;
};
/**
 * Plain record describing one molecule as three lists of integers.
 * NOTE(review): the entries are presumably indices into per-atom / per-constraint /
 * per-force-group tables of the owning CudaContext -- confirm against the code that fills them.
 */
struct CudaContext::Molecule {
    std::vector<int> atoms;
    std::vector<int> constraints;
    std::vector<std::vector<int> > groups;
};
/**
 * Plain record describing a group of molecules as three lists of integers.
 * NOTE(review): presumably `atoms` lists the atoms of one representative molecule while
 * `instances` and `offsets` locate each copy of it -- confirm against the code that fills them.
 */
struct CudaContext::MoleculeGroup {
    std::vector<int> atoms;
    std::vector<int> instances;
    std::vector<int> offsets;
};
/**
 * Abstract interface for a unit of work that is run on the worker thread.
 * Subclasses implement execute() with the work to perform.
 */
class CudaContext::WorkTask {
public:
    virtual ~WorkTask() {
    }
    /**
     * Perform the task.
     */
    virtual void execute() = 0;
};
/**
 * A queue of WorkTask objects together with the pthread primitives (mutex, two
 * condition variables and a thread handle) used to process them.
 */
class CudaContext::WorkThread {
public:
    struct ThreadData;
    WorkThread();
    ~WorkThread();
    /**
     * Queue a task for execution on the worker thread.  The task must have been created
     * with "new"; it is deleted automatically once its execute() method has returned.
     */
    void addTask(CudaContext::WorkTask* task);
    /**
     * Report whether the worker thread is currently idle, i.e. waiting for a task to be added.
     */
    bool isWaiting();
    /**
     * Report whether the worker thread has exited.
     */
    bool isFinished();
    /**
     * Wait until every queued task has finished executing and the worker thread is idle.
     */
    void flush();
private:
    std::queue<CudaContext::WorkTask*> tasks;
    bool waiting;
    bool finished;
    pthread_mutex_t queueLock;
    pthread_cond_t waitForTaskCondition;
    pthread_cond_t queueEmptyCondition;
    pthread_t thread;
};
/**
 * Abstract callback interface that is invoked whenever atoms get reordered.
 * Code that needs to react to a reordering should subclass ReorderListener
 * and register the object by calling addReorderListener().
 */
class CudaContext::ReorderListener {
public:
    virtual ~ReorderListener() {
    }
    /**
     * Called after the atoms have been reordered.
     */
    virtual void execute() = 0;
};
}
// namespace OpenMM
#endif
/*OPENMM_CUDACONTEXT_H_*/
platforms/cuda2/src/CudaExpressionUtilities.cpp
0 → 100644
View file @
abb8cb4b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaExpressionUtilities.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/SplineFitter.h"
#include "lepton/Operation.h"
using
namespace
OpenMM
;
using
namespace
Lepton
;
using
namespace
std
;
/**
 * Format a number as a single precision literal for generated kernel source:
 * scientific notation with 8 digits after the decimal point, followed by an "f" suffix.
 */
string CudaExpressionUtilities::doubleToString(double value) {
    stringstream literal;
    literal.precision(8);
    literal << scientific;
    literal << value;
    literal << "f";
    return literal.str();
}
/**
 * Format an integer as decimal text using the default stream formatting.
 */
string CudaExpressionUtilities::intToString(int value) {
    stringstream text;
    text << value;
    const string result = text.str();
    return result;
}
/**
 * Convenience overload taking variables as a name -> source text map.  Each name is wrapped
 * in a Lepton Variable node and the work is forwarded to the overload that accepts
 * (node, source text) pairs.
 */
string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables,
        const vector<pair<string, string> >& functions, const string& prefix, const string& functionParams, const string& tempType) {
    typedef map<string, string>::const_iterator VariableIter;
    vector<pair<ExpressionTreeNode, string> > nodesForVariables;
    for (VariableIter var = variables.begin(); var != variables.end(); ++var) {
        // The tree node takes the freshly allocated Variable operation.
        ExpressionTreeNode variableNode(new Operation::Variable(var->first));
        nodesForVariables.push_back(make_pair(variableNode, var->second));
    }
    return createExpressions(expressions, nodesForVariables, functions, prefix, functionParams, tempType);
}
/**
 * Generate source code that evaluates every expression in the map.  For each entry the
 * temporaries needed by its tree are emitted first, followed by a line made of the map key,
 * the name of the temporary holding the root value, and ";\n".
 */
string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const vector<pair<ExpressionTreeNode, string> >& variables,
        const vector<pair<string, string> >& functions, const string& prefix, const string& functionParams, const string& tempType) {
    typedef map<string, ParsedExpression>::const_iterator ExpressionIter;

    // processExpression() needs the complete set of expressions so it can look for
    // sub-expressions that are worth computing together.
    vector<ParsedExpression> everyExpression;
    for (ExpressionIter expr = expressions.begin(); expr != expressions.end(); ++expr)
        everyExpression.push_back(expr->second);

    // The caller's variables seed the list of nodes that already have a name.
    vector<pair<ExpressionTreeNode, string> > knownNodes(variables);
    stringstream source;
    for (ExpressionIter expr = expressions.begin(); expr != expressions.end(); ++expr) {
        const ExpressionTreeNode& root = expr->second.getRootNode();
        processExpression(source, root, knownNodes, functions, prefix, functionParams, everyExpression, tempType);
        source << expr->first << getTempName(root, knownNodes) << ";\n";
    }
    return source.str();
}
void
CudaExpressionUtilities
::
processExpression
(
stringstream
&
out
,
const
ExpressionTreeNode
&
node
,
vector
<
pair
<
ExpressionTreeNode
,
string
>
>&
temps
,
const
vector
<
pair
<
string
,
string
>
>&
functions
,
const
string
&
prefix
,
const
string
&
functionParams
,
const
vector
<
ParsedExpression
>&
allExpressions
,
const
string
&
tempType
)
{
for
(
int
i
=
0
;
i
<
(
int
)
temps
.
size
();
i
++
)
if
(
temps
[
i
].
first
==
node
)
return
;
for
(
int
i
=
0
;
i
<
(
int
)
node
.
getChildren
().
size
();
i
++
)
processExpression
(
out
,
node
.
getChildren
()[
i
],
temps
,
functions
,
prefix
,
functionParams
,
allExpressions
,
tempType
);
string
name
=
prefix
+
intToString
(
temps
.
size
());
bool
hasRecordedNode
=
false
;
out
<<
tempType
<<
" "
<<
name
<<
" = "
;
switch
(
node
.
getOperation
().
getId
())
{
case
Operation
::
CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
Constant
*>
(
&
node
.
getOperation
())
->
getValue
());
break
;
case
Operation
::
VARIABLE
:
throw
OpenMMException
(
"Unknown variable in expression: "
+
node
.
getOperation
().
getName
());
case
Operation
::
CUSTOM
:
{
int
i
;
for
(
i
=
0
;
i
<
(
int
)
functions
.
size
()
&&
functions
[
i
].
first
!=
node
.
getOperation
().
getName
();
i
++
)
;
if
(
i
==
functions
.
size
())
throw
OpenMMException
(
"Unknown function in expression: "
+
node
.
getOperation
().
getName
());
bool
isDeriv
=
(
dynamic_cast
<
const
Operation
::
Custom
*>
(
&
node
.
getOperation
())
->
getDerivOrder
()[
0
]
==
1
);
out
<<
"0.0f;
\n
"
;
temps
.
push_back
(
make_pair
(
node
,
name
));
hasRecordedNode
=
true
;
// If both the value and derivative of the function are needed, it's faster to calculate them both
// at once, so check to see if both are needed.
const
ExpressionTreeNode
*
valueNode
=
NULL
;
const
ExpressionTreeNode
*
derivNode
=
NULL
;
for
(
int
j
=
0
;
j
<
(
int
)
allExpressions
.
size
();
j
++
)
findRelatedTabulatedFunctions
(
node
,
allExpressions
[
j
].
getRootNode
(),
valueNode
,
derivNode
);
string
valueName
=
name
;
string
derivName
=
name
;
if
(
valueNode
!=
NULL
&&
derivNode
!=
NULL
)
{
string
name2
=
prefix
+
intToString
(
temps
.
size
());
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
if
(
isDeriv
)
{
valueName
=
name2
;
temps
.
push_back
(
make_pair
(
*
valueNode
,
name2
));
}
else
{
derivName
=
name2
;
temps
.
push_back
(
make_pair
(
*
derivNode
,
name2
));
}
}
out
<<
"{
\n
"
;
out
<<
"float4 params = "
<<
functionParams
<<
"["
<<
i
<<
"];
\n
"
;
out
<<
"float x = "
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
";
\n
"
;
out
<<
"if (x >= params.x && x <= params.y) {
\n
"
;
out
<<
"x = (x-params.x)*params.z;
\n
"
;
out
<<
"int index = (int) (floor(x));
\n
"
;
out
<<
"index = min(index, (int) params.w);
\n
"
;
out
<<
"float4 coeff = "
<<
functions
[
i
].
second
<<
"[index];
\n
"
;
out
<<
"float b = x-index;
\n
"
;
out
<<
"float a = 1.0f-b;
\n
"
;
if
(
valueNode
!=
NULL
)
out
<<
valueName
<<
" = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(params.z*params.z);
\n
"
;
if
(
derivNode
!=
NULL
)
out
<<
derivName
<<
" = (coeff.y-coeff.x)*params.z+((1.0f-3.0f*a*a)*coeff.z+(3.0f*b*b-1.0f)*coeff.w)/params.z;
\n
"
;
out
<<
"}
\n
"
;
out
<<
"}"
;
break
;
}
case
Operation
::
ADD
:
out
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
"+"
<<
getTempName
(
node
.
getChildren
()[
1
],
temps
);
break
;
case
Operation
::
SUBTRACT
:
out
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
"-"
<<
getTempName
(
node
.
getChildren
()[
1
],
temps
);
break
;
case
Operation
::
MULTIPLY
:
out
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
"*"
<<
getTempName
(
node
.
getChildren
()[
1
],
temps
);
break
;
case
Operation
::
DIVIDE
:
{
bool
haveReciprocal
=
false
;
for
(
int
i
=
0
;
i
<
(
int
)
temps
.
size
();
i
++
)
if
(
temps
[
i
].
first
.
getOperation
().
getId
()
==
Operation
::
RECIPROCAL
&&
temps
[
i
].
first
.
getChildren
()[
0
]
==
node
.
getChildren
()[
1
])
{
haveReciprocal
=
true
;
out
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
"*"
<<
temps
[
i
].
second
;
}
if
(
!
haveReciprocal
)
out
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
"/"
<<
getTempName
(
node
.
getChildren
()[
1
],
temps
);
break
;
}
case
Operation
::
POWER
:
out
<<
"pow("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
getTempName
(
node
.
getChildren
()[
1
],
temps
)
<<
")"
;
break
;
case
Operation
::
NEGATE
:
out
<<
"-"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
case
Operation
::
SQRT
:
out
<<
"sqrt("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
EXP
:
out
<<
"EXP("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
LOG
:
out
<<
"LOG("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
SIN
:
out
<<
"sin("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
COS
:
out
<<
"cos("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
SEC
:
out
<<
"1.0f/cos("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
CSC
:
out
<<
"1.0f/sin("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
TAN
:
out
<<
"tan("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
COT
:
out
<<
"1.0f/tan("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
ASIN
:
out
<<
"asin("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
ACOS
:
out
<<
"acos("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
ATAN
:
out
<<
"atan("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
SINH
:
out
<<
"sinh("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
COSH
:
out
<<
"cosh("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
TANH
:
out
<<
"tanh("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
ERF
:
out
<<
"erf("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
ERFC
:
out
<<
"erfc("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
STEP
:
out
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
" >= 0.0f ? 1.0f : 0.0f"
;
break
;
case
Operation
::
DELTA
:
out
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
" == 0.0f ? 1.0f : 0.0f"
;
break
;
case
Operation
::
SQUARE
:
{
string
arg
=
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
arg
<<
"*"
<<
arg
;
break
;
}
case
Operation
::
CUBE
:
{
string
arg
=
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
arg
<<
"*"
<<
arg
<<
"*"
<<
arg
;
break
;
}
case
Operation
::
RECIPROCAL
:
out
<<
"RECIP("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
ADD_CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
AddConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"+"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
case
Operation
::
MULTIPLY_CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
MultiplyConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"*"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
case
Operation
::
POWER_CONSTANT
:
{
double
exponent
=
dynamic_cast
<
const
Operation
::
PowerConstant
*>
(
&
node
.
getOperation
())
->
getValue
();
if
(
exponent
==
0.0
)
out
<<
"1.0f"
;
else
if
(
exponent
==
(
int
)
exponent
)
{
out
<<
"0.0f;
\n
"
;
temps
.
push_back
(
make_pair
(
node
,
name
));
hasRecordedNode
=
true
;
// If multiple integral powers of the same base are needed, it's faster to calculate all of them
// at once, so check to see if others are also needed.
map
<
int
,
const
ExpressionTreeNode
*>
powers
;
powers
[(
int
)
exponent
]
=
&
node
;
for
(
int
j
=
0
;
j
<
(
int
)
allExpressions
.
size
();
j
++
)
findRelatedPowers
(
node
,
allExpressions
[
j
].
getRootNode
(),
powers
);
vector
<
int
>
exponents
;
vector
<
string
>
names
;
vector
<
bool
>
hasAssigned
(
powers
.
size
(),
false
);
exponents
.
push_back
((
int
)
fabs
(
exponent
));
names
.
push_back
(
name
);
for
(
map
<
int
,
const
ExpressionTreeNode
*>::
const_iterator
iter
=
powers
.
begin
();
iter
!=
powers
.
end
();
++
iter
)
{
if
(
iter
->
first
!=
exponent
)
{
exponents
.
push_back
(
iter
->
first
>=
0
?
iter
->
first
:
-
iter
->
first
);
string
name2
=
prefix
+
intToString
(
temps
.
size
());
names
.
push_back
(
name2
);
temps
.
push_back
(
make_pair
(
*
iter
->
second
,
name2
));
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
}
}
out
<<
"{
\n
"
;
out
<<
"float multiplier = "
<<
(
exponent
<
0.0
?
"1.0f/"
:
""
)
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
";
\n
"
;
bool
done
=
false
;
while
(
!
done
)
{
done
=
true
;
for
(
int
i
=
0
;
i
<
(
int
)
exponents
.
size
();
i
++
)
{
if
(
exponents
[
i
]
%
2
==
1
)
{
if
(
!
hasAssigned
[
i
])
out
<<
names
[
i
]
<<
" = multiplier;
\n
"
;
else
out
<<
names
[
i
]
<<
" *= multiplier;
\n
"
;
hasAssigned
[
i
]
=
true
;
}
exponents
[
i
]
>>=
1
;
if
(
exponents
[
i
]
!=
0
)
done
=
false
;
}
if
(
!
done
)
out
<<
"multiplier *= multiplier;
\n
"
;
}
out
<<
"}"
;
}
else
out
<<
"pow("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
doubleToString
(
exponent
)
<<
")"
;
break
;
}
case
Operation
::
MIN
:
out
<<
"min("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
getTempName
(
node
.
getChildren
()[
1
],
temps
)
<<
")"
;
break
;
case
Operation
::
MAX
:
out
<<
"max("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
getTempName
(
node
.
getChildren
()[
1
],
temps
)
<<
")"
;
break
;
case
Operation
::
ABS
:
out
<<
"fabs("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
default:
throw
OpenMMException
(
"Internal error: Unknown operation in user-defined expression: "
+
node
.
getOperation
().
getName
());
}
out
<<
";
\n
"
;
if
(
!
hasRecordedNode
)
temps
.
push_back
(
make_pair
(
node
,
name
));
}
/**
 * Look up the name of the temporary that holds the value of an expression node.
 *
 * @param node   the node whose value is wanted
 * @param temps  the (node, name) pairs recorded so far
 * @return the name paired with the first entry equal to node
 * @throws OpenMMException if no entry matches
 */
string CudaExpressionUtilities::getTempName(const ExpressionTreeNode& node, const vector<pair<ExpressionTreeNode, string> >& temps) {
    typedef vector<pair<ExpressionTreeNode, string> >::const_iterator TempIter;
    for (TempIter entry = temps.begin(); entry != temps.end(); ++entry) {
        if (entry->first == node)
            return entry->second;
    }
    stringstream message;
    message << "Internal error: No temporary variable for expression node: " << node;
    throw OpenMMException(message.str());
}
void
CudaExpressionUtilities
::
findRelatedTabulatedFunctions
(
const
ExpressionTreeNode
&
node
,
const
ExpressionTreeNode
&
searchNode
,
const
ExpressionTreeNode
*&
valueNode
,
const
ExpressionTreeNode
*&
derivNode
)
{
if
(
searchNode
.
getOperation
().
getId
()
==
Operation
::
CUSTOM
&&
node
.
getChildren
()[
0
]
==
searchNode
.
getChildren
()[
0
])
{
if
(
dynamic_cast
<
const
Operation
::
Custom
*>
(
&
searchNode
.
getOperation
())
->
getDerivOrder
()[
0
]
==
0
)
valueNode
=
&
searchNode
;
else
derivNode
=
&
searchNode
;
}
else
for
(
int
i
=
0
;
i
<
(
int
)
searchNode
.
getChildren
().
size
();
i
++
)
findRelatedTabulatedFunctions
(
node
,
searchNode
.
getChildren
()[
i
],
valueNode
,
derivNode
);
}
/**
 * Search an expression tree for other constant integer powers of the same base as node, so
 * that all of them can be produced by one repeated-squaring sequence.
 *
 * @param node        the POWER_CONSTANT node currently being generated
 * @param searchNode  root of the (sub)tree to search
 * @param powers      map from exponent to node; must already contain the entry for node.
 *                    Newly found powers of the same base are added to it.
 */
void CudaExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode, map<int, const ExpressionTreeNode*>& powers) {
    if (searchNode.getOperation().getId() == Operation::POWER_CONSTANT && node.getChildren()[0] == searchNode.getChildren()[0]) {
        double realPower = dynamic_cast<const Operation::PowerConstant*>(&searchNode.getOperation())->getValue();
        int power = (int) realPower;
        if (power != realPower)
            return; // We are only interested in integer powers.
        if (power == 0)
            return; // The repeated-squaring code never assigns a zero exponent, so x^0 must be generated on its own (as 1.0f).
        if (powers.find(power) != powers.end())
            return; // This power is already in the map.
        if (powers.begin()->first*power < 0)
            return; // All powers must have the same sign.
        powers[power] = &searchNode;
    }
    else {
        // Not a power of the same base: keep looking further down the tree.
        for (int i = 0; i < (int) searchNode.getChildren().size(); i++)
            findRelatedPowers(node, searchNode.getChildren()[i], powers);
    }
}
/**
 * Build the per-interval spline coefficients of a tabulated function.
 *
 * The sample positions are spaced evenly from min to max, a natural spline is fitted through
 * the values, and one float4 is produced per interval i holding
 * (values[i], values[i+1], derivs[i]/6, derivs[i+1]/6).
 *
 * @param values  the tabulated values of the function
 * @param min     the position of the first value
 * @param max     the position of the last value
 * @return one float4 per interval between consecutive values
 */
vector<float4> CudaExpressionUtilities::computeFunctionCoefficients(const vector<double>& values, double min, double max) {
    const int numValues = values.size();

    // Evenly spaced sample positions covering [min, max].
    vector<double> x(numValues);
    for (int i = 0; i < numValues; i++)
        x[i] = min+i*(max-min)/(numValues-1);

    // Fit the spline; derivs receives one entry per sample.
    vector<double> derivs;
    SplineFitter::createNaturalSpline(x, values, derivs);

    // Pack the data needed to evaluate each interval.
    const int numIntervals = numValues-1;
    vector<float4> f(numIntervals);
    for (int i = 0; i < numIntervals; i++) {
        const float left = (float) values[i];
        const float right = (float) values[i+1];
        const float leftDeriv = (float) (derivs[i]/6.0);
        const float rightDeriv = (float) (derivs[i+1]/6.0);
        f[i] = make_float4(left, right, leftDeriv, rightDeriv);
    }
    return f;
}
platforms/cuda2/src/CudaExpressionUtilities.h
0 → 100644
View file @
abb8cb4b
#ifndef OPENMM_CUDAEXPRESSIONUTILITIES_H_
#define OPENMM_CUDAEXPRESSIONUTILITIES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaContext.h"
#include "lepton/CustomFunction.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/ParsedExpression.h"
#include <map>
#include <sstream>
#include <string>
#include <utility>
namespace
OpenMM
{
/**
 * Static helpers that translate user defined mathematical expressions (Lepton parse trees)
 * into CUDA source code.  Every member is static, so the class is never instantiated.
 */
class OPENMM_EXPORT CudaExpressionUtilities {
public:
    /**
     * Generate source code that evaluates a set of expressions, with variables given by name.
     *
     * @param expressions    the expressions to generate code for.  Each key is the text written in front of
     *                       the computed value, i.e. the variable the result is stored in.
     * @param variables      maps each variable name that may appear in the expressions to the code to
     *                       generate for it
     * @param functions      the name of each tabulated function that may appear in the expressions, paired
     *                       with the name of the variable holding its table
     * @param prefix         a prefix for the names of temporary variables
     * @param functionParams the name of the variable containing the parameters of each tabulated function
     * @param tempType       the type used to declare temporary variables (defaults to "float")
     * @return the generated source code
     */
    static std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions,
            const std::map<std::string, std::string>& variables,
            const std::vector<std::pair<std::string, std::string> >& functions,
            const std::string& prefix, const std::string& functionParams, const std::string& tempType = "float");
    /**
     * Generate source code that evaluates a set of expressions, with variables given as tree nodes.
     *
     * @param expressions    the expressions to generate code for.  Each key is the text written in front of
     *                       the computed value, i.e. the variable the result is stored in.
     * @param variables      variables or precomputed sub-expressions that may appear in the expressions.
     *                       Each entry pairs an ExpressionTreeNode with the code to generate wherever an
     *                       identical node appears.
     * @param functions      the name of each tabulated function that may appear in the expressions, paired
     *                       with the name of the variable holding its table
     * @param prefix         a prefix for the names of temporary variables
     * @param functionParams the name of the variable containing the parameters of each tabulated function
     * @param tempType       the type used to declare temporary variables (defaults to "float")
     * @return the generated source code
     */
    static std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions,
            const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variables,
            const std::vector<std::pair<std::string, std::string> >& functions,
            const std::string& prefix, const std::string& functionParams, const std::string& tempType = "float");
    /**
     * Compute the spline coefficients of a tabulated function that appears in expressions.
     *
     * @param values  the tabulated values of the function
     * @param min     the value of the independent variable at the first element of values
     * @param max     the value of the independent variable at the last element of values
     * @return the spline coefficients
     */
    static std::vector<float4> computeFunctionCoefficients(const std::vector<double>& values, double min, double max);
    /**
     * Format a floating point number as text suitable for inclusion in kernel source.
     */
    static std::string doubleToString(double value);
    /**
     * Format an integer as text suitable for inclusion in kernel source.
     */
    static std::string intToString(int value);
    class FunctionPlaceholder;
private:
    /**
     * Emit code for one expression node (after its children) and record the temporary it was stored in.
     */
    static void processExpression(std::stringstream& out, const Lepton::ExpressionTreeNode& node,
            std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps,
            const std::vector<std::pair<std::string, std::string> >& functions,
            const std::string& prefix, const std::string& functionParams,
            const std::vector<Lepton::ParsedExpression>& allExpressions, const std::string& tempType);
    /**
     * Return the name of the temporary recorded for a node.
     */
    static std::string getTempName(const Lepton::ExpressionTreeNode& node,
            const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps);
    /**
     * Search a tree for tabulated function invocations related to node, reporting value and derivative nodes.
     */
    static void findRelatedTabulatedFunctions(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
            const Lepton::ExpressionTreeNode*& valueNode, const Lepton::ExpressionTreeNode*& derivNode);
    /**
     * Search a tree for other constant integer powers of the base used by node.
     */
    static void findRelatedPowers(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
            std::map<int, const Lepton::ExpressionTreeNode*>& powers);
};
/**
 * A stand-in for custom functions in expressions.  It takes one argument and always
 * evaluates to 0, both for the value and for any derivative.
 */
class CudaExpressionUtilities::FunctionPlaceholder : public Lepton::CustomFunction {
public:
    /** The placeholder takes a single argument. */
    int getNumArguments() const { return 1; }
    /** Always returns 0; the arguments are ignored. */
    double evaluate(const double* arguments) const { return 0.0; }
    /** Always returns 0; the arguments and derivative order are ignored. */
    double evaluateDerivative(const double* arguments, const int* derivOrder) const { return 0.0; }
    /** Create a new heap allocated placeholder. */
    CustomFunction* clone() const { return new FunctionPlaceholder(); }
};
}
// namespace OpenMM
#endif
/*OPENMM_CUDAEXPRESSIONUTILITIES_H_*/
platforms/cuda2/src/CudaForceInfo.cpp
0 → 100644
View file @
abb8cb4b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaForceInfo.h"
using
namespace
OpenMM
;
using
namespace
std
;
/**
 * Default implementation: reports every pair of particles as identical.
 */
bool CudaForceInfo::areParticlesIdentical(int particle1, int particle2) {
    const bool identical = true;
    return identical;
}
/**
 * Default implementation: reports that there are no particle groups.
 */
int CudaForceInfo::getNumParticleGroups() {
    const int numGroups = 0;
    return numGroups;
}
/**
 * Default implementation: does nothing and leaves the output vector untouched.
 */
void CudaForceInfo::getParticlesInGroup(int index, vector<int>& particles) {
    // Intentionally empty.
}
/**
 * Default implementation: reports every pair of groups as identical.
 */
bool CudaForceInfo::areGroupsIdentical(int group1, int group2) {
    const bool identical = true;
    return identical;
}
platforms/cuda2/src/CudaForceInfo.h
0 → 100644
View file @
abb8cb4b
#ifndef OPENMM_CUDAFORCEINFO_H_
#define OPENMM_CUDAFORCEINFO_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/internal/windowsExport.h"
#include <vector>
namespace
OpenMM
{
/**
* This class is used by the Cuda implementation of a Force class to convey information
* about the behavior and requirements of that force.
*/
class
OPENMM_EXPORT
CudaForceInfo
{
public:
CudaForceInfo
(
int
requiredForceBuffers
)
:
requiredForceBuffers
(
requiredForceBuffers
)
{
}
/**
* Get the number of force buffers this force requires.
*/
int
getRequiredForceBuffers
()
{
return
requiredForceBuffers
;
}
/**
* Get whether or not two particles have identical force field parameters.
*/
virtual
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
);
/**
* Get the number of particle groups defined by this force.
*/
virtual
int
getNumParticleGroups
();
/**
* Get the list of particles in a particular group.
*/
virtual
void
getParticlesInGroup
(
int
index
,
std
::
vector
<
int
>&
particles
);
/**
* Get whether two particle groups are identical.
*/
virtual
bool
areGroupsIdentical
(
int
group1
,
int
group2
);
private:
int
requiredForceBuffers
;
};
}
// namespace OpenMM
#endif
/*OPENMM_CUDAFORCEINFO_H_*/
platforms/cuda2/src/CudaKernelFactory.cpp
0 → 100644
View file @
abb8cb4b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaKernelFactory.h"
//#include "CudaParallelKernels.h"
#include "CudaPlatform.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
using
namespace
OpenMM
;
/**
 * Create the kernel implementation with the given name for a context.
 *
 * No kernels are wired up yet: all dispatch code below is still commented out, so
 * every call currently ends by throwing an OpenMMException that names the requested kernel.
 */
KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
    // The platform data is looked up here because the (currently disabled) dispatch code below needs it.
    CudaPlatform::PlatformData* platformData = static_cast<CudaPlatform::PlatformData*>(context.getPlatformData());
    CudaPlatform::PlatformData& data = *platformData;
//    if (data.contexts.size() > 1) {
//        // We are running in parallel on multiple devices, so we may want to create a parallel kernel.
//
//        if (name == CalcForcesAndEnergyKernel::Name())
//            return new CudaParallelCalcForcesAndEnergyKernel(name, platform, data);
//        if (name == CalcHarmonicBondForceKernel::Name())
//            return new CudaParallelCalcHarmonicBondForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcCustomBondForceKernel::Name())
//            return new CudaParallelCalcCustomBondForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcHarmonicAngleForceKernel::Name())
//            return new CudaParallelCalcHarmonicAngleForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcCustomAngleForceKernel::Name())
//            return new CudaParallelCalcCustomAngleForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcPeriodicTorsionForceKernel::Name())
//            return new CudaParallelCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcRBTorsionForceKernel::Name())
//            return new CudaParallelCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcCMAPTorsionForceKernel::Name())
//            return new CudaParallelCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcCustomTorsionForceKernel::Name())
//            return new CudaParallelCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcNonbondedForceKernel::Name())
//            return new CudaParallelCalcNonbondedForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcCustomNonbondedForceKernel::Name())
//            return new CudaParallelCalcCustomNonbondedForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcCustomExternalForceKernel::Name())
//            return new CudaParallelCalcCustomExternalForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcCustomHbondForceKernel::Name())
//            return new CudaParallelCalcCustomHbondForceKernel(name, platform, data, context.getSystem());
//        if (name == CalcCustomCompoundBondForceKernel::Name())
//            return new CudaParallelCalcCustomCompoundBondForceKernel(name, platform, data, context.getSystem());
//    }
//    CudaContext& cl = *data.contexts[0];
//    if (name == CalcForcesAndEnergyKernel::Name())
//        return new CudaCalcForcesAndEnergyKernel(name, platform, cl);
//    if (name == UpdateStateDataKernel::Name())
//        return new CudaUpdateStateDataKernel(name, platform, cl);
//    if (name == ApplyConstraintsKernel::Name())
//        return new CudaApplyConstraintsKernel(name, platform, cl);
//    if (name == VirtualSitesKernel::Name())
//        return new CudaVirtualSitesKernel(name, platform, cl);
//    if (name == CalcHarmonicBondForceKernel::Name())
//        return new CudaCalcHarmonicBondForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcCustomBondForceKernel::Name())
//        return new CudaCalcCustomBondForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcHarmonicAngleForceKernel::Name())
//        return new CudaCalcHarmonicAngleForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcCustomAngleForceKernel::Name())
//        return new CudaCalcCustomAngleForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcPeriodicTorsionForceKernel::Name())
//        return new CudaCalcPeriodicTorsionForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcRBTorsionForceKernel::Name())
//        return new CudaCalcRBTorsionForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcCMAPTorsionForceKernel::Name())
//        return new CudaCalcCMAPTorsionForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcCustomTorsionForceKernel::Name())
//        return new CudaCalcCustomTorsionForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcNonbondedForceKernel::Name())
//        return new CudaCalcNonbondedForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcCustomNonbondedForceKernel::Name())
//        return new CudaCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcGBSAOBCForceKernel::Name())
//        return new CudaCalcGBSAOBCForceKernel(name, platform, cl);
//    if (name == CalcCustomGBForceKernel::Name())
//        return new CudaCalcCustomGBForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcCustomExternalForceKernel::Name())
//        return new CudaCalcCustomExternalForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcCustomHbondForceKernel::Name())
//        return new CudaCalcCustomHbondForceKernel(name, platform, cl, context.getSystem());
//    if (name == CalcCustomCompoundBondForceKernel::Name())
//        return new CudaCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem());
//    if (name == IntegrateVerletStepKernel::Name())
//        return new CudaIntegrateVerletStepKernel(name, platform, cl);
//    if (name == IntegrateLangevinStepKernel::Name())
//        return new CudaIntegrateLangevinStepKernel(name, platform, cl);
//    if (name == IntegrateBrownianStepKernel::Name())
//        return new CudaIntegrateBrownianStepKernel(name, platform, cl);
//    if (name == IntegrateVariableVerletStepKernel::Name())
//        return new CudaIntegrateVariableVerletStepKernel(name, platform, cl);
//    if (name == IntegrateVariableLangevinStepKernel::Name())
//        return new CudaIntegrateVariableLangevinStepKernel(name, platform, cl);
//    if (name == IntegrateCustomStepKernel::Name())
//        return new CudaIntegrateCustomStepKernel(name, platform, cl);
//    if (name == ApplyAndersenThermostatKernel::Name())
//        return new CudaApplyAndersenThermostatKernel(name, platform, cl);
//    if (name == ApplyMonteCarloBarostatKernel::Name())
//        return new CudaApplyMonteCarloBarostatKernel(name, platform, cl);
//    if (name == CalcKineticEnergyKernel::Name())
//        return new CudaCalcKineticEnergyKernel(name, platform, cl);
//    if (name == RemoveCMMotionKernel::Name())
//        return new CudaRemoveCMMotionKernel(name, platform, cl);
    const std::string message = std::string("Tried to create kernel with illegal kernel name '") + name + "'";
    throw OpenMMException(message.c_str());
}
platforms/cuda2/src/CudaKernelSources.cpp.in
0 → 100644
View file @
abb8cb4b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaKernelSources.h"
using namespace OpenMM;
using namespace std;
platforms/cuda2/src/CudaKernelSources.h.in
0 → 100644
View file @
abb8cb4b
#ifndef OPENMM_CUDAKERNELSOURCES_H_
#define OPENMM_CUDAKERNELSOURCES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/internal/windowsExport.h"
#include <string>
namespace OpenMM {
/**
 * This class is a central holding place for the source code of CUDA kernels.
 * The CMake build script inserts declarations into it based on the .cu files in the
 * kernels subfolder.
 *
 * This file is a template: the @CUDA_FILE_DECLARATIONS@ token below is a placeholder
 * that the build script replaces with the generated declarations. Do not add members
 * by hand in the generated header; edit this template or the build script instead.
 */
class OPENMM_EXPORT CudaKernelSources {
public:
@CUDA_FILE_DECLARATIONS@
};
} // namespace OpenMM
#endif /*OPENMM_CUDAKERNELSOURCES_H_*/
platforms/cuda2/src/CudaKernels.h
0 → 100644
View file @
abb8cb4b
This diff is collapsed.
Click to expand it.
platforms/cuda2/src/CudaPlatform.cpp
0 → 100644
View file @
abb8cb4b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaContext.h"
#include "CudaExpressionUtilities.h"
#include "CudaPlatform.h"
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include <algorithm>
#include <cctype>
#include <sstream>
using
namespace
OpenMM
;
using
namespace
std
;
/**
 * C-linkage entry point that creates a CudaPlatform and registers it with
 * Platform::registerPlatform().
 */
extern "C" OPENMM_EXPORT void registerPlatforms() {
    CudaPlatform* cudaPlatform = new CudaPlatform();
    Platform::registerPlatform(cudaPlatform);
}
/**
 * Construct the platform: register a single CudaKernelFactory for every supported kernel
 * name, declare the platform properties, and set their default values.
 *
 * The default temporary directory is read from the environment (TEMP when built with
 * MSVC, TMPDIR otherwise). If that variable is not set, a fixed fallback is used instead
 * of constructing a std::string from a null pointer, which is undefined behavior.
 */
CudaPlatform::CudaPlatform() {
    CudaKernelFactory* factory = new CudaKernelFactory();
    registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
    registerKernelFactory(UpdateStateDataKernel::Name(), factory);
    registerKernelFactory(ApplyConstraintsKernel::Name(), factory);
    registerKernelFactory(VirtualSitesKernel::Name(), factory);
    registerKernelFactory(CalcHarmonicBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomBondForceKernel::Name(), factory);
    registerKernelFactory(CalcHarmonicAngleForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomAngleForceKernel::Name(), factory);
    registerKernelFactory(CalcPeriodicTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcCMAPTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomGBForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
    registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
    registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
    registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
    registerKernelFactory(IntegrateCustomStepKernel::Name(), factory);
    registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
    registerKernelFactory(ApplyMonteCarloBarostatKernel::Name(), factory);
    registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
    registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
    platformProperties.push_back(CudaDeviceIndex());
    platformProperties.push_back(CudaUseBlockingSync());
    platformProperties.push_back(CudaPrecision());
    platformProperties.push_back(CudaCompiler());
    platformProperties.push_back(CudaTempDirectory());
    setPropertyDefaultValue(CudaDeviceIndex(), "");
    setPropertyDefaultValue(CudaUseBlockingSync(), "true");
    setPropertyDefaultValue(CudaPrecision(), "single");
#ifdef _MSC_VER
    setPropertyDefaultValue(CudaCompiler(), "nvcc");
    // getenv() returns NULL when the variable is unset; fall back to the current directory.
    const char* tempDir = getenv("TEMP");
    setPropertyDefaultValue(CudaTempDirectory(), string(tempDir == NULL ? "." : tempDir));
#else
    setPropertyDefaultValue(CudaCompiler(), "/usr/local/cuda/bin/nvcc");
    // TMPDIR is frequently unset on Unix-like systems; fall back to /tmp in that case.
    const char* tempDir = getenv("TMPDIR");
    setPropertyDefaultValue(CudaTempDirectory(), string(tempDir == NULL ? "/tmp" : tempDir));
#endif
}
/**
 * Report whether this platform supports double precision. It currently always answers no.
 */
bool CudaPlatform::supportsDoublePrecision() const {
    const bool supported = false;
    return supported;
}
/**
 * Look up the value of a platform property for a particular context.
 *
 * The per-context values stored in the context's PlatformData are consulted first; if the
 * property is not found there, the request is forwarded to Platform::getPropertyValue().
 */
const string& CudaPlatform::getPropertyValue(const Context& context, const string& property) const {
    const PlatformData* platformData = reinterpret_cast<const PlatformData*>(getContextImpl(context).getPlatformData());
    const map<string, string>& stored = platformData->propertyValues;
    map<string, string>::const_iterator entry = stored.find(property);
    if (entry == stored.end())
        return Platform::getPropertyValue(context, property);
    return entry->second;
}
/**
 * Set the value of a platform property for a context. This implementation is a no-op:
 * the request is ignored.
 */
void CudaPlatform::setPropertyValue(Context& context, const string& property, const string& value) const {
    // Intentionally empty.
}
/**
 * Called when a context is created. Resolves each platform property (the value supplied
 * in `properties` if present, otherwise the platform default), lower-cases the blocking
 * and precision values, and attaches a new PlatformData built from them to the context.
 */
void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string>& properties) const {
    typedef map<string, string>::const_iterator PropertyIter;
    const PropertyIter notFound = properties.end();

    const PropertyIter deviceEntry = properties.find(CudaDeviceIndex());
    const string& devicePropValue = (deviceEntry == notFound ?
            getPropertyDefaultValue(CudaDeviceIndex()) : deviceEntry->second);

    const PropertyIter blockingEntry = properties.find(CudaUseBlockingSync());
    string blockingPropValue = (blockingEntry == notFound ?
            getPropertyDefaultValue(CudaUseBlockingSync()) : blockingEntry->second);

    const PropertyIter precisionEntry = properties.find(CudaPrecision());
    string precisionPropValue = (precisionEntry == notFound ?
            getPropertyDefaultValue(CudaPrecision()) : precisionEntry->second);

    const PropertyIter compilerEntry = properties.find(CudaCompiler());
    const string& compilerPropValue = (compilerEntry == notFound ?
            getPropertyDefaultValue(CudaCompiler()) : compilerEntry->second);

    const PropertyIter tempEntry = properties.find(CudaTempDirectory());
    const string& tempPropValue = (tempEntry == notFound ?
            getPropertyDefaultValue(CudaTempDirectory()) : tempEntry->second);

    // These two values are compared case-insensitively, so normalize the local copies.
    transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
    transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);

    PlatformData* platformData = new PlatformData(context.getSystem(), devicePropValue, blockingPropValue,
            precisionPropValue, compilerPropValue, tempPropValue);
    context.setPlatformData(platformData);
}
/**
 * Called when a context is destroyed. Deletes the PlatformData attached to the context.
 */
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
    delete reinterpret_cast<PlatformData*>(context.getPlatformData());
}
/**
 * Build the per-context platform data.
 *
 * @param system               the System the context was created for
 * @param deviceIndexProperty  list of device indices separated by commas and/or spaces;
 *                             empty entries are skipped
 * @param blockingProperty     blocking sync is enabled only if this equals "true"
 * @param precisionProperty    precision setting, passed through to each CudaContext
 * @param compilerProperty     compiler setting, passed through to each CudaContext
 * @param tempProperty         temporary directory, passed through to each CudaContext
 *
 * One CudaContext is created per listed device. If no device is listed, a single context
 * is created with device index -1. The values actually in effect are recorded in
 * propertyValues.
 */
CudaPlatform::PlatformData::PlatformData(const System& system, const string& deviceIndexProperty, const string& blockingProperty,
        const string& precisionProperty, const string& compilerProperty, const string& tempProperty) :
        removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
    bool blocking = (blockingProperty == "true");

    // Split the device list on commas and spaces.
    vector<string> devices;
    size_t searchPos = 0, nextPos;
    while ((nextPos = deviceIndexProperty.find_first_of(", ", searchPos)) != string::npos) {
        devices.push_back(deviceIndexProperty.substr(searchPos, nextPos-searchPos));
        searchPos = nextPos+1;
    }
    devices.push_back(deviceIndexProperty.substr(searchPos));

    // The destructor does not run if this constructor throws, so contexts that were already
    // created must be released here before the exception is propagated.
    try {
        for (int i = 0; i < (int) devices.size(); i++) {
            if (devices[i].length() > 0) {
                // NOTE(review): a non-numeric entry is not detected here -- confirm whether
                // it should be rejected explicitly.
                unsigned int deviceIndex;
                stringstream(devices[i]) >> deviceIndex;
                contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, *this));
            }
        }
        if (contexts.size() == 0)
            contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, *this));
    }
    catch (...) {
        for (int i = 0; i < (int) contexts.size(); i++)
            delete contexts[i];
        throw;
    }

    // Record the device indices that are actually in use, as a comma separated list.
    stringstream device;
    for (int i = 0; i < (int) contexts.size(); i++) {
        if (i > 0)
            device << ',';
        device << contexts[i]->getDeviceIndex();
    }
    propertyValues[CudaPlatform::CudaDeviceIndex()] = device.str();
    // Record the blocking setting as well, so every declared property can be queried per context.
    propertyValues[CudaPlatform::CudaUseBlockingSync()] = (blocking ? "true" : "false");
    propertyValues[CudaPlatform::CudaPrecision()] = precisionProperty;
    propertyValues[CudaPlatform::CudaCompiler()] = compilerProperty;
    propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
    contextEnergy.resize(contexts.size());
}
/**
 * Delete every CudaContext held by this object, in the order they were created.
 */
CudaPlatform::PlatformData::~PlatformData() {
    const int numContexts = (int) contexts.size();
    int next = 0;
    while (next < numContexts) {
        delete contexts[next];
        next++;
    }
}
/**
 * Initialize all contexts. Currently a no-op: the implementation is still commented out.
 */
void CudaPlatform::PlatformData::initializeContexts(const System& system) {
//    for (int i = 0; i < (int) contexts.size(); i++)
//        contexts[i]->initialize();
}
/**
 * Synchronize all contexts. Currently a no-op: the implementation is still commented out.
 */
void CudaPlatform::PlatformData::syncContexts() {
//    for (int i = 0; i < (int) contexts.size(); i++)
//        contexts[i]->getWorkThread().flush();
}
platforms/cuda2/src/kernels/utilities.cu
0 → 100644
View file @
abb8cb4b
/**
 * Set the first `size` ints of `buffer` to 0. This is called by the various kernels below.
 *
 * The bulk of the buffer is written four ints at a time through an int4 view, with the
 * threads of the whole grid striding over it. Any remaining (size % 4) trailing elements
 * are cleared one by one by global thread 0.
 *
 * NOTE(review): the int4 view presumably requires `buffer` to be 16-byte aligned -- confirm
 * against how the buffers are allocated.
 */
__device__ void clearSingleBuffer(int* __restrict__ buffer, int size) {
    const int globalThread = blockDim.x*blockIdx.x+threadIdx.x;
    const int stride = blockDim.x*gridDim.x;
    const int numVectors = size/4;
    int4* vectorView = (int4*) buffer;
    for (int v = globalThread; v < numVectors; v += stride)
        vectorView[v] = make_int4(0, 0, 0, 0);
    if (globalThread == 0) {
        for (int i = numVectors*4; i < size; i++)
            buffer[i] = 0;
    }
}
/**
 * Kernel: fill one buffer with 0.
 */
__global__ void clearBuffer(
        int* __restrict__ buffer, int size) {
    clearSingleBuffer(buffer, size);
}

/**
 * Kernel: fill two buffers with 0, one after the other.
 */
__global__ void clearTwoBuffers(
        int* __restrict__ buffer1, int size1,
        int* __restrict__ buffer2, int size2) {
    clearSingleBuffer(buffer1, size1);
    clearSingleBuffer(buffer2, size2);
}

/**
 * Kernel: fill three buffers with 0, one after the other.
 */
__global__ void clearThreeBuffers(
        int* __restrict__ buffer1, int size1,
        int* __restrict__ buffer2, int size2,
        int* __restrict__ buffer3, int size3) {
    clearSingleBuffer(buffer1, size1);
    clearSingleBuffer(buffer2, size2);
    clearSingleBuffer(buffer3, size3);
}

/**
 * Kernel: fill four buffers with 0, one after the other.
 */
__global__ void clearFourBuffers(
        int* __restrict__ buffer1, int size1,
        int* __restrict__ buffer2, int size2,
        int* __restrict__ buffer3, int size3,
        int* __restrict__ buffer4, int size4) {
    clearSingleBuffer(buffer1, size1);
    clearSingleBuffer(buffer2, size2);
    clearSingleBuffer(buffer3, size3);
    clearSingleBuffer(buffer4, size4);
}

/**
 * Kernel: fill five buffers with 0, one after the other.
 */
__global__ void clearFiveBuffers(
        int* __restrict__ buffer1, int size1,
        int* __restrict__ buffer2, int size2,
        int* __restrict__ buffer3, int size3,
        int* __restrict__ buffer4, int size4,
        int* __restrict__ buffer5, int size5) {
    clearSingleBuffer(buffer1, size1);
    clearSingleBuffer(buffer2, size2);
    clearSingleBuffer(buffer3, size3);
    clearSingleBuffer(buffer4, size4);
    clearSingleBuffer(buffer5, size5);
}

/**
 * Kernel: fill six buffers with 0, one after the other.
 */
__global__ void clearSixBuffers(
        int* __restrict__ buffer1, int size1,
        int* __restrict__ buffer2, int size2,
        int* __restrict__ buffer3, int size3,
        int* __restrict__ buffer4, int size4,
        int* __restrict__ buffer5, int size5,
        int* __restrict__ buffer6, int size6) {
    clearSingleBuffer(buffer1, size1);
    clearSingleBuffer(buffer2, size2);
    clearSingleBuffer(buffer3, size3);
    clearSingleBuffer(buffer4, size4);
    clearSingleBuffer(buffer5, size5);
    clearSingleBuffer(buffer6, size6);
}
/**
 * Sum a collection of buffers into the first one.
 *
 * `buffer` holds `numBuffers` consecutive sub-buffers of `bufferSize` elements each.
 * For every element position, the values from all sub-buffers are added together and the
 * total is stored in the first sub-buffer.
 */
__global__ void reduceFloat4Buffer(float4* __restrict__ buffer, int bufferSize, int numBuffers) {
    const int totalSize = bufferSize*numBuffers;
    for (int element = blockDim.x*blockIdx.x+threadIdx.x; element < bufferSize; element += blockDim.x*gridDim.x) {
        // Start from the first sub-buffer's value, then accumulate the others.
        float4 total = buffer[element];
        int source = element+bufferSize;
        while (source < totalSize) {
            total += buffer[source];
            source += bufferSize;
        }
        buffer[element] = total;
    }
}
/**
 * Sum the various buffers containing forces.
 *
 * For each element, the x, y and z values are read from `longBuffer` at offsets 0,
 * bufferSize and 2*bufferSize from the element index, scaled by 1/0xFFFFFFFF, and
 * combined with the sum of that element over all `numBuffers` sub-buffers of `buffer`.
 * The result is written to the first sub-buffer of `buffer`.
 *
 * NOTE(review): `long` is 32 bits wide on some platforms (e.g. Windows); confirm it
 * matches the element width used on the host side for `longBuffer`.
 */
__global__ void reduceForces(const long* __restrict__ longBuffer, float4* __restrict__ buffer, int bufferSize, int numBuffers) {
    const int totalSize = bufferSize*numBuffers;
    const float scale = 1.0f/(float) 0xFFFFFFFF;
    int index = blockDim.x*blockIdx.x+threadIdx.x;
    while (index < bufferSize) {
        const float fx = scale*longBuffer[index];
        const float fy = scale*longBuffer[index+bufferSize];
        const float fz = scale*longBuffer[index+2*bufferSize];
        float4 total = make_float4(fx, fy, fz, 0.0f);
        for (int source = index; source < totalSize; source += bufferSize)
            total += buffer[source];
        buffer[index] = total;
        index += blockDim.x*gridDim.x;
    }
}
platforms/cuda2/src/kernels/vectorOps.cu
0 → 100644
View file @
abb8cb4b
/**
* This file defines vector operations to simplify code elsewhere.
*/
// Versions of make_x() that take a single value and set all components to that.

inline __device__ int2 make_int2(int a) {
    int2 v;
    v.x = a; v.y = a;
    return v;
}

inline __device__ int3 make_int3(int a) {
    int3 v;
    v.x = a; v.y = a; v.z = a;
    return v;
}

inline __device__ int4 make_int4(int a) {
    int4 v;
    v.x = a; v.y = a; v.z = a; v.w = a;
    return v;
}

inline __device__ float2 make_float2(float a) {
    float2 v;
    v.x = a; v.y = a;
    return v;
}

inline __device__ float3 make_float3(float a) {
    float3 v;
    v.x = a; v.y = a; v.z = a;
    return v;
}

inline __device__ float4 make_float4(float a) {
    float4 v;
    v.x = a; v.y = a; v.z = a; v.w = a;
    return v;
}

inline __device__ double2 make_double2(double a) {
    double2 v;
    v.x = a; v.y = a;
    return v;
}

inline __device__ double3 make_double3(double a) {
    double3 v;
    v.x = a; v.y = a; v.z = a;
    return v;
}

inline __device__ double4 make_double4(double a) {
    double4 v;
    v.x = a; v.y = a; v.z = a; v.w = a;
    return v;
}
// Negate a vector.

// Unary minus for int2: returns a vector with each component negated.
// (This must be operator-, matching the other vector types; a unary operator* would
// overload the dereference operator instead.)
inline __device__ int2 operator-(int2 a) {
    return make_int2(-a.x, -a.y);
}
// Unary minus for the remaining vector types: each component is negated.

inline __device__ int3 operator-(int3 a) {
    a.x = -a.x; a.y = -a.y; a.z = -a.z;
    return a;
}

inline __device__ int4 operator-(int4 a) {
    a.x = -a.x; a.y = -a.y; a.z = -a.z; a.w = -a.w;
    return a;
}

inline __device__ float2 operator-(float2 a) {
    a.x = -a.x; a.y = -a.y;
    return a;
}

inline __device__ float3 operator-(float3 a) {
    a.x = -a.x; a.y = -a.y; a.z = -a.z;
    return a;
}

inline __device__ float4 operator-(float4 a) {
    a.x = -a.x; a.y = -a.y; a.z = -a.z; a.w = -a.w;
    return a;
}

inline __device__ double2 operator-(double2 a) {
    a.x = -a.x; a.y = -a.y;
    return a;
}

inline __device__ double3 operator-(double3 a) {
    a.x = -a.x; a.y = -a.y; a.z = -a.z;
    return a;
}

inline __device__ double4 operator-(double4 a) {
    a.x = -a.x; a.y = -a.y; a.z = -a.z; a.w = -a.w;
    return a;
}
// Add two vectors (component-wise).

inline __device__ int2 operator+(int2 a, int2 b) {
    a.x += b.x; a.y += b.y;
    return a;
}

inline __device__ int3 operator+(int3 a, int3 b) {
    a.x += b.x; a.y += b.y; a.z += b.z;
    return a;
}

inline __device__ int4 operator+(int4 a, int4 b) {
    a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
    return a;
}

inline __device__ float2 operator+(float2 a, float2 b) {
    a.x += b.x; a.y += b.y;
    return a;
}

inline __device__ float3 operator+(float3 a, float3 b) {
    a.x += b.x; a.y += b.y; a.z += b.z;
    return a;
}

inline __device__ float4 operator+(float4 a, float4 b) {
    a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
    return a;
}

inline __device__ double2 operator+(double2 a, double2 b) {
    a.x += b.x; a.y += b.y;
    return a;
}

inline __device__ double3 operator+(double3 a, double3 b) {
    a.x += b.x; a.y += b.y; a.z += b.z;
    return a;
}

inline __device__ double4 operator+(double4 a, double4 b) {
    a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
    return a;
}
// Subtract two vectors (component-wise).

inline __device__ int2 operator-(int2 a, int2 b) {
    a.x -= b.x; a.y -= b.y;
    return a;
}

inline __device__ int3 operator-(int3 a, int3 b) {
    a.x -= b.x; a.y -= b.y; a.z -= b.z;
    return a;
}

inline __device__ int4 operator-(int4 a, int4 b) {
    a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
    return a;
}

inline __device__ float2 operator-(float2 a, float2 b) {
    a.x -= b.x; a.y -= b.y;
    return a;
}

inline __device__ float3 operator-(float3 a, float3 b) {
    a.x -= b.x; a.y -= b.y; a.z -= b.z;
    return a;
}

inline __device__ float4 operator-(float4 a, float4 b) {
    a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
    return a;
}

inline __device__ double2 operator-(double2 a, double2 b) {
    a.x -= b.x; a.y -= b.y;
    return a;
}

inline __device__ double3 operator-(double3 a, double3 b) {
    a.x -= b.x; a.y -= b.y; a.z -= b.z;
    return a;
}

inline __device__ double4 operator-(double4 a, double4 b) {
    a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
    return a;
}
// Component-wise product of two vectors of the same type (not a dot or cross product).
// Each overload scales its by-value copy of `a` by the matching component of `b`.
inline __device__ int2 operator*(int2 a, int2 b) {
    a.x *= b.x;
    a.y *= b.y;
    return a;
}

inline __device__ int3 operator*(int3 a, int3 b) {
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    return a;
}

inline __device__ int4 operator*(int4 a, int4 b) {
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    a.w *= b.w;
    return a;
}

inline __device__ float2 operator*(float2 a, float2 b) {
    a.x *= b.x;
    a.y *= b.y;
    return a;
}

inline __device__ float3 operator*(float3 a, float3 b) {
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    return a;
}

inline __device__ float4 operator*(float4 a, float4 b) {
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    a.w *= b.w;
    return a;
}

inline __device__ double2 operator*(double2 a, double2 b) {
    a.x *= b.x;
    a.y *= b.y;
    return a;
}

inline __device__ double3 operator*(double3 a, double3 b) {
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    return a;
}

inline __device__ double4 operator*(double4 a, double4 b) {
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    a.w *= b.w;
    return a;
}
// Component-wise quotient (a / b) of two vectors of the same type.
// The int overloads use integer division; no check is made for zero components in `b`.
inline __device__ int2 operator/(int2 a, int2 b) {
    a.x /= b.x;
    a.y /= b.y;
    return a;
}

inline __device__ int3 operator/(int3 a, int3 b) {
    a.x /= b.x;
    a.y /= b.y;
    a.z /= b.z;
    return a;
}

inline __device__ int4 operator/(int4 a, int4 b) {
    a.x /= b.x;
    a.y /= b.y;
    a.z /= b.z;
    a.w /= b.w;
    return a;
}

inline __device__ float2 operator/(float2 a, float2 b) {
    a.x /= b.x;
    a.y /= b.y;
    return a;
}

inline __device__ float3 operator/(float3 a, float3 b) {
    a.x /= b.x;
    a.y /= b.y;
    a.z /= b.z;
    return a;
}

inline __device__ float4 operator/(float4 a, float4 b) {
    a.x /= b.x;
    a.y /= b.y;
    a.z /= b.z;
    a.w /= b.w;
    return a;
}

inline __device__ double2 operator/(double2 a, double2 b) {
    a.x /= b.x;
    a.y /= b.y;
    return a;
}

inline __device__ double3 operator/(double3 a, double3 b) {
    a.x /= b.x;
    a.y /= b.y;
    a.z /= b.z;
    return a;
}

inline __device__ double4 operator/(double4 a, double4 b) {
    a.x /= b.x;
    a.y /= b.y;
    a.z /= b.z;
    a.w /= b.w;
    return a;
}
// In-place component-wise addition: adds each component of `b` to `a`.
// These overloads return void, so the result cannot be chained.
inline __device__ void operator+=(int2& a, int2 b) {
    a.x = a.x + b.x;
    a.y = a.y + b.y;
}

inline __device__ void operator+=(int3& a, int3 b) {
    a.x = a.x + b.x;
    a.y = a.y + b.y;
    a.z = a.z + b.z;
}

inline __device__ void operator+=(int4& a, int4 b) {
    a.x = a.x + b.x;
    a.y = a.y + b.y;
    a.z = a.z + b.z;
    a.w = a.w + b.w;
}

inline __device__ void operator+=(float2& a, float2 b) {
    a.x = a.x + b.x;
    a.y = a.y + b.y;
}

inline __device__ void operator+=(float3& a, float3 b) {
    a.x = a.x + b.x;
    a.y = a.y + b.y;
    a.z = a.z + b.z;
}

inline __device__ void operator+=(float4& a, float4 b) {
    a.x = a.x + b.x;
    a.y = a.y + b.y;
    a.z = a.z + b.z;
    a.w = a.w + b.w;
}

inline __device__ void operator+=(double2& a, double2 b) {
    a.x = a.x + b.x;
    a.y = a.y + b.y;
}

inline __device__ void operator+=(double3& a, double3 b) {
    a.x = a.x + b.x;
    a.y = a.y + b.y;
    a.z = a.z + b.z;
}

inline __device__ void operator+=(double4& a, double4 b) {
    a.x = a.x + b.x;
    a.y = a.y + b.y;
    a.z = a.z + b.z;
    a.w = a.w + b.w;
}
// In-place component-wise subtraction: subtracts each component of `b` from `a`.
// These overloads return void, so the result cannot be chained.
inline __device__ void operator-=(int2& a, int2 b) {
    a.x = a.x - b.x;
    a.y = a.y - b.y;
}

inline __device__ void operator-=(int3& a, int3 b) {
    a.x = a.x - b.x;
    a.y = a.y - b.y;
    a.z = a.z - b.z;
}

inline __device__ void operator-=(int4& a, int4 b) {
    a.x = a.x - b.x;
    a.y = a.y - b.y;
    a.z = a.z - b.z;
    a.w = a.w - b.w;
}

inline __device__ void operator-=(float2& a, float2 b) {
    a.x = a.x - b.x;
    a.y = a.y - b.y;
}

inline __device__ void operator-=(float3& a, float3 b) {
    a.x = a.x - b.x;
    a.y = a.y - b.y;
    a.z = a.z - b.z;
}

inline __device__ void operator-=(float4& a, float4 b) {
    a.x = a.x - b.x;
    a.y = a.y - b.y;
    a.z = a.z - b.z;
    a.w = a.w - b.w;
}

inline __device__ void operator-=(double2& a, double2 b) {
    a.x = a.x - b.x;
    a.y = a.y - b.y;
}

inline __device__ void operator-=(double3& a, double3 b) {
    a.x = a.x - b.x;
    a.y = a.y - b.y;
    a.z = a.z - b.z;
}

inline __device__ void operator-=(double4& a, double4 b) {
    a.x = a.x - b.x;
    a.y = a.y - b.y;
    a.z = a.z - b.z;
    a.w = a.w - b.w;
}
// In-place component-wise multiplication: scales each component of `a`
// by the matching component of `b`. These overloads return void.
inline __device__ void operator*=(int2& a, int2 b) {
    a.x = a.x * b.x;
    a.y = a.y * b.y;
}

inline __device__ void operator*=(int3& a, int3 b) {
    a.x = a.x * b.x;
    a.y = a.y * b.y;
    a.z = a.z * b.z;
}

inline __device__ void operator*=(int4& a, int4 b) {
    a.x = a.x * b.x;
    a.y = a.y * b.y;
    a.z = a.z * b.z;
    a.w = a.w * b.w;
}

inline __device__ void operator*=(float2& a, float2 b) {
    a.x = a.x * b.x;
    a.y = a.y * b.y;
}

inline __device__ void operator*=(float3& a, float3 b) {
    a.x = a.x * b.x;
    a.y = a.y * b.y;
    a.z = a.z * b.z;
}

inline __device__ void operator*=(float4& a, float4 b) {
    a.x = a.x * b.x;
    a.y = a.y * b.y;
    a.z = a.z * b.z;
    a.w = a.w * b.w;
}

inline __device__ void operator*=(double2& a, double2 b) {
    a.x = a.x * b.x;
    a.y = a.y * b.y;
}

inline __device__ void operator*=(double3& a, double3 b) {
    a.x = a.x * b.x;
    a.y = a.y * b.y;
    a.z = a.z * b.z;
}

inline __device__ void operator*=(double4& a, double4 b) {
    a.x = a.x * b.x;
    a.y = a.y * b.y;
    a.z = a.z * b.z;
    a.w = a.w * b.w;
}
// In-place component-wise division: divides each component of `a` by the
// matching component of `b`. The int overloads use integer division, and no
// check is made for zero components in `b`. These overloads return void.
inline __device__ void operator/=(int2& a, int2 b) {
    a.x = a.x / b.x;
    a.y = a.y / b.y;
}

inline __device__ void operator/=(int3& a, int3 b) {
    a.x = a.x / b.x;
    a.y = a.y / b.y;
    a.z = a.z / b.z;
}

inline __device__ void operator/=(int4& a, int4 b) {
    a.x = a.x / b.x;
    a.y = a.y / b.y;
    a.z = a.z / b.z;
    a.w = a.w / b.w;
}

inline __device__ void operator/=(float2& a, float2 b) {
    a.x = a.x / b.x;
    a.y = a.y / b.y;
}

inline __device__ void operator/=(float3& a, float3 b) {
    a.x = a.x / b.x;
    a.y = a.y / b.y;
    a.z = a.z / b.z;
}

inline __device__ void operator/=(float4& a, float4 b) {
    a.x = a.x / b.x;
    a.y = a.y / b.y;
    a.z = a.z / b.z;
    a.w = a.w / b.w;
}

inline __device__ void operator/=(double2& a, double2 b) {
    a.x = a.x / b.x;
    a.y = a.y / b.y;
}

inline __device__ void operator/=(double3& a, double3 b) {
    a.x = a.x / b.x;
    a.y = a.y / b.y;
    a.z = a.z / b.z;
}

inline __device__ void operator/=(double4& a, double4 b) {
    a.x = a.x / b.x;
    a.y = a.y / b.y;
    a.z = a.z / b.z;
    a.w = a.w / b.w;
}
// Scale every component of a vector by a scalar of the matching element type.
// Both operand orders (vector * scalar and scalar * vector) are provided.
// Each overload writes into its by-value copy of the vector argument and
// returns that copy; the operand order of each product is kept as written.

// int vectors: vector * scalar
inline __device__ int2 operator*(int2 a, int b) {
    a.x *= b;
    a.y *= b;
    return a;
}

inline __device__ int3 operator*(int3 a, int b) {
    a.x *= b;
    a.y *= b;
    a.z *= b;
    return a;
}

inline __device__ int4 operator*(int4 a, int b) {
    a.x *= b;
    a.y *= b;
    a.z *= b;
    a.w *= b;
    return a;
}

// int vectors: scalar * vector
inline __device__ int2 operator*(int a, int2 b) {
    b.x = a * b.x;
    b.y = a * b.y;
    return b;
}

inline __device__ int3 operator*(int a, int3 b) {
    b.x = a * b.x;
    b.y = a * b.y;
    b.z = a * b.z;
    return b;
}

inline __device__ int4 operator*(int a, int4 b) {
    b.x = a * b.x;
    b.y = a * b.y;
    b.z = a * b.z;
    b.w = a * b.w;
    return b;
}

// float vectors: vector * scalar
inline __device__ float2 operator*(float2 a, float b) {
    a.x *= b;
    a.y *= b;
    return a;
}

inline __device__ float3 operator*(float3 a, float b) {
    a.x *= b;
    a.y *= b;
    a.z *= b;
    return a;
}

inline __device__ float4 operator*(float4 a, float b) {
    a.x *= b;
    a.y *= b;
    a.z *= b;
    a.w *= b;
    return a;
}

// float vectors: scalar * vector
inline __device__ float2 operator*(float a, float2 b) {
    b.x = a * b.x;
    b.y = a * b.y;
    return b;
}

inline __device__ float3 operator*(float a, float3 b) {
    b.x = a * b.x;
    b.y = a * b.y;
    b.z = a * b.z;
    return b;
}

inline __device__ float4 operator*(float a, float4 b) {
    b.x = a * b.x;
    b.y = a * b.y;
    b.z = a * b.z;
    b.w = a * b.w;
    return b;
}

// double vectors: vector * scalar
inline __device__ double2 operator*(double2 a, double b) {
    a.x *= b;
    a.y *= b;
    return a;
}

inline __device__ double3 operator*(double3 a, double b) {
    a.x *= b;
    a.y *= b;
    a.z *= b;
    return a;
}

inline __device__ double4 operator*(double4 a, double b) {
    a.x *= b;
    a.y *= b;
    a.z *= b;
    a.w *= b;
    return a;
}

// double vectors: scalar * vector
inline __device__ double2 operator*(double a, double2 b) {
    b.x = a * b.x;
    b.y = a * b.y;
    return b;
}

inline __device__ double3 operator*(double a, double3 b) {
    b.x = a * b.x;
    b.y = a * b.y;
    b.z = a * b.z;
    return b;
}

inline __device__ double4 operator*(double a, double4 b) {
    b.x = a * b.x;
    b.y = a * b.y;
    b.z = a * b.z;
    b.w = a * b.w;
    return b;
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment