Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
05198df6
"platforms/cuda-old/tests/TestCudaNonbondedForce.cpp" did not exist on "74c3def497df969bbe72638d324ab8464ec00234"
Commit
05198df6
authored
Nov 05, 2012
by
Peter Eastman
Browse files
Created CUDA implementation of RPMD
parent
99bca8c1
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
1302 additions
and
7 deletions
+1302
-7
plugins/rpmd/CMakeLists.txt
plugins/rpmd/CMakeLists.txt
+9
-0
plugins/rpmd/openmmapi/include/openmm/RPMDIntegrator.h
plugins/rpmd/openmmapi/include/openmm/RPMDIntegrator.h
+1
-1
plugins/rpmd/openmmapi/src/RPMDIntegrator.cpp
plugins/rpmd/openmmapi/src/RPMDIntegrator.cpp
+10
-1
plugins/rpmd/platforms/cuda/CMakeLists.txt
plugins/rpmd/platforms/cuda/CMakeLists.txt
+129
-0
plugins/rpmd/platforms/cuda/include/CudaRpmdKernelFactory.h
plugins/rpmd/platforms/cuda/include/CudaRpmdKernelFactory.h
+50
-0
plugins/rpmd/platforms/cuda/src/CudaRpmdKernelFactory.cpp
plugins/rpmd/platforms/cuda/src/CudaRpmdKernelFactory.cpp
+56
-0
plugins/rpmd/platforms/cuda/src/CudaRpmdKernelSources.cpp.in
plugins/rpmd/platforms/cuda/src/CudaRpmdKernelSources.cpp.in
+31
-0
plugins/rpmd/platforms/cuda/src/CudaRpmdKernelSources.h.in
plugins/rpmd/platforms/cuda/src/CudaRpmdKernelSources.h.in
+47
-0
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
+399
-0
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
+99
-0
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
+215
-0
plugins/rpmd/platforms/cuda/tests/CMakeLists.txt
plugins/rpmd/platforms/cuda/tests/CMakeLists.txt
+19
-0
plugins/rpmd/platforms/cuda/tests/TestCudaRpmd.cpp
plugins/rpmd/platforms/cuda/tests/TestCudaRpmd.cpp
+237
-0
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+0
-3
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
+0
-2
No files found.
plugins/rpmd/CMakeLists.txt
View file @
05198df6
...
...
@@ -170,6 +170,15 @@ IF(OPENMM_BUILD_RPMD_OPENCL_LIB)
ADD_SUBDIRECTORY
(
platforms/opencl
)
ENDIF
(
OPENMM_BUILD_RPMD_OPENCL_LIB
)
IF
(
CUDA_FOUND
)
SET
(
OPENMM_BUILD_RPMD_CUDA_LIB ON CACHE BOOL
"Build RPMD implementation for CUDA"
)
ELSE
(
CUDA_FOUND
)
SET
(
OPENMM_BUILD_RPMD_CUDA_LIB OFF CACHE BOOL
"Build RPMD implementation for CUDA"
)
ENDIF
(
CUDA_FOUND
)
IF
(
OPENMM_BUILD_RPMD_CUDA_LIB
)
ADD_SUBDIRECTORY
(
platforms/cuda
)
ENDIF
(
OPENMM_BUILD_RPMD_CUDA_LIB
)
INSTALL_TARGETS
(
/lib RUNTIME_DIRECTORY /lib
${
SHARED_RPMD_TARGET
}
)
IF
(
CREATE_SERIALIZABLE_OPENMM_RPMD
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_RPMD_SERIALIZABLE_TARGET
}
)
...
...
plugins/rpmd/openmmapi/include/openmm/RPMDIntegrator.h
View file @
05198df6
...
...
@@ -182,7 +182,7 @@ protected:
private:
double
temperature
,
friction
;
int
numCopies
,
randomNumberSeed
;
bool
forcesAreValid
,
hasSetPosition
,
hasSetVelocity
;
bool
forcesAreValid
,
hasSetPosition
,
hasSetVelocity
,
isFirstStep
;
ContextImpl
*
context
;
Context
*
owner
;
Kernel
kernel
;
...
...
plugins/rpmd/openmmapi/src/RPMDIntegrator.cpp
View file @
05198df6
...
...
@@ -42,7 +42,7 @@ using std::string;
using
std
::
vector
;
RPMDIntegrator
::
RPMDIntegrator
(
int
numCopies
,
double
temperature
,
double
frictionCoeff
,
double
stepSize
)
:
owner
(
NULL
),
numCopies
(
numCopies
),
forcesAreValid
(
false
),
hasSetPosition
(
false
),
hasSetVelocity
(
false
)
{
owner
(
NULL
),
numCopies
(
numCopies
),
forcesAreValid
(
false
),
hasSetPosition
(
false
),
hasSetVelocity
(
false
)
,
isFirstStep
(
true
)
{
setTemperature
(
temperature
);
setFriction
(
frictionCoeff
);
setStepSize
(
stepSize
);
...
...
@@ -109,6 +109,15 @@ void RPMDIntegrator::step(int steps) {
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
setVelocities
(
i
,
s
.
getVelocities
());
}
if
(
isFirstStep
)
{
// Call setPositions() on the Context so it doesn't think the user is trying to
// run a simulation without setting positions first. These positions will
// immediately get overwritten by the ones stored in this integrator.
vector
<
Vec3
>
p
(
context
->
getSystem
().
getNumParticles
(),
Vec3
());
context
->
getOwner
().
setPositions
(
p
);
isFirstStep
=
false
;
}
for
(
int
i
=
0
;
i
<
steps
;
++
i
)
{
kernel
.
getAs
<
IntegrateRPMDStepKernel
>
().
execute
(
*
context
,
*
this
,
forcesAreValid
);
forcesAreValid
=
true
;
...
...
plugins/rpmd/platforms/cuda/CMakeLists.txt
0 → 100644
View file @
05198df6
#---------------------------------------------------
# OpenMM CUDA RPMD Integrator
#
# Creates OpenMM library, base name=OpenMMRPMDCUDA.
# Default libraries are shared & optimized. Variants
# are created for debug (_d).
#
# Windows:
# OpenMMRPMDCUDA[_d].dll
# OpenMMRPMDCUDA[_d].lib
# Unix:
# libOpenMMRPMDCUDA[_d].so
#----------------------------------------------------
IF(APPLE)
    SET(CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
    SET(CMAKE_OSX_SYSROOT "/Developer/SDKs/MacOSX10.6.sdk")
ENDIF(APPLE)

# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS .)

# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
SET(OPENMMRPMDCUDA_LIBRARY_NAME OpenMMRPMDCUDA)
SET(SHARED_TARGET ${OPENMMRPMDCUDA_LIBRARY_NAME})

# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF(${CMAKE_GENERATOR} MATCHES "Visual Studio")
    SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF(${CMAKE_GENERATOR} MATCHES "Visual Studio")

# But on Unix or Cygwin we have to add the suffix manually
IF(UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
    SET(SHARED_TARGET ${SHARED_TARGET}_d)
ENDIF(UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)

# These are all the places to search for header files which are
# to be part of the API.
SET(API_INCLUDE_DIRS) # start empty
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
    LIST(APPEND API_INCLUDE_DIRS
         ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
         ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
ENDFOREACH(subdir)

# We'll need both *relative* path names, starting with their API_INCLUDE_DIRS,
# and absolute pathnames.
SET(API_REL_INCLUDE_FILES) # start these out empty
SET(API_ABS_INCLUDE_FILES)
FOREACH(dir ${API_INCLUDE_DIRS})
    FILE(GLOB fullpaths ${dir}/*.h) # returns full pathnames
    LIST(APPEND API_ABS_INCLUDE_FILES ${fullpaths})
    FOREACH(pathname ${fullpaths})
        GET_FILENAME_COMPONENT(filename ${pathname} NAME)
        LIST(APPEND API_REL_INCLUDE_FILES ${dir}/${filename})
    ENDFOREACH(pathname)
ENDFOREACH(dir)

# collect up source files
SET(SOURCE_FILES) # empty
SET(SOURCE_INCLUDE_FILES)
FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
    FILE(GLOB_RECURSE src_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
    FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
    LIST(APPEND SOURCE_FILES ${src_files})
    LIST(APPEND SOURCE_INCLUDE_FILES ${incl_files})
    INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)

# Headers of this plugin and of the main CUDA platform (source and generated).
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/include)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/cuda/src)

# Set variables needed for encoding kernel sources into a C++ class
SET(CUDA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(CUDA_SOURCE_CLASS CudaRpmdKernelSources)
SET(CUDA_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.cpp)
SET(CUDA_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.h)
LIST(APPEND SOURCE_FILES ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)

# Create the library
INCLUDE_DIRECTORIES(${CUDA_TOOLKIT_INCLUDE})

# Regenerate the kernel source class whenever one of the .cu files changes.
FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
    COMMAND ${CMAKE_COMMAND}
    ARGS -D CUDA_SOURCE_DIR=${CUDA_SOURCE_DIR} -D CUDA_KERNELS_CPP=${CUDA_KERNELS_CPP} -D CUDA_KERNELS_H=${CUDA_KERNELS_H} -D CUDA_SOURCE_CLASS=${CUDA_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/cuda/EncodeCUDAFiles.cmake
    DEPENDS ${CUDA_KERNELS}
)
SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})

# Link against the main OpenMM library, the CUDA platform library and the RPMD library.
IF(UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
    SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
ELSE(UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
    SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF(UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUDA_LIBRARIES} ${PTHREADS_LIB})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${OPENMM_LIBRARY_NAME}CUDA_d optimized ${OPENMM_LIBRARY_NAME}CUDA)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${SHARED_RPMD_TARGET} optimized ${SHARED_RPMD_TARGET})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_BUILDING_SHARED_LIBRARY")

INSTALL(TARGETS ${SHARED_TARGET} DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/plugins)

# Ensure that links to the main CUDA library will be resolved.
IF(APPLE)
    IF(CMAKE_BUILD_TYPE MATCHES Debug)
        SET(CUDA_LIBRARY libOpenMMCUDA_d.dylib)
    ELSE(CMAKE_BUILD_TYPE MATCHES Debug)
        SET(CUDA_LIBRARY libOpenMMCUDA.dylib)
    ENDIF(CMAKE_BUILD_TYPE MATCHES Debug)
    INSTALL(CODE "EXECUTE_PROCESS(COMMAND install_name_tool -change ${CUDA_LIBRARY} @loader_path/${CUDA_LIBRARY} ${CMAKE_INSTALL_PREFIX}/lib/plugins/lib${SHARED_TARGET}.dylib)")
ENDIF(APPLE)

# SUBDIRS() is deprecated; ADD_SUBDIRECTORY() is what the parent script uses.
ADD_SUBDIRECTORY(tests)
plugins/rpmd/platforms/cuda/include/CudaRpmdKernelFactory.h
0 → 100644
View file @
05198df6
#ifndef OPENMM_CUDARPMDKERNELFACTORY_H_
#define OPENMM_CUDARPMDKERNELFACTORY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/KernelFactory.h"
namespace OpenMM {

/**
 * KernelFactory that builds the kernels used by the CUDA implementation of
 * RPMDIntegrator.
 */
class CudaRpmdKernelFactory : public KernelFactory {
public:
    /**
     * Create the implementation object for a named kernel.
     *
     * @param name      the name of the kernel to create
     * @param platform  the Platform the kernel is created for
     * @param context   the context the kernel will belong to
     * @return a newly allocated KernelImpl
     */
    KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
};

} // namespace OpenMM
#endif
/*OPENMM_CUDARPMDKERNELFACTORY_H_*/
plugins/rpmd/platforms/cuda/src/CudaRpmdKernelFactory.cpp
0 → 100644
View file @
05198df6
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <exception>
#include "CudaRpmdKernelFactory.h"
#include "CudaRpmdKernels.h"
#include "openmm/internal/windowsExport.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
using
namespace
OpenMM
;
/**
 * Exported with C linkage. This library registers no platforms of its own,
 * so the body is intentionally empty.
 * NOTE(review): presumably invoked by the plugin loader together with
 * registerKernelFactories() - confirm against the loader.
 */
extern "C" void registerPlatforms() {
    // Nothing to register.
}
/**
 * Exported with C linkage. Looks up the platform named "CUDA" and registers a
 * CudaRpmdKernelFactory on it for the IntegrateRPMDStepKernel kernel.
 *
 * Any std::exception raised while doing so is deliberately swallowed, leaving
 * the kernel factory unregistered (presumably because the CUDA platform is
 * not available in this process).
 */
extern "C" void registerKernelFactories() {
    try {
        Platform& platform = Platform::getPlatformByName("CUDA");
        CudaRpmdKernelFactory* factory = new CudaRpmdKernelFactory();
        platform.registerKernelFactory(IntegrateRPMDStepKernel::Name(), factory);
    }
    catch (const std::exception&) {
        // Catch by const reference: catching by value copies and slices the
        // exception object. The error itself is intentionally ignored.
    }
}
/**
 * Create the kernel implementation matching a kernel name.
 *
 * The CudaContext is taken from the first entry of the context's platform
 * data before the name is examined.
 *
 * @param name      the name of the requested kernel
 * @param platform  the Platform the kernel is created for
 * @param context   the context whose platform data supplies the CudaContext
 * @return a new CudaIntegrateRPMDStepKernel when name equals IntegrateRPMDStepKernel::Name()
 * @throws OpenMMException for any other kernel name
 */
KernelImpl* CudaRpmdKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
    CudaPlatform::PlatformData* data = static_cast<CudaPlatform::PlatformData*>(context.getPlatformData());
    CudaContext& cu = *data->contexts[0];
    if (name != IntegrateRPMDStepKernel::Name()) {
        const std::string message = std::string("Tried to create kernel with illegal kernel name '") + name + "'";
        throw OpenMMException(message.c_str());
    }
    return new CudaIntegrateRPMDStepKernel(name, platform, cu);
}
plugins/rpmd/platforms/cuda/src/CudaRpmdKernelSources.cpp.in
0 → 100644
View file @
05198df6
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaRpmdKernelSources.h"
using namespace OpenMM;
using namespace std;
plugins/rpmd/platforms/cuda/src/CudaRpmdKernelSources.h.in
0 → 100644
View file @
05198df6
#ifndef OPENMM_CUDARPMDKERNELSOURCES_H_
#define OPENMM_CUDARPMDKERNELSOURCES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <string>
namespace OpenMM {
/**
* This class is a central holding place for the source code of the RPMD CUDA kernels.
* It is a template: the CMake build script replaces the placeholder below with
* declarations generated from the .cu files in the kernels subfolder.
*/
class CudaRpmdKernelSources {
public:
// Placeholder substituted at build time with the generated declarations.
@CUDA_FILE_DECLARATIONS@
};
} // namespace OpenMM
#endif /*OPENMM_CUDARPMDKERNELSOURCES_H_*/
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
0 → 100644
View file @
05198df6
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaRpmdKernels.h"
#include "CudaRpmdKernelSources.h"
#include "openmm/internal/ContextImpl.h"
#include "CudaIntegrationUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaKernelSources.h"
#include "CudaNonbondedUtilities.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
using
namespace
OpenMM
;
using
namespace
std
;
/**
 * Find the smallest integer >= minimum whose only prime factors are 2, 3, 5
 * and 7. Values of minimum below 1 yield 1.
 */
static int findFFTDimension(int minimum) {
    if (minimum < 1)
        return 1;
    for (int candidate = minimum; ; ++candidate) {
        // Strip every factor in the range 2..7; a remainder of 1 means the
        // candidate factored completely.
        int remainder = candidate;
        for (int divisor = 2; divisor < 8; ++divisor)
            while (remainder > 1 && remainder % divisor == 0)
                remainder /= divisor;
        if (remainder == 1)
            return candidate;
    }
}
/**
 * Release the per-copy force, position and velocity arrays.
 */
CudaIntegrateRPMDStepKernel::~CudaIntegrateRPMDStepKernel() {
    // Deleting a null pointer is a no-op, so no explicit null checks are needed.
    delete forces;
    delete positions;
    delete velocities;
}
void
CudaIntegrateRPMDStepKernel
::
initialize
(
const
System
&
system
,
const
RPMDIntegrator
&
integrator
)
{
cu
.
getPlatformData
().
initializeContexts
(
system
);
numCopies
=
integrator
.
getNumCopies
();
numParticles
=
system
.
getNumParticles
();
workgroupSize
=
numCopies
;
while
(
workgroupSize
<=
128
-
numCopies
)
workgroupSize
+=
numCopies
;
if
(
numCopies
!=
findFFTDimension
(
numCopies
))
throw
OpenMMException
(
"RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5."
);
int
paddedParticles
=
cu
.
getPaddedNumAtoms
();
forces
=
CudaArray
::
create
<
long
long
>
(
cu
,
numCopies
*
paddedParticles
*
3
,
"rpmdForces"
);
positions
=
CudaArray
::
create
<
float4
>
(
cu
,
numCopies
*
paddedParticles
,
"rpmdPositions"
);
velocities
=
CudaArray
::
create
<
float4
>
(
cu
,
numCopies
*
paddedParticles
,
"rpmdVelocities"
);
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
((
unsigned
int
)
integrator
.
getRandomNumberSeed
());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
vector
<
float4
>
temp
(
positions
->
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
temp
[
i
]
=
make_float4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
temp
[
i
]
=
make_float4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
// Create kernels.
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"NUM_COPIES"
]
=
cu
.
intToString
(
numCopies
);
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
workgroupSize
);
defines
[
"HBAR"
]
=
cu
.
doubleToString
(
1.054571628e-34
*
AVOGADRO
/
(
1000
*
1e-12
));
defines
[
"SCALE"
]
=
cu
.
doubleToString
(
1.0
/
sqrt
((
double
)
numCopies
));
defines
[
"M_PI"
]
=
cu
.
doubleToString
(
M_PI
);
map
<
string
,
string
>
replacements
;
replacements
[
"FFT_Q_FORWARD"
]
=
createFFT
(
numCopies
,
"q"
,
true
);
replacements
[
"FFT_Q_BACKWARD"
]
=
createFFT
(
numCopies
,
"q"
,
false
);
replacements
[
"FFT_V_FORWARD"
]
=
createFFT
(
numCopies
,
"v"
,
true
);
replacements
[
"FFT_V_BACKWARD"
]
=
createFFT
(
numCopies
,
"v"
,
false
);
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
vectorOps
+
CudaRpmdKernelSources
::
rpmd
,
replacements
),
defines
,
""
);
pileKernel
=
cu
.
getKernel
(
module
,
"applyPileThermostat"
);
stepKernel
=
cu
.
getKernel
(
module
,
"integrateStep"
);
velocitiesKernel
=
cu
.
getKernel
(
module
,
"advanceVelocities"
);
copyToContextKernel
=
cu
.
getKernel
(
module
,
"copyToContext"
);
copyFromContextKernel
=
cu
.
getKernel
(
module
,
"copyFromContext"
);
translateKernel
=
cu
.
getKernel
(
module
,
"applyCellTranslations"
);
}
/**
 * Advance all copies by one time step: thermostat, position/velocity update,
 * force recomputation, velocity update, thermostat again.
 *
 * @param context         the context used to evaluate forces
 * @param integrator      supplies the step size, temperature and friction
 * @param forcesAreValid  if false, forces are recomputed before the step starts
 */
void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
    CudaIntegrationUtilities& utilities = cu.getIntegrationUtilities();

    // Make sure the stored forces correspond to the current positions.
    if (!forcesAreValid)
        computeForces(context);

    // Collect the integrator settings; the kernels receive them as floats.
    const double stepSize = integrator.getStepSize();
    const double thermalEnergy = integrator.getTemperature()*BOLTZ;
    const double frictionCoeff = integrator.getFriction();
    float stepSizeArg = (float) stepSize;
    float thermalEnergyArg = (float) thermalEnergy;
    float frictionArg = (float) frictionCoeff;
    const int numThreads = numParticles*numCopies;

    // Apply the PILE-L thermostat.
    int randomOffset = utilities.prepareRandomNumbers(numThreads);
    void* thermostatArgs[] = {&velocities->getDevicePointer(), &utilities.getRandom().getDevicePointer(), &randomOffset,
            &stepSizeArg, &thermalEnergyArg, &frictionArg};
    cu.executeKernel(pileKernel, thermostatArgs, numThreads, workgroupSize);

    // Update positions and velocities.
    void* integrateArgs[] = {&positions->getDevicePointer(), &velocities->getDevicePointer(), &forces->getDevicePointer(),
            &stepSizeArg, &thermalEnergyArg};
    cu.executeKernel(stepKernel, integrateArgs, numThreads, workgroupSize);

    // Calculate forces based on the updated positions.
    computeForces(context);

    // Update velocities.
    void* advanceArgs[] = {&velocities->getDevicePointer(), &forces->getDevicePointer(), &stepSizeArg};
    cu.executeKernel(velocitiesKernel, advanceArgs, numThreads, workgroupSize);

    // Apply the PILE-L thermostat again. thermostatArgs stores the address of
    // randomOffset, so assigning the new offset is all that is required.
    randomOffset = utilities.prepareRandomNumbers(numThreads);
    cu.executeKernel(pileKernel, thermostatArgs, numThreads, workgroupSize);

    // Update the time and step count.
    cu.setTime(cu.getTime()+stepSize);
    cu.setStepCount(cu.getStepCount()+1);
}
/**
 * Compute the forces on every copy. Each copy's positions are copied into the
 * context, the context evaluates forces, and the result is copied back into
 * this kernel's force array.
 *
 * @param context  the context used to evaluate forces
 */
void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
    for (int copy = 0; copy < numCopies; ++copy) {
        // Load this copy's positions into the context.
        void* loadArgs[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(),
                &cu.getAtomIndexArray().getDevicePointer(), &copy};
        cu.executeKernel(copyToContextKernel, loadArgs, cu.getNumAtoms());

        // Forces only; no energy.
        context.calcForcesAndEnergy(true, false);

        // Store the resulting forces for this copy.
        void* storeArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(),
                &cu.getAtomIndexArray().getDevicePointer(), &copy};
        cu.executeKernel(copyFromContextKernel, storeArgs, cu.getNumAtoms());

        if (!cu.getAtomsWereReordered() || !cu.getNonbondedUtilities().getUsePeriodic())
            continue;

        // Atoms may have been translated into a different periodic box, so apply
        // the same translation to all the beads.
        void* translateArgs[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(),
                &cu.getAtomIndexArray().getDevicePointer(), &copy};
        cu.executeKernel(translateKernel, translateArgs, cu.getNumAtoms());
    }
}
/**
 * Return the kinetic energy reported by the CUDA integration utilities.
 * Neither argument is used by this implementation.
 */
double CudaIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, const RPMDIntegrator& integrator) {
    // NOTE(review): the argument 0 is presumably a time shift - confirm.
    const double kineticEnergy = cu.getIntegrationUtilities().computeKineticEnergy(0);
    return kineticEnergy;
}
void
CudaIntegrateRPMDStepKernel
::
setPositions
(
int
copy
,
const
vector
<
Vec3
>&
pos
)
{
if
(
positions
==
NULL
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set positions before the integrator is added to a Context"
);
if
(
pos
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setPositions()"
);
vector
<
float4
>
posq
(
cu
.
getPaddedNumAtoms
());
cu
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
make_float4
((
float
)
pos
[
i
][
0
],
(
float
)
pos
[
i
][
1
],
(
float
)
pos
[
i
][
2
],
posq
[
i
].
w
);
CUresult
result
=
cuMemcpyHtoD
(
positions
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
float4
),
&
posq
[
0
],
numParticles
*
sizeof
(
float4
));
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
positions
->
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
}
void
CudaIntegrateRPMDStepKernel
::
setVelocities
(
int
copy
,
const
vector
<
Vec3
>&
vel
)
{
if
(
velocities
==
NULL
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"
);
if
(
vel
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setVelocities()"
);
vector
<
float4
>
velm
(
cu
.
getPaddedNumAtoms
());
cu
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
make_float4
((
float
)
vel
[
i
][
0
],
(
float
)
vel
[
i
][
1
],
(
float
)
vel
[
i
][
2
],
velm
[
i
].
w
);
CUresult
result
=
cuMemcpyHtoD
(
velocities
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
float4
),
&
velm
[
0
],
numParticles
*
sizeof
(
float4
));
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
velocities
->
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
}
void
CudaIntegrateRPMDStepKernel
::
copyToContext
(
int
copy
,
ContextImpl
&
context
)
{
void
*
copyPositionsArgs
[]
=
{
&
positions
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
copy
};
cu
.
executeKernel
(
copyToContextKernel
,
copyPositionsArgs
,
cu
.
getNumAtoms
());
void
*
copyVelocitiesArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
copy
};
cu
.
executeKernel
(
copyToContextKernel
,
copyVelocitiesArgs
,
cu
.
getNumAtoms
());
}
string
CudaIntegrateRPMDStepKernel
::
createFFT
(
int
size
,
const
string
&
variable
,
bool
forward
)
{
stringstream
source
;
int
unfactored
=
size
;
int
stage
=
0
;
int
L
=
size
;
int
m
=
1
;
string
sign
=
(
forward
?
"1.0f"
:
"-1.0f"
);
string
multReal
=
(
forward
?
"multiplyComplexRealPart"
:
"multiplyComplexRealPartConj"
);
string
multImag
=
(
forward
?
"multiplyComplexImagPart"
:
"multiplyComplexImagPartConj"
);
source
<<
"{
\n
"
;
source
<<
"float3* real0 = "
<<
variable
<<
"real;
\n
"
;
source
<<
"float3* imag0 = "
<<
variable
<<
"imag;
\n
"
;
source
<<
"float3* real1 = &temp[blockStart];
\n
"
;
source
<<
"float3* imag1 = &temp[blockStart+blockDim.x];
\n
"
;
// Factor size, generating an appropriate block of code for each factor.
while
(
unfactored
>
1
)
{
int
input
=
stage
%
2
;
int
output
=
1
-
input
;
source
<<
"{
\n
"
;
if
(
unfactored
%
5
==
0
)
{
L
=
L
/
5
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 5)
\n
"
;
source
<<
"if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = indexInBlock;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"float3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"float3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c1i = imag"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c2r = real"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c2i = imag"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c3r = real"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c3i = imag"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c4r = real"
<<
input
<<
"[i+"
<<
(
4
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c4i = imag"
<<
input
<<
"[i+"
<<
(
4
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 d0r = c1r+c4r;
\n
"
;
source
<<
"float3 d0i = c1i+c4i;
\n
"
;
source
<<
"float3 d1r = c2r+c3r;
\n
"
;
source
<<
"float3 d1i = c2i+c3i;
\n
"
;
source
<<
"float3 d2r = "
<<
cu
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c1r-c4r);
\n
"
;
source
<<
"float3 d2i = "
<<
cu
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c1i-c4i);
\n
"
;
source
<<
"float3 d3r = "
<<
cu
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c2r-c3r);
\n
"
;
source
<<
"float3 d3i = "
<<
cu
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c2i-c3i);
\n
"
;
source
<<
"float3 d4r = d0r+d1r;
\n
"
;
source
<<
"float3 d4i = d0i+d1i;
\n
"
;
source
<<
"float3 d5r = "
<<
cu
.
doubleToString
(
0.25
*
sqrt
(
5.0
))
<<
"*(d0r-d1r);
\n
"
;
source
<<
"float3 d5i = "
<<
cu
.
doubleToString
(
0.25
*
sqrt
(
5.0
))
<<
"*(d0i-d1i);
\n
"
;
source
<<
"float3 d6r = c0r-0.25f*d4r;
\n
"
;
source
<<
"float3 d6i = c0i-0.25f*d4i;
\n
"
;
source
<<
"float3 d7r = d6r+d5r;
\n
"
;
source
<<
"float3 d7i = d6i+d5i;
\n
"
;
source
<<
"float3 d8r = d6r-d5r;
\n
"
;
source
<<
"float3 d8i = d6i-d5i;
\n
"
;
string
coeff
=
cu
.
doubleToString
(
sin
(
0.2
*
M_PI
)
/
sin
(
0.4
*
M_PI
));
source
<<
"float3 d9r = "
<<
sign
<<
"*(d2i+"
<<
coeff
<<
"*d3i);
\n
"
;
source
<<
"float3 d9i = "
<<
sign
<<
"*(-d2r-"
<<
coeff
<<
"*d3r);
\n
"
;
source
<<
"float3 d10r = "
<<
sign
<<
"*("
<<
coeff
<<
"*d2i-d3i);
\n
"
;
source
<<
"float3 d10i = "
<<
sign
<<
"*(d3r-"
<<
coeff
<<
"*d2r);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+4*j*"
<<
m
<<
"] = c0r+d4r;
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+4*j*"
<<
m
<<
"] = c0i+d4i;
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(4*j+1)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
size
<<
"/"
<<
(
5
*
L
)
<<
"], d7r+d9r, d7i+d9i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(4*j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
5
*
L
)
<<
"], d7r+d9r, d7i+d9i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(4*j+2)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8r+d10r, d8i+d10i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(4*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8r+d10r, d8i+d10i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(4*j+3)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8r-d10r, d8i-d10i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(4*j+3)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8r-d10r, d8i-d10i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
4
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7r-d9r, d7i-d9i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
4
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7r-d9r, d7i-d9i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
5
;
unfactored
/=
5
;
}
else
if
(
unfactored
%
4
==
0
)
{
L
=
L
/
4
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 4)
\n
"
;
source
<<
"if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = indexInBlock;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"float3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"float3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c1i = imag"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c2r = real"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c2i = imag"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c3r = real"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c3i = imag"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 d0r = c0r+c2r;
\n
"
;
source
<<
"float3 d0i = c0i+c2i;
\n
"
;
source
<<
"float3 d1r = c0r-c2r;
\n
"
;
source
<<
"float3 d1i = c0i-c2i;
\n
"
;
source
<<
"float3 d2r = c1r+c3r;
\n
"
;
source
<<
"float3 d2i = c1i+c3i;
\n
"
;
source
<<
"float3 d3r = "
<<
sign
<<
"*(c1i-c3i);
\n
"
;
source
<<
"float3 d3i = "
<<
sign
<<
"*(c3r-c1r);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+3*j*"
<<
m
<<
"] = d0r+d2r;
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+3*j*"
<<
m
<<
"] = d0i+d2i;
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(3*j+1)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
size
<<
"/"
<<
(
4
*
L
)
<<
"], d1r+d3r, d1i+d3i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(3*j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
4
*
L
)
<<
"], d1r+d3r, d1i+d3i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0r-d2r, d0i-d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0r-d2r, d0i-d2i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1r-d3r, d1i-d3i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1r-d3r, d1i-d3i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
4
;
unfactored
/=
4
;
}
else
if
(
unfactored
%
3
==
0
)
{
L
=
L
/
3
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 3)
\n
"
;
source
<<
"if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = indexInBlock;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"float3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"float3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c1i = imag"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c2r = real"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c2i = imag"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 d0r = c1r+c2r;
\n
"
;
source
<<
"float3 d0i = c1i+c2i;
\n
"
;
source
<<
"float3 d1r = c0r-0.5f*d0r;
\n
"
;
source
<<
"float3 d1i = c0i-0.5f*d0i;
\n
"
;
source
<<
"float3 d2r = "
<<
sign
<<
"*"
<<
cu
.
doubleToString
(
sin
(
M_PI
/
3.0
))
<<
"*(c1i-c2i);
\n
"
;
source
<<
"float3 d2i = "
<<
sign
<<
"*"
<<
cu
.
doubleToString
(
sin
(
M_PI
/
3.0
))
<<
"*(c2r-c1r);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+2*j*"
<<
m
<<
"] = c0r+d0r;
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+2*j*"
<<
m
<<
"] = c0i+d0i;
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
size
<<
"/"
<<
(
3
*
L
)
<<
"], d1r+d2r, d1i+d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
3
*
L
)
<<
"], d1r+d2r, d1i+d2i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1r-d2r, d1i-d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1r-d2r, d1i-d2i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
3
;
unfactored
/=
3
;
}
else
if
(
unfactored
%
2
==
0
)
{
L
=
L
/
2
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 2)
\n
"
;
source
<<
"if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = indexInBlock;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"float3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"float3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"float3 c1i = imag"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"real"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0r+c1r;
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0i+c1i;
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0r-c1r, c0i-c1i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0r-c1r, c0i-c1i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
2
;
unfactored
/=
2
;
}
else
throw
OpenMMException
(
"Illegal size for FFT: "
+
cu
.
intToString
(
size
));
source
<<
"__syncthreads();
\n
"
;
source
<<
"}
\n
"
;
++
stage
;
}
// Create the kernel.
if
(
stage
%
2
==
1
)
{
source
<<
"real0[indexInBlock] = real1[indexInBlock];
\n
"
;
source
<<
"imag0[indexInBlock] = imag1[indexInBlock];
\n
"
;
}
source
<<
"}
\n
"
;
return
source
.
str
();
}
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
0 → 100644
View file @
05198df6
#ifndef CUDA_RPMD_KERNELS_H_
#define CUDA_RPMD_KERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/RpmdKernels.h"
#include "CudaContext.h"
#include "CudaArray.h"
namespace
OpenMM
{
/**
 * This kernel is invoked by RPMDIntegrator to take one time step, and to get and
 * set the state of system copies.
 */
class CudaIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public:
    /**
     * Create the kernel.  The three device array pointers start out NULL; the
     * remaining members are left uninitialized by this constructor.
     *
     * @param name      the name passed on to IntegrateRPMDStepKernel
     * @param platform  the Platform passed on to IntegrateRPMDStepKernel
     * @param cu        the CudaContext this kernel keeps a reference to
     */
    CudaIntegrateRPMDStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
            IntegrateRPMDStepKernel(name, platform), cu(cu), forces(NULL), positions(NULL), velocities(NULL) {
    }
    ~CudaIntegrateRPMDStepKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param integrator the RPMDIntegrator this kernel will be used for
     */
    void initialize(const System& system, const RPMDIntegrator& integrator);
    /**
     * Execute the kernel.
     *
     * @param context        the context in which to execute this kernel
     * @param integrator     the RPMDIntegrator this kernel is being used for
     * @param forcesAreValid if the context has been modified since the last time step, this will be
     *                       false to show that cached forces are invalid and must be recalculated
     */
    void execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid);
    /**
     * Compute the kinetic energy.
     *
     * @param context        the context in which to execute this kernel
     * @param integrator     the RPMDIntegrator this kernel is being used for
     */
    double computeKineticEnergy(ContextImpl& context, const RPMDIntegrator& integrator);
    /**
     * Set the positions of all particles in one copy of the system.
     *
     * @param copy       the index of the copy to modify
     * @param positions  the new position of every particle
     */
    void setPositions(int copy, const std::vector<Vec3>& positions);
    /**
     * Set the velocities of all particles in one copy of the system.
     *
     * @param copy        the index of the copy to modify
     * @param velocities  the new velocity of every particle
     */
    void setVelocities(int copy, const std::vector<Vec3>& velocities);
    /**
     * Copy positions and velocities for one copy into the context.
     *
     * @param copy     the index of the copy to transfer
     * @param context  the context to copy into
     */
    void copyToContext(int copy, ContextImpl& context);
private:
    void computeForces(ContextImpl& context);
    /**
     * Generate the source code of an FFT of the given size that operates on the
     * arrays named "<variable>real" and "<variable>imag".
     */
    std::string createFFT(int size, const std::string& variable, bool forward);
    CudaContext& cu;
    int numCopies, numParticles, workgroupSize;
    // Device arrays; NULL until allocated (presumably by initialize() - confirm).
    CudaArray* forces;
    CudaArray* positions;
    CudaArray* velocities;
    CUfunction pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
};
}
// namespace OpenMM
#endif
/*CUDA_RPMD_KERNELS_H_*/
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
0 → 100644
View file @
05198df6
/**
 * Real part of c1*(c2r + i*c2i), where c1 = c1.x + i*c1.y and the second factor
 * holds three independent complex numbers (one per vector component).
 */
__device__ float3 multiplyComplexRealPart(float2 c1, float3 c2r, float3 c2i) {
    return c1.x*c2r-c1.y*c2i;
}

/**
 * Imaginary part of c1*(c2r + i*c2i).
 */
__device__ float3 multiplyComplexImagPart(float2 c1, float3 c2r, float3 c2i) {
    return c1.x*c2i+c1.y*c2r;
}

/**
 * Real part of conj(c1)*(c2r + i*c2i).
 */
__device__ float3 multiplyComplexRealPartConj(float2 c1, float3 c2r, float3 c2i) {
    return c1.x*c2r+c1.y*c2i;
}

/**
 * Imaginary part of conj(c1)*(c2r + i*c2i).
 */
__device__ float3 multiplyComplexImagPartConj(float2 c1, float3 c2r, float3 c2i) {
    return c1.x*c2i-c1.y*c2r;
}
/**
 * Apply the PILE-L thermostat.
 *
 * Threads are organized into groups of NUM_COPIES consecutive threads.  Each group
 * handles one particle at a time, with one thread per copy: the thread at position
 * indexInBlock in its group reads and writes velm[particle+indexInBlock*PADDED_NUM_ATOMS].
 * The velocities of a particle are transformed with FFT_V_FORWARD, every mode is
 * damped and given a random kick, and the result is transformed back with
 * FFT_V_BACKWARD.
 *
 * NUM_COPIES, NUM_ATOMS, PADDED_NUM_ATOMS, THREAD_BLOCK_SIZE, HBAR, SCALE, EXP, SQRT,
 * FFT_V_FORWARD and FFT_V_BACKWARD are not defined in this file; they are presumably
 * supplied by the host code when the kernel is compiled - confirm.
 *
 * @param velm         velocities (x, y, z) and inverse mass (w) for all copies
 * @param random       random values used for the kicks (presumably normally distributed - confirm)
 * @param randomIndex  index of the first element of random this launch may use
 * @param dt           the time interval over which the thermostat acts
 * @param kT           Boltzmann's constant times the temperature
 * @param friction     friction coefficient applied to mode 0
 */
extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, unsigned int randomIndex,
        float dt, float kT, float friction) {
    const int numBlocks = blockDim.x*gridDim.x/NUM_COPIES; // number of thread groups in the whole grid
    const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES); // first thread of this thread's group
    const int indexInBlock = threadIdx.x-blockStart; // position within the group, i.e. the copy index
    const float nkT = NUM_COPIES*kT;
    const float twown = 2.0f*nkT/HBAR;
    const float c1_0 = EXP(-0.5f*dt*friction);
    const float c2_0 = SQRT(1.0f-c1_0*c1_0);
    __shared__ float3 v[2*THREAD_BLOCK_SIZE]; // first blockDim.x entries: real parts, next blockDim.x: imaginary parts
    __shared__ float3 temp[2*THREAD_BLOCK_SIZE]; // referenced by name from the FFT code
    __shared__ float2 w[NUM_COPIES];
    float3* vreal = &v[blockStart];
    float3* vimag = &v[blockStart+blockDim.x];

    // w[k] = exp(-2*pi*i*k/NUM_COPIES), filled in by the first NUM_COPIES threads of the block.

    if (threadIdx.x < NUM_COPIES)
        w[indexInBlock] = make_float2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
    __syncthreads();

    // Every group starts at its own offset into random, aligned to a multiple of NUM_COPIES.

    randomIndex += NUM_COPIES*((blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES);
    for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
        float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
        float invMass = particleVelm.w;
        float c3_0 = c2_0*SQRT(nkT*invMass);

        // Forward FFT.

        vreal[indexInBlock] = SCALE*make_float3(particleVelm.x, particleVelm.y, particleVelm.z);
        vimag[indexInBlock] = make_float3(0);
        __syncthreads();
        FFT_V_FORWARD

        // Apply the thermostat.

        if (indexInBlock == 0) {
            // Apply a local Langevin thermostat to the centroid mode.

            float4 rand = random[randomIndex];
            vreal[0] = vreal[0]*c1_0 + c3_0*make_float3(rand.x, rand.y, rand.z);
        }
        else {
            // Use critical damping white noise for the remaining modes.

            // Modes indexInBlock and NUM_COPIES-indexInBlock share the same k.  They use the
            // same pair of random vectors: the same real kick and imaginary kicks of opposite sign.

            int k = (indexInBlock <= NUM_COPIES/2 ? indexInBlock : NUM_COPIES-indexInBlock);
            const bool isCenter = (NUM_COPIES%2 == 0 && k == NUM_COPIES/2);
            const float wk = twown*sin(k*M_PI/NUM_COPIES);
            const float c1 = EXP(-wk*dt);
            const float c2 = SQRT((1.0f-c1*c1)/2.0f) * (isCenter ? sqrt(2.0f) : 1.0f);
            const float c3 = c2*SQRT(nkT*invMass);
            float4 rand1 = c3*random[randomIndex+k];
            float4 rand2 = (isCenter ? make_float4(0) : c3*random[randomIndex+NUM_COPIES-k]);
            vreal[indexInBlock] = c1*vreal[indexInBlock] + make_float3(rand1.x, rand1.y, rand1.z);
            vimag[indexInBlock] = c1*vimag[indexInBlock] + (indexInBlock < NUM_COPIES/2 ? make_float3(rand2.x, rand2.y, rand2.z) : make_float3(-rand2.x, -rand2.y, -rand2.z));
        }
        __syncthreads();

        // Inverse FFT.

        FFT_V_BACKWARD

        // Only the real part is written back; the inverse mass is carried through unchanged.

        velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_float4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w);
        randomIndex += blockDim.x*gridDim.x;
    }
}
/**
 * Advance the positions and velocities.
 *
 * The kernel first adds a half-step force kick to every velocity.  It then transforms
 * the positions and velocities of each particle's copies with FFT_Q_FORWARD and
 * FFT_V_FORWARD, propagates every mode of the free ring polymer for one time step,
 * and transforms back.
 *
 * Threads are organized into groups of NUM_COPIES consecutive threads; each group
 * handles one particle at a time, with one thread per copy.
 *
 * NUM_COPIES, NUM_ATOMS, PADDED_NUM_ATOMS, THREAD_BLOCK_SIZE, HBAR, SCALE and the FFT_*
 * macros are not defined in this file; they are presumably supplied by the host code
 * when the kernel is compiled - confirm.
 *
 * @param posq   positions (x, y, z) for all copies; the w component is carried through unchanged
 * @param velm   velocities (x, y, z) and inverse mass (w) for all copies
 * @param force  forces as integers, 3*PADDED_NUM_ATOMS values per copy: all x components,
 *               then all y components, then all z components
 * @param dt     the step size
 * @param kT     Boltzmann's constant times the temperature
 */
extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long* force, float dt, float kT) {
    const int numBlocks = (blockDim.x*gridDim.x)/NUM_COPIES; // number of thread groups in the whole grid
    const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES); // first thread of this thread's group
    const int indexInBlock = threadIdx.x-blockStart; // position within the group, i.e. the copy index
    const float nkT = NUM_COPIES*kT;
    const float twown = 2.0f*nkT/HBAR;
    const float forceScale = 1/(float) 0xFFFFFFFF; // converts the integer forces to float (presumably fixed point - confirm)
    __shared__ float3 q[2*THREAD_BLOCK_SIZE]; // first blockDim.x entries: real parts, next blockDim.x: imaginary parts
    __shared__ float3 v[2*THREAD_BLOCK_SIZE]; // same layout as q
    __shared__ float3 temp[2*THREAD_BLOCK_SIZE]; // referenced by name from the FFT code
    __shared__ float2 w[NUM_COPIES];

    // Update velocities.

    for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
        int index = particle+indexInBlock*PADDED_NUM_ATOMS;
        int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3;
        float4 particleVelm = velm[index];
        particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w);
        particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w);
        particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w);
        velm[index] = particleVelm;
    }

    // Evolve the free ring polymer by transforming to the frequency domain.

    float3* qreal = &q[blockStart];
    float3* qimag = &q[blockStart+blockDim.x];
    float3* vreal = &v[blockStart];
    float3* vimag = &v[blockStart+blockDim.x];

    // w[k] = exp(-2*pi*i*k/NUM_COPIES), filled in by the first NUM_COPIES threads of the block.

    if (threadIdx.x < NUM_COPIES)
        w[indexInBlock] = make_float2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
    __syncthreads();
    for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
        float4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
        float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];

        // Forward FFT.

        qreal[indexInBlock] = SCALE*make_float3(particlePosq.x, particlePosq.y, particlePosq.z);
        qimag[indexInBlock] = make_float3(0);
        vreal[indexInBlock] = SCALE*make_float3(particleVelm.x, particleVelm.y, particleVelm.z);
        vimag[indexInBlock] = make_float3(0);
        __syncthreads();
        FFT_Q_FORWARD
        FFT_V_FORWARD

        // Propagate each mode of the free ring polymer for one time step.  (No thermostat
        // is applied in this kernel.)

        if (indexInBlock == 0) {
            // Mode 0 has zero frequency, so it simply drifts.

            qreal[0] += vreal[0]*dt;
            qimag[0] += vimag[0]*dt;
        }
        else {
            // The other modes are harmonic oscillators with angular frequency wk.

            const float wk = twown*sin(indexInBlock*M_PI/NUM_COPIES);
            const float wt = wk*dt;
            const float coswt = cos(wt);
            const float sinwt = sin(wt);
            const float3 vprimereal = vreal[indexInBlock]*coswt - qreal[indexInBlock]*(wk*sinwt); // Advance velocity from t to t+dt
            const float3 vprimeimag = vimag[indexInBlock]*coswt - qimag[indexInBlock]*(wk*sinwt);
            qreal[indexInBlock] = vreal[indexInBlock]*(sinwt/wk) + qreal[indexInBlock]*coswt; // Advance position from t to t+dt
            qimag[indexInBlock] = vimag[indexInBlock]*(sinwt/wk) + qimag[indexInBlock]*coswt;
            vreal[indexInBlock] = vprimereal;
            vimag[indexInBlock] = vprimeimag;
        }
        __syncthreads();

        // Inverse FFT.

        FFT_Q_BACKWARD
        FFT_V_BACKWARD

        // Only the real parts are written back; the w components are carried through unchanged.

        posq[particle+indexInBlock*PADDED_NUM_ATOMS] = make_float4(SCALE*qreal[indexInBlock].x, SCALE*qreal[indexInBlock].y, SCALE*qreal[indexInBlock].z, particlePosq.w);
        velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_float4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w);
    }
}
/**
 * Give every copy of every particle a half-step velocity kick from the current
 * forces: v += 0.5*dt*(velm.w)*f, where velm.w holds the inverse mass.
 *
 * Each group of NUM_COPIES consecutive threads handles one particle at a time, with
 * one thread per copy.  Forces are stored as integers, 3*PADDED_NUM_ATOMS values per
 * copy (all x components, then all y, then all z).
 */
extern "C" __global__ void advanceVelocities(float4* velm, long long* force, float dt) {
    const float forceScale = 1/(float) 0xFFFFFFFF;
    const int groupStart = NUM_COPIES*(threadIdx.x/NUM_COPIES);
    const int copy = threadIdx.x-groupStart;
    const int numGroups = (blockDim.x*gridDim.x)/NUM_COPIES;

    int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES;
    while (particle < NUM_ATOMS) {
        const int velIndex = particle+copy*PADDED_NUM_ATOMS;
        const int forceIndex = particle+copy*PADDED_NUM_ATOMS*3;
        float4 vel = velm[velIndex];

        // The same factor multiplies all three force components.
        const float halfKick = 0.5f*dt*vel.w;
        vel.x += forceScale*force[forceIndex]*halfKick;
        vel.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*halfKick;
        vel.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*halfKick;
        velm[velIndex] = vel;
        particle += numGroups;
    }
}
/**
 * Copy one copy's per-atom values out of the integrator's arrays into an array
 * belonging to the context.
 *
 * Element i of dst receives the value stored for atom order[i] in the requested copy
 * of src, where each copy occupies PADDED_NUM_ATOMS consecutive elements.
 */
extern "C" __global__ void copyToContext(float4* src, float4* dst, int* order, int copy) {
    const int copyOffset = copy*PADDED_NUM_ATOMS;
    int atom = blockIdx.x*blockDim.x+threadIdx.x;
    while (atom < NUM_ATOMS) {
        dst[atom] = src[copyOffset+order[atom]];
        atom += blockDim.x*gridDim.x;
    }
}
/**
 * Copy the per-atom forces from the context into one copy's slot of the
 * integrator's force array.
 *
 * Both arrays store all x components, then all y components, then all z components,
 * each run PADDED_NUM_ATOMS long.  The value at position i of src is stored for
 * atom order[i] in dst.
 */
extern "C" __global__ void copyFromContext(long long* src, long long* dst, int* order, int copy) {
    const int copyOffset = copy*PADDED_NUM_ATOMS*3;
    int atom = blockIdx.x*blockDim.x+threadIdx.x;
    while (atom < NUM_ATOMS) {
        const int target = copyOffset+order[atom];
        for (int component = 0; component < 3; component++)
            dst[target+PADDED_NUM_ATOMS*component] = src[atom+PADDED_NUM_ATOMS*component];
        atom += blockDim.x*gridDim.x;
    }
}
/**
 * Shift every copy of each atom by the same displacement, so that all copies are
 * offset by the same number of periodic box widths.
 *
 * The displacement is the difference between an atom's entry in movedPos and the
 * position currently stored for it in copy movedCopy of posq.
 */
extern "C" __global__ void applyCellTranslations(float4* posq, float4* movedPos, int* order, int movedCopy) {
    int atom = blockIdx.x*blockDim.x+threadIdx.x;
    while (atom < NUM_ATOMS) {
        const int atomIndex = order[atom];
        const float4 shift = movedPos[atom]-posq[movedCopy*PADDED_NUM_ATOMS+atomIndex];
        int copy = 0;
        while (copy < NUM_COPIES) {
            posq[copy*PADDED_NUM_ATOMS+atomIndex] += shift;
            copy++;
        }
        atom += blockDim.x*gridDim.x;
    }
}
plugins/rpmd/platforms/cuda/tests/CMakeLists.txt
0 → 100644
View file @
05198df6
#
# Testing
#
# Builds one executable per test source file in this directory and registers it
# with CTest.
#

ENABLE_TESTING()

# Make the CUDA headers available to the test sources.
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIR})

# Automatically create tests using files named "*Test*.cpp"
FILE(GLOB TEST_PROGS "*Test*.cpp")
FOREACH(TEST_PROG ${TEST_PROGS})
    # The executable and the test are both named after the source file, without its extension.
    GET_FILENAME_COMPONENT(TEST_ROOT ${TEST_PROG} NAME_WE)

    # Link with shared library
    ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
    TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_RPMD_TARGET})
    ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
ENDFOREACH(TEST_PROG ${TEST_PROGS})
plugins/rpmd/platforms/cuda/tests/TestCudaRpmd.cpp
0 → 100644
View file @
05198df6
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of RPMDIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "openmm/CustomNonbondedForce.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/RPMDIntegrator.h"
#include "SimTKUtilities/SimTKOpenMMUtilities.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
void
testFreeParticles
()
{
const
int
numParticles
=
100
;
const
int
numCopies
=
30
;
const
double
temperature
=
300.0
;
const
double
mass
=
1.0
;
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
mass
);
RPMDIntegrator
integ
(
numCopies
,
temperature
,
10.0
,
0.001
);
Platform
&
platform
=
Platform
::
getPlatformByName
(
"CUDA"
);
Context
context
(
system
,
integ
,
platform
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
positions
[
j
]
=
Vec3
(
0.02
*
genrand_real2
(
sfmt
),
0.02
*
genrand_real2
(
sfmt
),
0.02
*
genrand_real2
(
sfmt
));
integ
.
setPositions
(
i
,
positions
);
}
const
int
numSteps
=
1000
;
integ
.
step
(
1000
);
vector
<
double
>
ke
(
numCopies
,
0.0
);
vector
<
double
>
rg
(
numParticles
,
0.0
);
const
RealOpenMM
hbar
=
1.054571628e-34
*
AVOGADRO
/
(
1000
*
1e-12
);
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
integ
.
step
(
1
);
vector
<
State
>
state
(
numCopies
);
for
(
int
j
=
0
;
j
<
numCopies
;
j
++
)
state
[
j
]
=
integ
.
getState
(
j
,
State
::
Positions
|
State
::
Velocities
);
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
{
double
rg2
=
0.0
;
for
(
int
k
=
0
;
k
<
numCopies
;
k
++
)
{
Vec3
v
=
state
[
k
].
getVelocities
()[
j
];
ke
[
k
]
+=
0.5
*
mass
*
v
.
dot
(
v
);
for
(
int
m
=
0
;
m
<
numCopies
;
m
++
)
{
Vec3
delta
=
state
[
k
].
getPositions
()[
j
]
-
state
[
m
].
getPositions
()[
j
];
rg2
+=
delta
.
dot
(
delta
);
}
}
rg
[
j
]
+=
rg2
/
(
2
*
numCopies
*
numCopies
);
}
}
double
meanKE
=
0.0
;
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
meanKE
+=
ke
[
i
];
meanKE
/=
numSteps
*
numCopies
;
double
expectedKE
=
0.5
*
numCopies
*
numParticles
*
3
*
BOLTZ
*
temperature
;
ASSERT_USUALLY_EQUAL_TOL
(
expectedKE
,
meanKE
,
1e-2
);
double
meanRg2
=
0.0
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
meanRg2
+=
rg
[
i
];
meanRg2
/=
numSteps
*
numParticles
;
double
expectedRg
=
hbar
/
(
2
*
sqrt
(
mass
*
BOLTZ
*
temperature
));
ASSERT_USUALLY_EQUAL_TOL
(
expectedRg
,
sqrt
(
meanRg2
),
1e-3
);
}
void
testParaHydrogen
()
{
const
int
numParticles
=
32
;
const
int
numCopies
=
12
;
const
double
temperature
=
25.0
;
const
double
mass
=
2.0
;
const
double
boxSize
=
1.1896
;
const
int
numSteps
=
1000
;
const
int
numBins
=
200
;
const
double
reference
[]
=
{
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
4.932814042206152e-5
,
1.244331241336431e-4
,
4.052316284060125e-4
,
1.544810863683946e-3
,
4.376197806690222e-3
,
1.025847561714293e-2
,
2.286702037465422e-2
,
4.371052180263602e-2
,
7.518538770734748e-2
,
0.122351534531647
,
0.185758975626622
,
0.266399984652322
,
0.363380262153250
,
0.473696401293219
,
0.595312098494172
,
0.726049519422861
,
0.862264551954547
,
0.991102029379444
,
1.1147503922535
,
1.23587006992066
,
1.33495411932817
,
1.42208208736987
,
1.49273884004107
,
1.54633319690403
,
1.58714702233941
,
1.60439217751355
,
1.61804190608902
,
1.60680198476058
,
1.58892222973695
,
1.56387607986781
,
1.52629494593350
,
1.48421439018970
,
1.43656176771959
,
1.38752775598872
,
1.33310695719931
,
1.28363477223121
,
1.23465642750248
,
1.18874848666326
,
1.14350496170519
,
1.10292486009936
,
1.06107270157688
,
1.02348927970441
,
0.989729345271297
,
0.959273446941802
,
0.932264875865758
,
0.908818658748942
,
0.890946420768315
,
0.869332737718165
,
0.856401736350349
,
0.842370069917020
,
0.834386614237393
,
0.826268072171045
,
0.821547250199453
,
0.818786865315836
,
0.819441757028076
,
0.819156933383128
,
0.822275325148621
,
0.828919078023881
,
0.837233720599450
,
0.846961908186718
,
0.855656955481099
,
0.864520333201247
,
0.876082425547566
,
0.886950044046000
,
0.900275658318995
};
// Create a box of para-hydrogen.
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
mass
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
CustomNonbondedForce
*
nb
=
new
CustomNonbondedForce
(
"2625.49963*(exp(1.713-1.5671*p-0.00993*p*p)-(12.14/p^6+215.2/p^8-143.1/p^9+4813.9/p^10)*(step(rc-p)*exp(-(rc/p-1)^2)+1-step(rc-p))); p=r/0.05291772108; rc=8.32"
);
nb
->
setNonbondedMethod
(
CustomNonbondedForce
::
CutoffPeriodic
);
nb
->
setCutoffDistance
(
boxSize
/
2
);
vector
<
double
>
params
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
nb
->
addParticle
(
params
);
system
.
addForce
(
nb
);
RPMDIntegrator
integ
(
numCopies
,
temperature
,
10.0
,
0.0005
);
Platform
&
platform
=
Platform
::
getPlatformByName
(
"CUDA"
);
Context
context
(
system
,
integ
,
platform
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
positions
[
i
]
=
Vec3
(
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
));
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
integ
.
setPositions
(
i
,
positions
);
integ
.
step
(
1000
);
// Simulate it.
vector
<
int
>
counts
(
numBins
,
0
);
const
double
invBoxSize
=
1.0
/
boxSize
;
double
meanKE
=
0.0
;
const
RealOpenMM
hbar
=
1.054571628e-34
*
AVOGADRO
/
(
1000
*
1e-12
);
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
integ
.
step
(
20
);
vector
<
State
>
states
(
numCopies
);
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
states
[
i
]
=
integ
.
getState
(
i
,
State
::
Positions
|
State
::
Forces
);
// Record the radial distribution function.
const
vector
<
Vec3
>&
pos
=
states
[
0
].
getPositions
();
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
for
(
int
k
=
0
;
k
<
j
;
k
++
)
{
Vec3
delta
=
pos
[
j
]
-
pos
[
k
];
delta
[
0
]
-=
floor
(
delta
[
0
]
*
invBoxSize
+
0.5
)
*
boxSize
;
delta
[
1
]
-=
floor
(
delta
[
1
]
*
invBoxSize
+
0.5
)
*
boxSize
;
delta
[
2
]
-=
floor
(
delta
[
2
]
*
invBoxSize
+
0.5
)
*
boxSize
;
double
dist
=
sqrt
(
delta
.
dot
(
delta
));
int
bin
=
(
int
)
(
numBins
*
(
dist
/
boxSize
));
counts
[
bin
]
++
;
}
// Calculate the quantum contribution to the kinetic energy.
vector
<
Vec3
>
centroids
(
numParticles
,
Vec3
());
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
const
vector
<
Vec3
>&
pos
=
states
[
i
].
getPositions
();
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
centroids
[
j
]
+=
pos
[
j
];
}
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
centroids
[
j
]
*=
1.0
/
numCopies
;
double
ke
=
0.0
;
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
const
vector
<
Vec3
>&
pos
=
states
[
i
].
getPositions
();
const
vector
<
Vec3
>&
f
=
states
[
i
].
getForces
();
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
{
Vec3
delta
=
centroids
[
j
]
-
pos
[
j
];
ke
+=
delta
.
dot
(
f
[
j
]);
}
}
meanKE
+=
ke
/
(
2
*
numCopies
*
numParticles
);
}
// Check against expected values.
double
scale
=
(
boxSize
*
boxSize
*
boxSize
)
/
(
numSteps
*
0.5
*
numParticles
*
numParticles
);
for
(
int
i
=
0
;
i
<
numBins
/
2
;
i
++
)
{
double
r1
=
i
*
boxSize
/
numBins
;
double
r2
=
(
i
+
1
)
*
boxSize
/
numBins
;
double
volume
=
(
4.0
/
3.0
)
*
M_PI
*
(
r2
*
r2
*
r2
-
r1
*
r1
*
r1
);
ASSERT_USUALLY_EQUAL_TOL
(
reference
[
i
],
scale
*
counts
[
i
]
/
volume
,
0.1
);
}
meanKE
/=
numSteps
*
BOLTZ
;
ASSERT_USUALLY_EQUAL_TOL
(
60.0
,
1.5
*
temperature
+
meanKE
,
0.02
);
}
// Test driver. Loads the platform plugins found in the default plugins
// directory, then runs testFreeParticles() followed by testParaHydrogen().
// A std::exception escaping any of those calls is reported on standard
// output and makes the process exit with status 1; otherwise "Done" is
// printed and the exit status is 0.
int main() {
    int exitStatus = 0;
    try {
        Platform::loadPluginsFromDirectory(Platform::getDefaultPluginsDirectory());
        testFreeParticles();
        testParaHydrogen();
    }
    catch (const std::exception& failure) {
        // Print the exception text first, then the failure marker line.
        std::cout << "exception: " << failure.what() << std::endl;
        std::cout << "FAIL - ERROR. Test failed." << std::endl;
        exitStatus = 1;
    }
    // "Done" is only announced when no exception was caught.
    if (exitStatus == 0)
        std::cout << "Done" << std::endl;
    return exitStatus;
}
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
View file @
05198df6
...
...
@@ -98,10 +98,7 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
}
void
OpenCLIntegrateRPMDStepKernel
::
execute
(
ContextImpl
&
context
,
const
RPMDIntegrator
&
integrator
,
bool
forcesAreValid
)
{
const
System
&
system
=
context
.
getSystem
();
const
int
paddedParticles
=
cl
.
getPaddedNumAtoms
();
OpenCLIntegrationUtilities
&
integration
=
cl
.
getIntegrationUtilities
();
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
pileKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
->
getDeviceBuffer
());
...
...
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
View file @
05198df6
...
...
@@ -107,7 +107,6 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
float4
particlePosq
=
posq[particle+indexInBlock*PADDED_NUM_ATOMS]
;
float4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
float
invMass
=
particleVelm.w
;
//
Forward
FFT.
...
...
@@ -130,7 +129,6 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob
const
float
wt
=
wk*dt
;
const
float
coswt
=
cos
(
wt
)
;
const
float
sinwt
=
sin
(
wt
)
;
const
float
wm
=
wk/particleVelm.w
;
const
float4
vprimereal
=
vreal[indexInBlock]*coswt
-
qreal[indexInBlock]*
(
wk*sinwt
)
; // Advance velocity from t to t+dt
const
float4
vprimeimag
=
vimag[indexInBlock]*coswt
-
qimag[indexInBlock]*
(
wk*sinwt
)
;
qreal[indexInBlock]
=
vreal[indexInBlock]*
(
sinwt/wk
)
+
qreal[indexInBlock]*coswt
; // Advance position from t to t+dt
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment