Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
bf6d95c2
Commit
bf6d95c2
authored
Jun 05, 2015
by
John Chodera (MSKCC)
Browse files
Merge remote-tracking branch 'upstream/master'
parents
896413aa
227c86bf
Changes
46
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1361 additions
and
86 deletions
+1361
-86
platforms/cuda/include/CudaContext.h
platforms/cuda/include/CudaContext.h
+1
-1
platforms/cuda/include/CudaFFT3D.h
platforms/cuda/include/CudaFFT3D.h
+101
-0
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+4
-2
platforms/cuda/src/CudaFFT3D.cpp
platforms/cuda/src/CudaFFT3D.cpp
+350
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+49
-54
platforms/cuda/src/kernels/fft.cu
platforms/cuda/src/kernels/fft.cu
+53
-0
platforms/cuda/src/kernels/fftR2C.cu
platforms/cuda/src/kernels/fftR2C.cu
+172
-0
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+3
-3
platforms/cuda/tests/TestCudaFFT3D.cpp
platforms/cuda/tests/TestCudaFFT3D.cpp
+140
-0
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+1
-1
plugins/cpupme/src/CpuPmeKernels.cpp
plugins/cpupme/src/CpuPmeKernels.cpp
+3
-3
plugins/cpupme/src/CpuPmeKernels.h
plugins/cpupme/src/CpuPmeKernels.h
+1
-0
wrappers/python/simtk/openmm/__init__.py
wrappers/python/simtk/openmm/__init__.py
+2
-0
wrappers/python/simtk/openmm/amd.py
wrappers/python/simtk/openmm/amd.py
+257
-0
wrappers/python/simtk/openmm/app/__init__.py
wrappers/python/simtk/openmm/app/__init__.py
+1
-1
wrappers/python/simtk/openmm/app/gromacstopfile.py
wrappers/python/simtk/openmm/app/gromacstopfile.py
+89
-6
wrappers/python/simtk/openmm/app/internal/amber_file_parser.py
...ers/python/simtk/openmm/app/internal/amber_file_parser.py
+2
-0
wrappers/python/simtk/openmm/app/internal/customgbforces.py
wrappers/python/simtk/openmm/app/internal/customgbforces.py
+8
-10
wrappers/python/simtk/openmm/app/simulation.py
wrappers/python/simtk/openmm/app/simulation.py
+117
-4
wrappers/python/simtk/openmm/app/statedatareporter.py
wrappers/python/simtk/openmm/app/statedatareporter.py
+7
-1
No files found.
platforms/cuda/include/CudaContext.h
View file @
bf6d95c2
...
@@ -30,12 +30,12 @@
...
@@ -30,12 +30,12 @@
#include <map>
#include <map>
#include <queue>
#include <queue>
#include <string>
#include <string>
#include <pthread.h>
#define __CL_ENABLE_EXCEPTIONS
#define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER
#ifdef _MSC_VER
// Prevent Windows from defining macros that interfere with other code.
// Prevent Windows from defining macros that interfere with other code.
#define NOMINMAX
#define NOMINMAX
#endif
#endif
#include <pthread.h>
#include <cuda.h>
#include <cuda.h>
#include <builtin_types.h>
#include <builtin_types.h>
#include <vector_functions.h>
#include <vector_functions.h>
...
...
platforms/cuda/include/CudaFFT3D.h
0 → 100644
View file @
bf6d95c2
#ifndef __OPENMM_CUDAFFT3D_H__
#define __OPENMM_CUDAFFT3D_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaArray.h"
namespace
OpenMM
{
/**
* This class performs three dimensional Fast Fourier Transforms. It is based on the
* mixed radix algorithm described in
* <p>
* Takahashi, D. and Kanada, Y., "High-Performance Radix-2, 3 and 5 Parallel 1-D Complex
* FFT Algorithms for Distributed-Memory Parallel Computers." Journal of Supercomputing,
* 15, 207–228 (2000).
* <p>
* This class places certain restrictions on the allowed dimensions of the grid. First,
* the size of each dimension may have no prime factors other than 2, 3, 5, and 7. You
* can call findLegalDimension() to determine the smallest size that satisfies this
* requirement and is greater than or equal to a specified minimum size. Second, the size
* of each dimension must be small enough to compute each 1D transform entirely in local
* memory with one work unit per data point. This will vary between platforms, but is
* typically at least 512.
* <p>
* Note that this class performs an unnormalized transform. That means that if you perform
* a forward transform followed immediately by an inverse transform, the effect is to
* multiply every value of the original data set by the total number of data points.
*/
class
OPENMM_EXPORT_CUDA
CudaFFT3D
{
public:
/**
* Create an CudaFFT3D object for performing transforms of a particular size.
*
* @param context the context in which to perform calculations
* @param xsize the first dimension of the data sets on which FFTs will be performed
* @param ysize the second dimension of the data sets on which FFTs will be performed
* @param zsize the third dimension of the data sets on which FFTs will be performed
* @param realToComplex if true, a real-to-complex transform will be done. Otherwise, it is complex-to-complex.
*/
CudaFFT3D
(
CudaContext
&
context
,
int
xsize
,
int
ysize
,
int
zsize
,
bool
realToComplex
=
false
);
/**
* Perform a Fourier transform. The transform cannot be done in-place: the input and output
* arrays must be different. Also, the input array is used as workspace, so its contents
* are destroyed. This also means that both arrays must be large enough to hold complex values,
* even when performing a real-to-complex transform.
* <p>
* When performing a real-to-complex transform, the output data is of size xsize*ysize*(zsize/2+1)
* and contains only the non-redundant elements.
*
* @param in the data to transform, ordered such that in[x*ysize*zsize + y*zsize + z] contains element (x, y, z)
* @param out on exit, this contains the transformed data
* @param forward true to perform a forward transform, false to perform an inverse transform
*/
void
execFFT
(
CudaArray
&
in
,
CudaArray
&
out
,
bool
forward
=
true
);
/**
* Get the smallest legal size for a dimension of the grid (that is, a size with no prime
* factors other than 2, 3, 5, and 7).
*
* @param minimum the minimum size the return value must be greater than or equal to
*/
static
int
findLegalDimension
(
int
minimum
);
private:
CUfunction
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
,
int
&
threads
,
int
axis
,
bool
forward
,
bool
inputIsReal
);
int
xsize
,
ysize
,
zsize
;
int
xthreads
,
ythreads
,
zthreads
;
bool
packRealAsComplex
;
CudaContext
&
context
;
CUfunction
xkernel
,
ykernel
,
zkernel
;
CUfunction
invxkernel
,
invykernel
,
invzkernel
;
CUfunction
packForwardKernel
,
unpackForwardKernel
,
packBackwardKernel
,
unpackBackwardKernel
;
};
}
// namespace OpenMM
#endif // __OPENMM_CUDAFFT3D_H__
platforms/cuda/include/CudaKernels.h
View file @
bf6d95c2
...
@@ -30,6 +30,7 @@
...
@@ -30,6 +30,7 @@
#include "CudaPlatform.h"
#include "CudaPlatform.h"
#include "CudaArray.h"
#include "CudaArray.h"
#include "CudaContext.h"
#include "CudaContext.h"
#include "CudaFFT3D.h"
#include "CudaParameterSet.h"
#include "CudaParameterSet.h"
#include "CudaSort.h"
#include "CudaSort.h"
#include "openmm/kernels.h"
#include "openmm/kernels.h"
...
@@ -588,7 +589,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
...
@@ -588,7 +589,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
),
pmeio
(
NULL
)
{
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
}
}
~
CudaCalcNonbondedForceKernel
();
~
CudaCalcNonbondedForceKernel
();
/**
/**
...
@@ -649,6 +650,7 @@ private:
...
@@ -649,6 +650,7 @@ private:
PmeIO
*
pmeio
;
PmeIO
*
pmeio
;
CUstream
pmeStream
;
CUstream
pmeStream
;
CUevent
pmeSyncEvent
;
CUevent
pmeSyncEvent
;
CudaFFT3D
*
fft
;
cufftHandle
fftForward
;
cufftHandle
fftForward
;
cufftHandle
fftBackward
;
cufftHandle
fftBackward
;
CUfunction
ewaldSumsKernel
;
CUfunction
ewaldSumsKernel
;
...
@@ -663,7 +665,7 @@ private:
...
@@ -663,7 +665,7 @@ private:
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
int
interpolateForceThreads
;
int
interpolateForceThreads
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
};
};
...
...
platforms/cuda/src/CudaFFT3D.cpp
0 → 100644
View file @
bf6d95c2
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaFFT3D.h"
#include "CudaContext.h"
#include "CudaKernelSources.h"
#include "SimTKOpenMMRealType.h"
#include <map>
#include <sstream>
#include <string>
using
namespace
OpenMM
;
using
namespace
std
;
CudaFFT3D
::
CudaFFT3D
(
CudaContext
&
context
,
int
xsize
,
int
ysize
,
int
zsize
,
bool
realToComplex
)
:
context
(
context
),
xsize
(
xsize
),
ysize
(
ysize
),
zsize
(
zsize
)
{
packRealAsComplex
=
false
;
int
packedXSize
=
xsize
;
int
packedYSize
=
ysize
;
int
packedZSize
=
zsize
;
if
(
realToComplex
)
{
// If any axis size is even, we can pack the real values into a complex grid that is only half as large.
// Look for an appropriate axis.
packRealAsComplex
=
true
;
int
packedAxis
,
bufferSize
;
if
(
xsize
%
2
==
0
)
{
packedAxis
=
0
;
packedXSize
/=
2
;
bufferSize
=
packedXSize
;
}
else
if
(
ysize
%
2
==
0
)
{
packedAxis
=
1
;
packedYSize
/=
2
;
bufferSize
=
packedYSize
;
}
else
if
(
zsize
%
2
==
0
)
{
packedAxis
=
2
;
packedZSize
/=
2
;
bufferSize
=
packedZSize
;
}
else
packRealAsComplex
=
false
;
if
(
packRealAsComplex
)
{
// Build the kernels for packing and unpacking the data.
map
<
string
,
string
>
defines
;
defines
[
"XSIZE"
]
=
context
.
intToString
(
xsize
);
defines
[
"YSIZE"
]
=
context
.
intToString
(
ysize
);
defines
[
"ZSIZE"
]
=
context
.
intToString
(
zsize
);
defines
[
"PACKED_AXIS"
]
=
context
.
intToString
(
packedAxis
);
defines
[
"PACKED_XSIZE"
]
=
context
.
intToString
(
packedXSize
);
defines
[
"PACKED_YSIZE"
]
=
context
.
intToString
(
packedYSize
);
defines
[
"PACKED_ZSIZE"
]
=
context
.
intToString
(
packedZSize
);
CUmodule
module
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
fftR2C
,
defines
);
packForwardKernel
=
context
.
getKernel
(
module
,
"packForwardData"
);
unpackForwardKernel
=
context
.
getKernel
(
module
,
"unpackForwardData"
);
packBackwardKernel
=
context
.
getKernel
(
module
,
"packBackwardData"
);
unpackBackwardKernel
=
context
.
getKernel
(
module
,
"unpackBackwardData"
);
}
}
bool
inputIsReal
=
(
realToComplex
&&
!
packRealAsComplex
);
zkernel
=
createKernel
(
packedXSize
,
packedYSize
,
packedZSize
,
zthreads
,
0
,
true
,
inputIsReal
);
xkernel
=
createKernel
(
packedYSize
,
packedZSize
,
packedXSize
,
xthreads
,
1
,
true
,
inputIsReal
);
ykernel
=
createKernel
(
packedZSize
,
packedXSize
,
packedYSize
,
ythreads
,
2
,
true
,
inputIsReal
);
invzkernel
=
createKernel
(
packedXSize
,
packedYSize
,
packedZSize
,
zthreads
,
0
,
false
,
inputIsReal
);
invxkernel
=
createKernel
(
packedYSize
,
packedZSize
,
packedXSize
,
xthreads
,
1
,
false
,
inputIsReal
);
invykernel
=
createKernel
(
packedZSize
,
packedXSize
,
packedYSize
,
ythreads
,
2
,
false
,
inputIsReal
);
}
void
CudaFFT3D
::
execFFT
(
CudaArray
&
in
,
CudaArray
&
out
,
bool
forward
)
{
CUfunction
kernel1
=
(
forward
?
zkernel
:
invzkernel
);
CUfunction
kernel2
=
(
forward
?
xkernel
:
invxkernel
);
CUfunction
kernel3
=
(
forward
?
ykernel
:
invykernel
);
void
*
args1
[]
=
{
&
in
.
getDevicePointer
(),
&
out
.
getDevicePointer
()};
void
*
args2
[]
=
{
&
out
.
getDevicePointer
(),
&
in
.
getDevicePointer
()};
if
(
packRealAsComplex
)
{
CUfunction
packKernel
=
(
forward
?
packForwardKernel
:
packBackwardKernel
);
CUfunction
unpackKernel
=
(
forward
?
unpackForwardKernel
:
unpackBackwardKernel
);
int
gridSize
=
xsize
*
ysize
*
zsize
/
2
;
// Pack the data into a half sized grid.
context
.
executeKernel
(
packKernel
,
args1
,
gridSize
,
128
);
// Perform the FFT.
context
.
executeKernel
(
kernel1
,
args2
,
gridSize
,
zthreads
);
context
.
executeKernel
(
kernel2
,
args1
,
gridSize
,
xthreads
);
context
.
executeKernel
(
kernel3
,
args2
,
gridSize
,
ythreads
);
// Unpack the data.
context
.
executeKernel
(
unpackKernel
,
args1
,
gridSize
,
128
);
}
else
{
context
.
executeKernel
(
kernel1
,
args1
,
xsize
*
ysize
*
zsize
,
zthreads
);
context
.
executeKernel
(
kernel2
,
args2
,
xsize
*
ysize
*
zsize
,
xthreads
);
context
.
executeKernel
(
kernel3
,
args1
,
xsize
*
ysize
*
zsize
,
ythreads
);
}
}
int
CudaFFT3D
::
findLegalDimension
(
int
minimum
)
{
if
(
minimum
<
1
)
return
1
;
while
(
true
)
{
// Attempt to factor the current value.
int
unfactored
=
minimum
;
for
(
int
factor
=
2
;
factor
<
8
;
factor
++
)
{
while
(
unfactored
>
1
&&
unfactored
%
factor
==
0
)
unfactored
/=
factor
;
}
if
(
unfactored
==
1
)
return
minimum
;
minimum
++
;
}
}
static
int
getSmallestRadix
(
int
size
)
{
int
minRadix
=
1
;
int
unfactored
=
size
;
while
(
unfactored
%
7
==
0
)
{
minRadix
=
7
;
unfactored
/=
7
;
}
while
(
unfactored
%
5
==
0
)
{
minRadix
=
5
;
unfactored
/=
5
;
}
while
(
unfactored
%
4
==
0
)
{
minRadix
=
4
;
unfactored
/=
4
;
}
while
(
unfactored
%
3
==
0
)
{
minRadix
=
3
;
unfactored
/=
3
;
}
while
(
unfactored
%
2
==
0
)
{
minRadix
=
2
;
unfactored
/=
2
;
}
return
minRadix
;
}
CUfunction
CudaFFT3D
::
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
,
int
&
threads
,
int
axis
,
bool
forward
,
bool
inputIsReal
)
{
int
maxThreads
=
256
;
// while (maxThreads > 128 && maxThreads-64 >= zsize)
// maxThreads -= 64;
int
threadsPerBlock
=
zsize
/
getSmallestRadix
(
zsize
);
stringstream
source
;
int
blocksPerGroup
=
max
(
1
,
maxThreads
/
threadsPerBlock
);
int
stage
=
0
;
int
L
=
zsize
;
int
m
=
1
;
// Factor zsize, generating an appropriate block of code for each factor.
while
(
L
>
1
)
{
int
input
=
stage
%
2
;
int
output
=
1
-
input
;
int
radix
;
if
(
L
%
7
==
0
)
radix
=
7
;
else
if
(
L
%
5
==
0
)
radix
=
5
;
else
if
(
L
%
4
==
0
)
radix
=
4
;
else
if
(
L
%
3
==
0
)
radix
=
3
;
else
if
(
L
%
2
==
0
)
radix
=
2
;
else
throw
OpenMMException
(
"Illegal size for FFT: "
+
context
.
intToString
(
zsize
));
source
<<
"{
\n
"
;
L
=
L
/
radix
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix "
<<
radix
<<
")
\n
"
;
if
(
L
*
m
<
threadsPerBlock
)
source
<<
"if (threadIdx.x < "
<<
(
blocksPerGroup
*
L
*
m
)
<<
") {
\n
"
;
else
source
<<
"{
\n
"
;
source
<<
"int block = threadIdx.x/"
<<
(
L
*
m
)
<<
";
\n
"
;
source
<<
"int i = threadIdx.x-block*"
<<
(
L
*
m
)
<<
";
\n
"
;
source
<<
"int base = i+block*"
<<
zsize
<<
";
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
if
(
radix
==
7
)
{
source
<<
"real2 c0 = data"
<<
input
<<
"[base];
\n
"
;
source
<<
"real2 c1 = data"
<<
input
<<
"[base+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c2 = data"
<<
input
<<
"[base+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c3 = data"
<<
input
<<
"[base+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c4 = data"
<<
input
<<
"[base+"
<<
(
4
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c5 = data"
<<
input
<<
"[base+"
<<
(
5
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c6 = data"
<<
input
<<
"[base+"
<<
(
6
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 d0 = c1+c6;
\n
"
;
source
<<
"real2 d1 = c1-c6;
\n
"
;
source
<<
"real2 d2 = c2+c5;
\n
"
;
source
<<
"real2 d3 = c2-c5;
\n
"
;
source
<<
"real2 d4 = c4+c3;
\n
"
;
source
<<
"real2 d5 = c4-c3;
\n
"
;
source
<<
"real2 d6 = d2+d0;
\n
"
;
source
<<
"real2 d7 = d5+d3;
\n
"
;
source
<<
"real2 b0 = c0+d6+d4;
\n
"
;
source
<<
"real2 b1 = "
<<
context
.
doubleToString
((
cos
(
2
*
M_PI
/
7
)
+
cos
(
4
*
M_PI
/
7
)
+
cos
(
6
*
M_PI
/
7
))
/
3
-
1
)
<<
"*(d6+d4);
\n
"
;
source
<<
"real2 b2 = "
<<
context
.
doubleToString
((
2
*
cos
(
2
*
M_PI
/
7
)
-
cos
(
4
*
M_PI
/
7
)
-
cos
(
6
*
M_PI
/
7
))
/
3
)
<<
"*(d0-d4);
\n
"
;
source
<<
"real2 b3 = "
<<
context
.
doubleToString
((
cos
(
2
*
M_PI
/
7
)
-
2
*
cos
(
4
*
M_PI
/
7
)
+
cos
(
6
*
M_PI
/
7
))
/
3
)
<<
"*(d4-d2);
\n
"
;
source
<<
"real2 b4 = "
<<
context
.
doubleToString
((
cos
(
2
*
M_PI
/
7
)
+
cos
(
4
*
M_PI
/
7
)
-
2
*
cos
(
6
*
M_PI
/
7
))
/
3
)
<<
"*(d2-d0);
\n
"
;
source
<<
"real2 b5 = -(SIGN)*"
<<
context
.
doubleToString
((
sin
(
2
*
M_PI
/
7
)
+
sin
(
4
*
M_PI
/
7
)
-
sin
(
6
*
M_PI
/
7
))
/
3
)
<<
"*(d7+d1);
\n
"
;
source
<<
"real2 b6 = -(SIGN)*"
<<
context
.
doubleToString
((
2
*
sin
(
2
*
M_PI
/
7
)
-
sin
(
4
*
M_PI
/
7
)
+
sin
(
6
*
M_PI
/
7
))
/
3
)
<<
"*(d1-d5);
\n
"
;
source
<<
"real2 b7 = -(SIGN)*"
<<
context
.
doubleToString
((
sin
(
2
*
M_PI
/
7
)
-
2
*
sin
(
4
*
M_PI
/
7
)
-
sin
(
6
*
M_PI
/
7
))
/
3
)
<<
"*(d5-d3);
\n
"
;
source
<<
"real2 b8 = -(SIGN)*"
<<
context
.
doubleToString
((
sin
(
2
*
M_PI
/
7
)
+
sin
(
4
*
M_PI
/
7
)
+
2
*
sin
(
6
*
M_PI
/
7
))
/
3
)
<<
"*(d3-d1);
\n
"
;
source
<<
"real2 t0 = b0+b1;
\n
"
;
source
<<
"real2 t1 = b2+b3;
\n
"
;
source
<<
"real2 t2 = b4-b3;
\n
"
;
source
<<
"real2 t3 = -b2-b4;
\n
"
;
source
<<
"real2 t4 = b6+b7;
\n
"
;
source
<<
"real2 t5 = b8-b7;
\n
"
;
source
<<
"real2 t6 = -b8-b6;
\n
"
;
source
<<
"real2 t7 = t0+t1;
\n
"
;
source
<<
"real2 t8 = t0+t2;
\n
"
;
source
<<
"real2 t9 = t0+t3;
\n
"
;
source
<<
"real2 t10 = make_real2(t4.y+b5.y, -(t4.x+b5.x));
\n
"
;
source
<<
"real2 t11 = make_real2(t5.y+b5.y, -(t5.x+b5.x));
\n
"
;
source
<<
"real2 t12 = make_real2(t6.y+b5.y, -(t6.x+b5.x));
\n
"
;
source
<<
"data"
<<
output
<<
"[base+6*j*"
<<
m
<<
"] = b0;
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(6*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
7
*
L
)
<<
"], t7-t10);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(6*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
7
*
L
)
<<
"], t9-t12);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(6*j+3)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
3
*
zsize
)
<<
"/"
<<
(
7
*
L
)
<<
"], t8+t11);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(6*j+4)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
4
*
zsize
)
<<
"/"
<<
(
7
*
L
)
<<
"], t8-t11);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(6*j+5)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
5
*
zsize
)
<<
"/"
<<
(
7
*
L
)
<<
"], t9+t12);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(6*j+6)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
6
*
zsize
)
<<
"/"
<<
(
7
*
L
)
<<
"], t7+t10);
\n
"
;
}
else
if
(
radix
==
5
)
{
source
<<
"real2 c0 = data"
<<
input
<<
"[base];
\n
"
;
source
<<
"real2 c1 = data"
<<
input
<<
"[base+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c2 = data"
<<
input
<<
"[base+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c3 = data"
<<
input
<<
"[base+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c4 = data"
<<
input
<<
"[base+"
<<
(
4
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 d0 = c1+c4;
\n
"
;
source
<<
"real2 d1 = c2+c3;
\n
"
;
source
<<
"real2 d2 = "
<<
context
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c1-c4);
\n
"
;
source
<<
"real2 d3 = "
<<
context
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c2-c3);
\n
"
;
source
<<
"real2 d4 = d0+d1;
\n
"
;
source
<<
"real2 d5 = "
<<
context
.
doubleToString
(
0.25
*
sqrt
(
5.0
))
<<
"*(d0-d1);
\n
"
;
source
<<
"real2 d6 = c0-0.25f*d4;
\n
"
;
source
<<
"real2 d7 = d6+d5;
\n
"
;
source
<<
"real2 d8 = d6-d5;
\n
"
;
string
coeff
=
context
.
doubleToString
(
sin
(
0.2
*
M_PI
)
/
sin
(
0.4
*
M_PI
));
source
<<
"real2 d9 = (SIGN)*make_real2(d2.y+"
<<
coeff
<<
"*d3.y, -d2.x-"
<<
coeff
<<
"*d3.x);
\n
"
;
source
<<
"real2 d10 = (SIGN)*make_real2("
<<
coeff
<<
"*d2.y-d3.y, d3.x-"
<<
coeff
<<
"*d2.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+4*j*"
<<
m
<<
"] = c0+d4;
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(4*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
5
*
L
)
<<
"], d7+d9);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(4*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8+d10);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(4*j+3)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
3
*
zsize
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8-d10);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(4*j+4)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
4
*
zsize
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7-d9);
\n
"
;
}
else
if
(
radix
==
4
)
{
source
<<
"real2 c0 = data"
<<
input
<<
"[base];
\n
"
;
source
<<
"real2 c1 = data"
<<
input
<<
"[base+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c2 = data"
<<
input
<<
"[base+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c3 = data"
<<
input
<<
"[base+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 d0 = c0+c2;
\n
"
;
source
<<
"real2 d1 = c0-c2;
\n
"
;
source
<<
"real2 d2 = c1+c3;
\n
"
;
source
<<
"real2 d3 = (SIGN)*make_real2(c1.y-c3.y, c3.x-c1.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+3*j*"
<<
m
<<
"] = d0+d2;
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(3*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
4
*
L
)
<<
"], d1+d3);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(3*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0-d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(3*j+3)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
3
*
zsize
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1-d3);
\n
"
;
}
else
if
(
radix
==
3
)
{
source
<<
"real2 c0 = data"
<<
input
<<
"[base];
\n
"
;
source
<<
"real2 c1 = data"
<<
input
<<
"[base+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 c2 = data"
<<
input
<<
"[base+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"real2 d0 = c1+c2;
\n
"
;
source
<<
"real2 d1 = c0-0.5f*d0;
\n
"
;
source
<<
"real2 d2 = (SIGN)*"
<<
context
.
doubleToString
(
sin
(
M_PI
/
3.0
))
<<
"*make_real2(c1.y-c2.y, c2.x-c1.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+2*j*"
<<
m
<<
"] = c0+d0;
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(2*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
3
*
L
)
<<
"], d1+d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(2*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1-d2);
\n
"
;
}
else
if
(
radix
==
2
)
{
source
<<
"real2 c0 = data"
<<
input
<<
"[base];
\n
"
;
source
<<
"real2 c1 = data"
<<
input
<<
"[base+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"data"
<<
output
<<
"[base+j*"
<<
m
<<
"] = c0+c1;
\n
"
;
source
<<
"data"
<<
output
<<
"[base+(j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
2
*
L
)
<<
"], c0-c1);
\n
"
;
}
source
<<
"}
\n
"
;
m
=
m
*
radix
;
source
<<
"__syncthreads();
\n
"
;
source
<<
"}
\n
"
;
++
stage
;
}
// Create the kernel.
bool
outputIsReal
=
(
inputIsReal
&&
axis
==
2
&&
!
forward
);
bool
outputIsPacked
=
(
inputIsReal
&&
axis
==
2
&&
forward
);
string
outputSuffix
=
(
outputIsReal
?
".x"
:
""
);
if
(
outputIsPacked
)
source
<<
"if (index < XSIZE*YSIZE && x < XSIZE/2+1)
\n
"
;
else
source
<<
"if (index < XSIZE*YSIZE)
\n
"
;
source
<<
"for (int i = threadIdx.x-block*THREADS_PER_BLOCK; i < ZSIZE; i += THREADS_PER_BLOCK)
\n
"
;
if
(
outputIsPacked
)
source
<<
"out[y*(ZSIZE*(XSIZE/2+1))+i*(XSIZE/2+1)+x] = data"
<<
(
stage
%
2
)
<<
"[i+block*ZSIZE]"
<<
outputSuffix
<<
";
\n
"
;
else
source
<<
"out[y*(ZSIZE*XSIZE)+i*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[i+block*ZSIZE]"
<<
outputSuffix
<<
";
\n
"
;
map
<
string
,
string
>
replacements
;
replacements
[
"XSIZE"
]
=
context
.
intToString
(
xsize
);
replacements
[
"YSIZE"
]
=
context
.
intToString
(
ysize
);
replacements
[
"ZSIZE"
]
=
context
.
intToString
(
zsize
);
replacements
[
"BLOCKS_PER_GROUP"
]
=
context
.
intToString
(
blocksPerGroup
);
replacements
[
"THREADS_PER_BLOCK"
]
=
context
.
intToString
(
threadsPerBlock
);
replacements
[
"M_PI"
]
=
context
.
doubleToString
(
M_PI
);
replacements
[
"COMPUTE_FFT"
]
=
source
.
str
();
replacements
[
"SIGN"
]
=
(
forward
?
"1"
:
"-1"
);
replacements
[
"INPUT_TYPE"
]
=
(
inputIsReal
&&
axis
==
0
&&
forward
?
"real"
:
"real2"
);
replacements
[
"OUTPUT_TYPE"
]
=
(
outputIsReal
?
"real"
:
"real2"
);
replacements
[
"INPUT_IS_REAL"
]
=
(
inputIsReal
&&
axis
==
0
&&
forward
?
"1"
:
"0"
);
replacements
[
"INPUT_IS_PACKED"
]
=
(
inputIsReal
&&
axis
==
0
&&
!
forward
?
"1"
:
"0"
);
replacements
[
"OUTPUT_IS_PACKED"
]
=
(
outputIsPacked
?
"1"
:
"0"
);
CUmodule
module
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
context
.
replaceStrings
(
CudaKernelSources
::
fft
,
replacements
));
CUfunction
kernel
=
context
.
getKernel
(
module
,
"execFFT"
);
threads
=
blocksPerGroup
*
threadsPerBlock
;
return
kernel
;
}
platforms/cuda/src/CudaKernels.cpp
View file @
bf6d95c2
...
@@ -1497,11 +1497,15 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1497,11 +1497,15 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete
pmeAtomGridIndex
;
delete
pmeAtomGridIndex
;
if
(
sort
!=
NULL
)
if
(
sort
!=
NULL
)
delete
sort
;
delete
sort
;
if
(
fft
!=
NULL
)
delete
fft
;
if
(
pmeio
!=
NULL
)
if
(
pmeio
!=
NULL
)
delete
pmeio
;
delete
pmeio
;
if
(
hasInitializedFFT
)
{
if
(
hasInitializedFFT
)
{
if
(
useCudaFFT
)
{
cufftDestroy
(
fftForward
);
cufftDestroy
(
fftForward
);
cufftDestroy
(
fftBackward
);
cufftDestroy
(
fftBackward
);
}
if
(
usePmeStream
)
{
if
(
usePmeStream
)
{
cuStreamDestroy
(
pmeStream
);
cuStreamDestroy
(
pmeStream
);
cuEventDestroy
(
pmeSyncEvent
);
cuEventDestroy
(
pmeSyncEvent
);
...
@@ -1509,26 +1513,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1509,26 +1513,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
}
}
}
}
/**
* Select a size for an FFT that is a multiple of 2, 3, 5, and 7.
*/
static
int
findFFTDimension
(
int
minimum
)
{
if
(
minimum
<
1
)
return
1
;
while
(
true
)
{
// Attempt to factor the current value.
int
unfactored
=
minimum
;
for
(
int
factor
=
2
;
factor
<
8
;
factor
++
)
{
while
(
unfactored
>
1
&&
unfactored
%
factor
==
0
)
unfactored
/=
factor
;
}
if
(
unfactored
==
1
)
return
minimum
;
minimum
++
;
}
}
void
CudaCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
void
CudaCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
...
@@ -1643,9 +1627,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1643,9 +1627,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
gridSizeX
=
findFFT
Dimension
(
gridSizeX
);
gridSizeX
=
CudaFFT3D
::
findLegal
Dimension
(
gridSizeX
);
gridSizeY
=
findFFT
Dimension
(
gridSizeY
);
gridSizeY
=
CudaFFT3D
::
findLegal
Dimension
(
gridSizeY
);
gridSizeZ
=
findFFT
Dimension
(
gridSizeZ
);
gridSizeZ
=
CudaFFT3D
::
findLegal
Dimension
(
gridSizeZ
);
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
...
@@ -1692,38 +1676,40 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1692,38 +1676,40 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Create required data structures.
// Create required data structures.
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
directPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
cu
.
getComputeCapability
()
>=
2.0
?
2
*
elementSize
:
2
*
sizeof
(
long
long
),
"originalPmeGrid"
);
directPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
cu
.
getComputeCapability
()
>=
2.0
?
elementSize
:
sizeof
(
long
long
),
"originalPmeGrid"
);
reciprocalPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"reciprocalPmeGrid"
);
reciprocalPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
(
gridSizeZ
/
2
+
1
),
2
*
elementSize
,
"reciprocalPmeGrid"
);
cu
.
addAutoclearBuffer
(
*
directPmeGrid
);
cu
.
addAutoclearBuffer
(
*
directPmeGrid
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
useCudaFFT
=
false
;
// We might switch back in the future, once Nvidia has all their bugs worked out
if
(
useCudaFFT
)
{
cufftResult
result
=
cufftPlan3d
(
&
fftForward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_D2Z
:
CUFFT_R2C
);
cufftResult
result
=
cufftPlan3d
(
&
fftForward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_D2Z
:
CUFFT_R2C
);
if
(
result
!=
CUFFT_SUCCESS
)
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
result
=
cufftPlan3d
(
&
fftBackward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2D
:
CUFFT_C2R
);
result
=
cufftPlan3d
(
&
fftBackward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2D
:
CUFFT_C2R
);
if
(
result
!=
CUFFT_SUCCESS
)
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
cufftSetCompatibilityMode
(
fftForward
,
CUFFT_COMPATIBILITY_NATIVE
);
cufftSetCompatibilityMode
(
fftForward
,
CUFFT_COMPATIBILITY_NATIVE
);
cufftSetCompatibilityMode
(
fftBackward
,
CUFFT_COMPATIBILITY_NATIVE
);
cufftSetCompatibilityMode
(
fftBackward
,
CUFFT_COMPATIBILITY_NATIVE
);
}
else
fft
=
new
CudaFFT3D
(
cu
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
true
);
// Prepare for doing PME on its own stream.
// Prepare for doing PME on its own stream.
int
cufftVersion
;
char
deviceName
[
100
]
;
cu
fftGetVersion
(
&
cufftVersion
);
cu
DeviceGetName
(
deviceName
,
100
,
cu
.
getDevice
()
);
usePmeStream
=
(
cu
.
getComputeCapability
()
<
5.0
&&
numParticles
<
130000
&&
cufftVersion
>=
6000
);
// Workarounds for various CUDA bugs
usePmeStream
=
(
string
(
deviceName
)
!=
"GeForce GTX 980"
);
// Using a separate stream is slower on GTX 980
if
(
usePmeStream
)
{
if
(
usePmeStream
)
{
cuStreamCreate
(
&
pmeStream
,
CU_STREAM_NON_BLOCKING
);
cuStreamCreate
(
&
pmeStream
,
CU_STREAM_NON_BLOCKING
);
if
(
useCudaFFT
)
{
cufftSetStream
(
fftForward
,
pmeStream
);
cufftSetStream
(
fftForward
,
pmeStream
);
cufftSetStream
(
fftBackward
,
pmeStream
);
cufftSetStream
(
fftBackward
,
pmeStream
);
}
CHECK_RESULT
(
cuEventCreate
(
&
pmeSyncEvent
,
CU_EVENT_DISABLE_TIMING
),
"Error creating event for NonbondedForce"
);
CHECK_RESULT
(
cuEventCreate
(
&
pmeSyncEvent
,
CU_EVENT_DISABLE_TIMING
),
"Error creating event for NonbondedForce"
);
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
if
(
recipForceGroup
<
0
)
if
(
recipForceGroup
<
0
)
...
@@ -1893,10 +1879,15 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1893,10 +1879,15 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
directPmeGrid
->
getSize
());
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
directPmeGrid
->
getSize
());
}
}
if
(
useCudaFFT
)
{
if
(
cu
.
getUseDoublePrecision
())
if
(
cu
.
getUseDoublePrecision
())
cufftExecD2Z
(
fftForward
,
(
double
*
)
directPmeGrid
->
getDevicePointer
(),
(
double2
*
)
reciprocalPmeGrid
->
getDevicePointer
());
cufftExecD2Z
(
fftForward
,
(
double
*
)
directPmeGrid
->
getDevicePointer
(),
(
double2
*
)
reciprocalPmeGrid
->
getDevicePointer
());
else
else
cufftExecR2C
(
fftForward
,
(
float
*
)
directPmeGrid
->
getDevicePointer
(),
(
float2
*
)
reciprocalPmeGrid
->
getDevicePointer
());
cufftExecR2C
(
fftForward
,
(
float
*
)
directPmeGrid
->
getDevicePointer
(),
(
float2
*
)
reciprocalPmeGrid
->
getDevicePointer
());
}
else
{
fft
->
execFFT
(
*
directPmeGrid
,
*
reciprocalPmeGrid
,
true
);
}
if
(
includeEnergy
)
{
if
(
includeEnergy
)
{
void
*
computeEnergyArgs
[]
=
{
&
reciprocalPmeGrid
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
void
*
computeEnergyArgs
[]
=
{
&
reciprocalPmeGrid
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
...
@@ -1910,11 +1901,15 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1910,11 +1901,15 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeConvolutionKernel
,
convolutionArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeConvolutionKernel
,
convolutionArgs
,
cu
.
getNumAtoms
());
if
(
useCudaFFT
)
{
if
(
cu
.
getUseDoublePrecision
())
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2D
(
fftBackward
,
(
double2
*
)
reciprocalPmeGrid
->
getDevicePointer
(),
(
double
*
)
directPmeGrid
->
getDevicePointer
());
cufftExecZ2D
(
fftBackward
,
(
double2
*
)
reciprocalPmeGrid
->
getDevicePointer
(),
(
double
*
)
directPmeGrid
->
getDevicePointer
());
else
else
cufftExecC2R
(
fftBackward
,
(
float2
*
)
reciprocalPmeGrid
->
getDevicePointer
(),
(
float
*
)
directPmeGrid
->
getDevicePointer
());
cufftExecC2R
(
fftBackward
,
(
float2
*
)
reciprocalPmeGrid
->
getDevicePointer
(),
(
float
*
)
directPmeGrid
->
getDevicePointer
());
}
else
{
fft
->
execFFT
(
*
reciprocalPmeGrid
,
*
directPmeGrid
,
false
);
}
void
*
interpolateArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
directPmeGrid
->
getDevicePointer
(),
void
*
interpolateArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
directPmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomGridIndex
->
getDevicePointer
()};
...
...
platforms/cuda/src/kernels/fft.cu
0 → 100644
View file @
bf6d95c2
static
__inline__
__device__
real2
multiplyComplex
(
real2
c1
,
real2
c2
)
{
return
make_real2
(
c1
.
x
*
c2
.
x
-
c1
.
y
*
c2
.
y
,
c1
.
x
*
c2
.
y
+
c1
.
y
*
c2
.
x
);
}
/**
* Load a value from the half-complex grid produces by a real-to-complex transform.
*/
static
__inline__
__device__
real2
loadComplexValue
(
const
real2
*
__restrict__
in
,
int
x
,
int
y
,
int
z
)
{
const
int
inputZSize
=
ZSIZE
/
2
+
1
;
if
(
z
<
inputZSize
)
return
in
[
x
*
YSIZE
*
inputZSize
+
y
*
inputZSize
+
z
];
int
xp
=
(
x
==
0
?
0
:
XSIZE
-
x
);
int
yp
=
(
y
==
0
?
0
:
YSIZE
-
y
);
real2
value
=
in
[
xp
*
YSIZE
*
inputZSize
+
yp
*
inputZSize
+
(
ZSIZE
-
z
)];
return
make_real2
(
value
.
x
,
-
value
.
y
);
}
/**
* Perform a 1D FFT on each row along one axis.
*/
extern
"C"
__global__
void
execFFT
(
const
INPUT_TYPE
*
__restrict__
in
,
OUTPUT_TYPE
*
__restrict__
out
)
{
__shared__
real2
w
[
ZSIZE
];
__shared__
real2
data0
[
BLOCKS_PER_GROUP
*
ZSIZE
];
__shared__
real2
data1
[
BLOCKS_PER_GROUP
*
ZSIZE
];
for
(
int
i
=
threadIdx
.
x
;
i
<
ZSIZE
;
i
+=
blockDim
.
x
)
w
[
i
]
=
make_real2
(
cos
(
-
(
SIGN
)
*
i
*
2
*
M_PI
/
ZSIZE
),
sin
(
-
(
SIGN
)
*
i
*
2
*
M_PI
/
ZSIZE
));
__syncthreads
();
const
int
block
=
threadIdx
.
x
/
THREADS_PER_BLOCK
;
for
(
int
baseIndex
=
blockIdx
.
x
*
BLOCKS_PER_GROUP
;
baseIndex
<
XSIZE
*
YSIZE
;
baseIndex
+=
gridDim
.
x
*
BLOCKS_PER_GROUP
)
{
int
index
=
baseIndex
+
block
;
int
x
=
index
/
YSIZE
;
int
y
=
index
-
x
*
YSIZE
;
#if OUTPUT_IS_PACKED
if
(
x
<
XSIZE
/
2
+
1
)
{
#endif
if
(
index
<
XSIZE
*
YSIZE
)
for
(
int
i
=
threadIdx
.
x
-
block
*
THREADS_PER_BLOCK
;
i
<
ZSIZE
;
i
+=
THREADS_PER_BLOCK
)
#if INPUT_IS_REAL
data0
[
i
+
block
*
ZSIZE
]
=
make_real2
(
in
[
x
*
(
YSIZE
*
ZSIZE
)
+
y
*
ZSIZE
+
i
],
0
);
#elif INPUT_IS_PACKED
data0
[
i
+
block
*
ZSIZE
]
=
loadComplexValue
(
in
,
x
,
y
,
i
);
#else
data0
[
i
+
block
*
ZSIZE
]
=
in
[
x
*
(
YSIZE
*
ZSIZE
)
+
y
*
ZSIZE
+
i
];
#endif
#if OUTPUT_IS_PACKED
}
#endif
__syncthreads
();
COMPUTE_FFT
}
}
platforms/cuda/src/kernels/fftR2C.cu
0 → 100644
View file @
bf6d95c2
/**
* Combine the two halves of a real grid into a complex grid that is half as large.
*/
extern
"C"
__global__
void
packForwardData
(
const
real
*
__restrict__
in
,
real2
*
__restrict__
out
)
{
const
int
gridSize
=
PACKED_XSIZE
*
PACKED_YSIZE
*
PACKED_ZSIZE
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
x
=
index
/
(
PACKED_YSIZE
*
PACKED_ZSIZE
);
int
remainder
=
index
-
x
*
(
PACKED_YSIZE
*
PACKED_ZSIZE
);
int
y
=
remainder
/
PACKED_ZSIZE
;
int
z
=
remainder
-
y
*
PACKED_ZSIZE
;
#if PACKED_AXIS == 0
real2
value
=
make_real2
(
in
[
2
*
x
*
YSIZE
*
ZSIZE
+
y
*
ZSIZE
+
z
],
in
[(
2
*
x
+
1
)
*
YSIZE
*
ZSIZE
+
y
*
ZSIZE
+
z
]);
#elif PACKED_AXIS == 1
real2
value
=
make_real2
(
in
[
x
*
YSIZE
*
ZSIZE
+
2
*
y
*
ZSIZE
+
z
],
in
[
x
*
YSIZE
*
ZSIZE
+
(
2
*
y
+
1
)
*
ZSIZE
+
z
]);
#else
real2
value
=
make_real2
(
in
[
x
*
YSIZE
*
ZSIZE
+
y
*
ZSIZE
+
2
*
z
],
in
[
x
*
YSIZE
*
ZSIZE
+
y
*
ZSIZE
+
(
2
*
z
+
1
)]);
#endif
out
[
index
]
=
value
;
}
}
/**
* Split the transformed data back into a full sized, symmetric grid.
*/
extern
"C"
__global__
void
unpackForwardData
(
const
real2
*
__restrict__
in
,
real2
*
__restrict__
out
)
{
// Compute the phase factors.
#if PACKED_AXIS == 0
__shared__
real2
w
[
PACKED_XSIZE
];
for
(
int
i
=
threadIdx
.
x
;
i
<
PACKED_XSIZE
;
i
+=
blockDim
.
x
)
w
[
i
]
=
make_real2
(
sin
(
i
*
2
*
M_PI
/
XSIZE
),
cos
(
i
*
2
*
M_PI
/
XSIZE
));
#elif PACKED_AXIS == 1
__shared__
real2
w
[
PACKED_YSIZE
];
for
(
int
i
=
threadIdx
.
x
;
i
<
PACKED_YSIZE
;
i
+=
blockDim
.
x
)
w
[
i
]
=
make_real2
(
sin
(
i
*
2
*
M_PI
/
YSIZE
),
cos
(
i
*
2
*
M_PI
/
YSIZE
));
#else
__shared__
real2
w
[
PACKED_ZSIZE
];
for
(
int
i
=
threadIdx
.
x
;
i
<
PACKED_ZSIZE
;
i
+=
blockDim
.
x
)
w
[
i
]
=
make_real2
(
sin
(
i
*
2
*
M_PI
/
ZSIZE
),
cos
(
i
*
2
*
M_PI
/
ZSIZE
));
#endif
__syncthreads
();
// Transform the data.
const
int
gridSize
=
PACKED_XSIZE
*
PACKED_YSIZE
*
PACKED_ZSIZE
;
const
int
outputZSize
=
ZSIZE
/
2
+
1
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
x
=
index
/
(
PACKED_YSIZE
*
PACKED_ZSIZE
);
int
remainder
=
index
-
x
*
(
PACKED_YSIZE
*
PACKED_ZSIZE
);
int
y
=
remainder
/
PACKED_ZSIZE
;
int
z
=
remainder
-
y
*
PACKED_ZSIZE
;
int
xp
=
(
x
==
0
?
0
:
PACKED_XSIZE
-
x
);
int
yp
=
(
y
==
0
?
0
:
PACKED_YSIZE
-
y
);
int
zp
=
(
z
==
0
?
0
:
PACKED_ZSIZE
-
z
);
real2
z1
=
in
[
x
*
PACKED_YSIZE
*
PACKED_ZSIZE
+
y
*
PACKED_ZSIZE
+
z
];
real2
z2
=
in
[
xp
*
PACKED_YSIZE
*
PACKED_ZSIZE
+
yp
*
PACKED_ZSIZE
+
zp
];
#if PACKED_AXIS == 0
real2
wfac
=
w
[
x
];
#elif PACKED_AXIS == 1
real2
wfac
=
w
[
y
];
#else
real2
wfac
=
w
[
z
];
#endif
real2
output
=
make_real2
((
z1
.
x
+
z2
.
x
-
wfac
.
x
*
(
z1
.
x
-
z2
.
x
)
+
wfac
.
y
*
(
z1
.
y
+
z2
.
y
))
/
2
,
(
z1
.
y
-
z2
.
y
-
wfac
.
y
*
(
z1
.
x
-
z2
.
x
)
-
wfac
.
x
*
(
z1
.
y
+
z2
.
y
))
/
2
);
if
(
z
<
outputZSize
)
out
[
x
*
YSIZE
*
outputZSize
+
y
*
outputZSize
+
z
]
=
output
;
xp
=
(
x
==
0
?
0
:
XSIZE
-
x
);
yp
=
(
y
==
0
?
0
:
YSIZE
-
y
);
zp
=
(
z
==
0
?
0
:
ZSIZE
-
z
);
if
(
zp
<
outputZSize
)
{
#if PACKED_AXIS == 0
if
(
x
==
0
)
out
[
PACKED_XSIZE
*
YSIZE
*
outputZSize
+
yp
*
outputZSize
+
zp
]
=
make_real2
((
z1
.
x
-
z1
.
y
+
z2
.
x
-
z2
.
y
)
/
2
,
(
-
z1
.
x
-
z1
.
y
+
z2
.
x
+
z2
.
y
)
/
2
);
#elif PACKED_AXIS == 1
if
(
y
==
0
)
out
[
xp
*
YSIZE
*
outputZSize
+
PACKED_YSIZE
*
outputZSize
+
zp
]
=
make_real2
((
z1
.
x
-
z1
.
y
+
z2
.
x
-
z2
.
y
)
/
2
,
(
-
z1
.
x
-
z1
.
y
+
z2
.
x
+
z2
.
y
)
/
2
);
#else
if
(
z
==
0
)
out
[
xp
*
YSIZE
*
outputZSize
+
yp
*
outputZSize
+
PACKED_ZSIZE
]
=
make_real2
((
z1
.
x
-
z1
.
y
+
z2
.
x
-
z2
.
y
)
/
2
,
(
-
z1
.
x
-
z1
.
y
+
z2
.
x
+
z2
.
y
)
/
2
);
#endif
else
out
[
xp
*
YSIZE
*
outputZSize
+
yp
*
outputZSize
+
zp
]
=
make_real2
(
output
.
x
,
-
output
.
y
);
}
}
}
/**
* Load a value from the half-complex grid produced by a real-to-complex transform.
*/
static
__inline__
__device__
real2
loadComplexValue
(
const
real2
*
__restrict__
in
,
int
x
,
int
y
,
int
z
)
{
const
int
inputZSize
=
ZSIZE
/
2
+
1
;
if
(
z
<
inputZSize
)
return
in
[
x
*
YSIZE
*
inputZSize
+
y
*
inputZSize
+
z
];
int
xp
=
(
x
==
0
?
0
:
XSIZE
-
x
);
int
yp
=
(
y
==
0
?
0
:
YSIZE
-
y
);
real2
value
=
in
[
xp
*
YSIZE
*
inputZSize
+
yp
*
inputZSize
+
(
ZSIZE
-
z
)];
return
make_real2
(
value
.
x
,
-
value
.
y
);
}
/**
* Repack the symmetric complex grid into one half as large in preparation for doing an inverse complex-to-real transform.
*/
extern
"C"
__global__
void
packBackwardData
(
const
real2
*
__restrict__
in
,
real2
*
__restrict__
out
)
{
// Compute the phase factors.
#if PACKED_AXIS == 0
__shared__
real2
w
[
PACKED_XSIZE
];
for
(
int
i
=
threadIdx
.
x
;
i
<
PACKED_XSIZE
;
i
+=
blockDim
.
x
)
w
[
i
]
=
make_real2
(
cos
(
i
*
2
*
M_PI
/
XSIZE
),
sin
(
i
*
2
*
M_PI
/
XSIZE
));
#elif PACKED_AXIS == 1
__shared__
real2
w
[
PACKED_YSIZE
];
for
(
int
i
=
threadIdx
.
x
;
i
<
PACKED_YSIZE
;
i
+=
blockDim
.
x
)
w
[
i
]
=
make_real2
(
cos
(
i
*
2
*
M_PI
/
YSIZE
),
sin
(
i
*
2
*
M_PI
/
YSIZE
));
#else
__shared__
real2
w
[
PACKED_ZSIZE
];
for
(
int
i
=
threadIdx
.
x
;
i
<
PACKED_ZSIZE
;
i
+=
blockDim
.
x
)
w
[
i
]
=
make_real2
(
cos
(
i
*
2
*
M_PI
/
ZSIZE
),
sin
(
i
*
2
*
M_PI
/
ZSIZE
));
#endif
__syncthreads
();
// Transform the data.
const
int
gridSize
=
PACKED_XSIZE
*
PACKED_YSIZE
*
PACKED_ZSIZE
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
x
=
index
/
(
PACKED_YSIZE
*
PACKED_ZSIZE
);
int
remainder
=
index
-
x
*
(
PACKED_YSIZE
*
PACKED_ZSIZE
);
int
y
=
remainder
/
PACKED_ZSIZE
;
int
z
=
remainder
-
y
*
PACKED_ZSIZE
;
int
xp
=
(
x
==
0
?
0
:
PACKED_XSIZE
-
x
);
int
yp
=
(
y
==
0
?
0
:
PACKED_YSIZE
-
y
);
int
zp
=
(
z
==
0
?
0
:
PACKED_ZSIZE
-
z
);
real2
z1
=
loadComplexValue
(
in
,
x
,
y
,
z
);
#if PACKED_AXIS == 0
real2
wfac
=
w
[
x
];
real2
z2
=
loadComplexValue
(
in
,
PACKED_XSIZE
-
x
,
yp
,
zp
);
#elif PACKED_AXIS == 1
real2
wfac
=
w
[
y
];
real2
z2
=
loadComplexValue
(
in
,
xp
,
PACKED_YSIZE
-
y
,
zp
);
#else
real2
wfac
=
w
[
z
];
real2
z2
=
loadComplexValue
(
in
,
xp
,
yp
,
PACKED_ZSIZE
-
z
);
#endif
real2
even
=
make_real2
((
z1
.
x
+
z2
.
x
)
/
2
,
(
z1
.
y
-
z2
.
y
)
/
2
);
real2
odd
=
make_real2
((
z1
.
x
-
z2
.
x
)
/
2
,
(
z1
.
y
+
z2
.
y
)
/
2
);
odd
=
make_real2
(
odd
.
x
*
wfac
.
x
-
odd
.
y
*
wfac
.
y
,
odd
.
y
*
wfac
.
x
+
odd
.
x
*
wfac
.
y
);
out
[
x
*
PACKED_YSIZE
*
PACKED_ZSIZE
+
y
*
PACKED_ZSIZE
+
z
]
=
make_real2
(
even
.
x
-
odd
.
y
,
even
.
y
+
odd
.
x
);
}
}
/**
* Split the data back into a full sized, real grid after an inverse transform.
*/
extern
"C"
__global__
void
unpackBackwardData
(
const
real2
*
__restrict__
in
,
real
*
__restrict__
out
)
{
const
int
gridSize
=
PACKED_XSIZE
*
PACKED_YSIZE
*
PACKED_ZSIZE
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
x
=
index
/
(
PACKED_YSIZE
*
PACKED_ZSIZE
);
int
remainder
=
index
-
x
*
(
PACKED_YSIZE
*
PACKED_ZSIZE
);
int
y
=
remainder
/
PACKED_ZSIZE
;
int
z
=
remainder
-
y
*
PACKED_ZSIZE
;
real2
value
=
2
*
in
[
index
];
#if PACKED_AXIS == 0
out
[
2
*
x
*
YSIZE
*
ZSIZE
+
y
*
ZSIZE
+
z
]
=
value
.
x
;
out
[(
2
*
x
+
1
)
*
YSIZE
*
ZSIZE
+
y
*
ZSIZE
+
z
]
=
value
.
y
;
#elif PACKED_AXIS == 1
out
[
x
*
YSIZE
*
ZSIZE
+
2
*
y
*
ZSIZE
+
z
]
=
value
.
x
;
out
[
x
*
YSIZE
*
ZSIZE
+
(
2
*
y
+
1
)
*
ZSIZE
+
z
]
=
value
.
y
;
#else
out
[
x
*
YSIZE
*
ZSIZE
+
y
*
ZSIZE
+
2
*
z
]
=
value
.
x
;
out
[
x
*
YSIZE
*
ZSIZE
+
y
*
ZSIZE
+
(
2
*
z
+
1
)]
=
value
.
y
;
#endif
}
}
platforms/cuda/src/kernels/pme.cu
View file @
bf6d95c2
...
@@ -271,9 +271,9 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
...
@@ -271,9 +271,9 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
real
forceX
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceX
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceX
*
0x100000000
));
atomicAdd
(
&
forceBuffers
[
atom
]
,
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceX
*
0x100000000
))
)
;
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceY
*
0x100000000
));
atomicAdd
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
,
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceY
*
0x100000000
))
)
;
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceZ
*
0x100000000
));
atomicAdd
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
,
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceZ
*
0x100000000
))
)
;
}
}
}
}
...
...
platforms/cuda/tests/TestCudaFFT3D.cpp
0 → 100644
View file @
bf6d95c2
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of sorting.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "CudaArray.h"
#include "CudaContext.h"
#include "CudaFFT3D.h"
#include "CudaSort.h"
#include "fftpack.h"
#include "sfmt/SFMT.h"
#include "openmm/System.h"
#include <iostream>
#include <cmath>
#include <set>
using
namespace
OpenMM
;
using
namespace
std
;
static
CudaPlatform
platform
;
template
<
class
Real2
>
void
testTransform
(
bool
realToComplex
,
int
xsize
,
int
ysize
,
int
zsize
)
{
System
system
;
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()));
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
Real2
>
original
(
xsize
*
ysize
*
zsize
);
vector
<
t_complex
>
reference
(
original
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
original
.
size
();
i
++
)
{
Real2
value
;
value
.
x
=
(
float
)
genrand_real2
(
sfmt
);
value
.
y
=
(
float
)
genrand_real2
(
sfmt
);
original
[
i
]
=
value
;
reference
[
i
]
=
t_complex
(
value
.
x
,
value
.
y
);
}
for
(
int
i
=
0
;
i
<
(
int
)
reference
.
size
();
i
++
)
{
if
(
realToComplex
)
reference
[
i
]
=
t_complex
(
i
%
2
==
0
?
original
[
i
/
2
].
x
:
original
[
i
/
2
].
y
,
0
);
else
reference
[
i
]
=
t_complex
(
original
[
i
].
x
,
original
[
i
].
y
);
}
CudaArray
grid1
(
context
,
original
.
size
(),
sizeof
(
Real2
),
"grid1"
);
CudaArray
grid2
(
context
,
original
.
size
(),
sizeof
(
Real2
),
"grid2"
);
grid1
.
upload
(
original
);
CudaFFT3D
fft
(
context
,
xsize
,
ysize
,
zsize
,
realToComplex
);
// Perform a forward FFT, then verify the result is correct.
fft
.
execFFT
(
grid1
,
grid2
,
true
);
vector
<
Real2
>
result
;
grid2
.
download
(
result
);
fftpack_t
plan
;
fftpack_init_3d
(
&
plan
,
xsize
,
ysize
,
zsize
);
fftpack_exec_3d
(
plan
,
FFTPACK_FORWARD
,
&
reference
[
0
],
&
reference
[
0
]);
int
outputZSize
=
(
realToComplex
?
zsize
/
2
+
1
:
zsize
);
for
(
int
x
=
0
;
x
<
xsize
;
x
++
)
for
(
int
y
=
0
;
y
<
ysize
;
y
++
)
for
(
int
z
=
0
;
z
<
outputZSize
;
z
++
)
{
int
index1
=
x
*
ysize
*
zsize
+
y
*
zsize
+
z
;
int
index2
=
x
*
ysize
*
outputZSize
+
y
*
outputZSize
+
z
;
ASSERT_EQUAL_TOL
(
reference
[
index1
].
re
,
result
[
index2
].
x
,
1e-3
);
ASSERT_EQUAL_TOL
(
reference
[
index1
].
im
,
result
[
index2
].
y
,
1e-3
);
}
fftpack_destroy
(
plan
);
// Perform a backward transform and see if we get the original values.
fft
.
execFFT
(
grid2
,
grid1
,
false
);
grid1
.
download
(
result
);
double
scale
=
1.0
/
(
xsize
*
ysize
*
zsize
);
int
valuesToCheck
=
(
realToComplex
?
original
.
size
()
/
2
:
original
.
size
());
for
(
int
i
=
0
;
i
<
valuesToCheck
;
++
i
)
{
ASSERT_EQUAL_TOL
(
original
[
i
].
x
,
scale
*
result
[
i
].
x
,
1e-4
);
ASSERT_EQUAL_TOL
(
original
[
i
].
y
,
scale
*
result
[
i
].
y
,
1e-4
);
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"CudaPrecision"
,
string
(
argv
[
1
]));
if
(
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
)
==
"double"
)
{
testTransform
<
double2
>
(
false
,
28
,
25
,
30
);
testTransform
<
double2
>
(
true
,
28
,
25
,
25
);
testTransform
<
double2
>
(
true
,
25
,
28
,
25
);
testTransform
<
double2
>
(
true
,
25
,
25
,
28
);
testTransform
<
double2
>
(
true
,
21
,
25
,
27
);
}
else
{
testTransform
<
float2
>
(
false
,
28
,
25
,
30
);
testTransform
<
float2
>
(
true
,
28
,
25
,
25
);
testTransform
<
float2
>
(
true
,
25
,
28
,
25
);
testTransform
<
float2
>
(
true
,
25
,
25
,
28
);
testTransform
<
float2
>
(
true
,
21
,
25
,
27
);
}
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
platforms/opencl/include/OpenCLContext.h
View file @
bf6d95c2
...
@@ -30,13 +30,13 @@
...
@@ -30,13 +30,13 @@
#include <map>
#include <map>
#include <queue>
#include <queue>
#include <string>
#include <string>
#include <pthread.h>
#define __CL_ENABLE_EXCEPTIONS
#define __CL_ENABLE_EXCEPTIONS
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#ifdef _MSC_VER
#ifdef _MSC_VER
// Prevent Windows from defining macros that interfere with other code.
// Prevent Windows from defining macros that interfere with other code.
#define NOMINMAX
#define NOMINMAX
#endif
#endif
#include <pthread.h>
#include <cl.hpp>
#include <cl.hpp>
#include "windowsExportOpenCL.h"
#include "windowsExportOpenCL.h"
#include "OpenCLPlatform.h"
#include "OpenCLPlatform.h"
...
...
plugins/cpupme/src/CpuPmeKernels.cpp
View file @
bf6d95c2
...
@@ -177,12 +177,12 @@ static void computeReciprocalEterm(int start, int end, int gridx, int gridy, int
...
@@ -177,12 +177,12 @@ static void computeReciprocalEterm(int start, int end, int gridx, int gridy, int
}
}
}
}
static
float
reciprocalEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
static
double
reciprocalEnergy
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
double
alpha
,
vector
<
float
>*
bsplineModuli
,
Vec3
*
periodicBoxVectors
,
Vec3
*
recipBoxVectors
)
{
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
zsizeHalf
=
gridz
/
2
+
1
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
unsigned
int
yzsizeHalf
=
gridy
*
zsizeHalf
;
const
float
scaleFactor
=
(
float
)
(
M_PI
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
const
float
scaleFactor
=
(
float
)
(
M_PI
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
const
float
recipExpFactor
=
(
float
)
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
const
float
recipExpFactor
=
(
float
)
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
float
energy
=
0.0
f
;
double
energy
=
0.0
;
int
firstz
=
(
start
==
0
?
1
:
0
);
int
firstz
=
(
start
==
0
?
1
:
0
);
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
for
(
int
kx
=
start
;
kx
<
end
;
kx
++
)
{
...
@@ -220,7 +220,7 @@ static float reciprocalEnergy(int start, int end, fftwf_complex* grid, int gridx
...
@@ -220,7 +220,7 @@ static float reciprocalEnergy(int start, int end, fftwf_complex* grid, int gridx
firstz
=
0
;
firstz
=
0
;
}
}
}
}
return
0.5
f
*
energy
;
return
0.5
*
energy
;
}
}
static
void
reciprocalConvolution
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
vector
<
float
>&
recipEterm
)
{
static
void
reciprocalConvolution
(
int
start
,
int
end
,
fftwf_complex
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
vector
<
float
>&
recipEterm
)
{
...
...
plugins/cpupme/src/CpuPmeKernels.h
View file @
bf6d95c2
...
@@ -32,6 +32,7 @@
...
@@ -32,6 +32,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#define NOMINMAX
#include "internal/windowsExportPme.h"
#include "internal/windowsExportPme.h"
#include "openmm/kernels.h"
#include "openmm/kernels.h"
#include "openmm/Vec3.h"
#include "openmm/Vec3.h"
...
...
wrappers/python/simtk/openmm/__init__.py
View file @
bf6d95c2
...
@@ -20,6 +20,8 @@ if sys.platform == 'win32':
...
@@ -20,6 +20,8 @@ if sys.platform == 'win32':
from
simtk.openmm.openmm
import
*
from
simtk.openmm.openmm
import
*
from
simtk.openmm.vec3
import
Vec3
from
simtk.openmm.vec3
import
Vec3
from
simtk.openmm.mtsintegrator
import
MTSIntegrator
from
simtk.openmm.amd
import
AMDIntegrator
,
AMDForceGroupIntegrator
,
DualAMDIntegrator
if
os
.
getenv
(
'OPENMM_PLUGIN_DIR'
)
is
None
and
os
.
path
.
isdir
(
version
.
openmm_library_path
):
if
os
.
getenv
(
'OPENMM_PLUGIN_DIR'
)
is
None
and
os
.
path
.
isdir
(
version
.
openmm_library_path
):
pluginLoadedLibNames
=
Platform
.
loadPluginsFromDirectory
(
os
.
path
.
join
(
version
.
openmm_library_path
,
'plugins'
))
pluginLoadedLibNames
=
Platform
.
loadPluginsFromDirectory
(
os
.
path
.
join
(
version
.
openmm_library_path
,
'plugins'
))
...
...
wrappers/python/simtk/openmm/amd.py
0 → 100644
View file @
bf6d95c2
"""
amd.py: Implements the aMD integration method.
This is part of the OpenMM molecular simulation toolkit originating from
Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2012 Stanford University and the Authors.
Authors: Peter Eastman, Steffen Lindert
Contributors:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
__author__
=
"Peter Eastman"
__version__
=
"1.0"
from
simtk.openmm
import
CustomIntegrator
from
simtk.unit
import
kilojoules_per_mole
,
is_quantity
class
AMDIntegrator
(
CustomIntegrator
):
"""AMDIntegrator implements the aMD integration algorithm.
The system is integrated based on a modified potential. Whenever the energy V(r) is less than a
cutoff value E, the following effective potential is used:
V*(r) = V(r) + (E-V(r))^2 / (alpha+E-V(r))
For details, see Hamelberg et al., J. Chem. Phys. 127, 155102 (2007).
"""
def
__init__
(
self
,
dt
,
alpha
,
E
):
"""Create an AMDIntegrator.
Parameters:
- dt (time) The integration time step to use
- alpha (energy) The alpha parameter to use
- E (energy) The energy cutoff to use
"""
CustomIntegrator
.
__init__
(
self
,
dt
)
self
.
addGlobalVariable
(
"alpha"
,
alpha
)
self
.
addGlobalVariable
(
"E"
,
E
)
self
.
addPerDofVariable
(
"oldx"
,
0
)
self
.
addUpdateContextState
();
self
.
addComputePerDof
(
"v"
,
"v+dt*fprime/m; fprime=f*((1-modify) + modify*(alpha/(alpha+E-energy))^2); modify=step(E-energy)"
)
self
.
addComputePerDof
(
"oldx"
,
"x"
)
self
.
addComputePerDof
(
"x"
,
"x+dt*v"
)
self
.
addConstrainPositions
()
self
.
addComputePerDof
(
"v"
,
"(x-oldx)/dt"
)
def
getAlpha
(
self
):
"""Get the value of alpha for the integrator."""
return
self
.
getGlobalVariable
(
0
)
*
kilojoules_per_mole
def
setAlpha
(
self
,
alpha
):
"""Set the value of alpha for the integrator."""
self
.
setGlobalVariable
(
0
,
alpha
)
def
getE
(
self
):
"""Get the energy threshold E for the integrator."""
return
self
.
getGlobalVariable
(
1
)
*
kilojoules_per_mole
def
setE
(
self
,
E
):
"""Set the energy threshold E for the integrator."""
self
.
setGlobalVariable
(
1
,
E
)
def
getEffectiveEnergy
(
self
,
energy
):
"""Given the actual potential energy of the system, return the value of the effective potential."""
alpha
=
self
.
getAlpha
()
E
=
self
.
getE
()
if
not
is_quantity
(
energy
):
energy
=
energy
*
kilojoules_per_mole
# Assume kJ/mole
if
(
energy
>
E
):
return
energy
return
energy
+
(
E
-
energy
)
*
(
E
-
energy
)
/
(
alpha
+
E
-
energy
)
class
AMDForceGroupIntegrator
(
CustomIntegrator
):
"""AMDForceGroupIntegrator implements a single boost aMD integration algorithm.
This is similar to AMDIntegrator, but is applied based on the energy of a single force group
(typically representing torsions).
For details, see Hamelberg et al., J. Chem. Phys. 127, 155102 (2007).
"""
def
__init__
(
self
,
dt
,
group
,
alphaGroup
,
EGroup
):
"""Create a AMDForceGroupIntegrator.
Parameters:
- dt (time) The integration time step to use
- group (int) The force group to apply the boost to
- alphaGroup (energy) The alpha parameter to use for the boosted force group
- EGroup (energy) The energy cutoff to use for the boosted force group
"""
CustomIntegrator
.
__init__
(
self
,
dt
)
self
.
addGlobalVariable
(
"alphaGroup"
,
alphaGroup
)
self
.
addGlobalVariable
(
"EGroup"
,
EGroup
)
self
.
addGlobalVariable
(
"groupEnergy"
,
0
)
self
.
addPerDofVariable
(
"oldx"
,
0
)
self
.
addPerDofVariable
(
"fg"
,
0
)
self
.
addUpdateContextState
();
self
.
addComputeGlobal
(
"groupEnergy"
,
"energy"
+
str
(
group
))
self
.
addComputePerDof
(
"fg"
,
"f"
+
str
(
group
))
self
.
addComputePerDof
(
"v"
,
"v+dt*fprime/m; fprime=fother + fg*((1-modify) + modify*(alphaGroup/(alphaGroup+EGroup-groupEnergy))^2); fother=f-fg; modify=step(EGroup-groupEnergy)"
)
self
.
addComputePerDof
(
"oldx"
,
"x"
)
self
.
addComputePerDof
(
"x"
,
"x+dt*v"
)
self
.
addConstrainPositions
()
self
.
addComputePerDof
(
"v"
,
"(x-oldx)/dt"
)
def
getAlphaGroup
(
self
):
"""Get the value of alpha for the boosted force group."""
return
self
.
getGlobalVariable
(
0
)
*
kilojoules_per_mole
def
setAlphaGroup
(
self
,
alpha
):
"""Set the value of alpha for the boosted force group."""
self
.
setGlobalVariable
(
0
,
alpha
)
def
getEGroup
(
self
):
"""Get the energy threshold E for the boosted force group."""
return
self
.
getGlobalVariable
(
1
)
*
kilojoules_per_mole
def
setEGroup
(
self
,
E
):
"""Set the energy threshold E for the boosted force group."""
self
.
setGlobalVariable
(
1
,
E
)
def
getEffectiveEnergy
(
self
,
groupEnergy
):
"""Given the actual group energy of the system, return the value of the effective potential.
Parameters:
- groupEnergy (energy): the actual potential energy of the boosted force group
Returns: the value of the effective potential
"""
alphaGroup
=
self
.
getAlphaGroup
()
EGroup
=
self
.
getEGroup
()
if
not
is_quantity
(
groupEnergy
):
groupEnergy
=
groupEnergy
*
kilojoules_per_mole
# Assume kJ/mole
dE
=
0.0
*
kilojoules_per_mole
if
(
groupEnergy
<
EGroup
):
dE
=
dE
+
(
EGroup
-
groupEnergy
)
*
(
EGroup
-
groupEnergy
)
/
(
alphaGroup
+
EGroup
-
groupEnergy
)
return
groupEnergy
+
dE
class
DualAMDIntegrator
(
CustomIntegrator
):
"""DualAMDIntegrator implements a dual boost aMD integration algorithm.
This is similar to AMDIntegrator, but two different boosts are applied to the potential:
one based on the total energy, and one based on the energy of a single force group
(typically representing torsions).
For details, see Hamelberg et al., J. Chem. Phys. 127, 155102 (2007).
"""
def
__init__
(
self
,
dt
,
group
,
alphaTotal
,
ETotal
,
alphaGroup
,
EGroup
):
"""Create a DualAMDIntegrator.
Parameters:
- dt (time) The integration time step to use
- group (int) The force group to apply the second boost to
- alphaTotal (energy) The alpha parameter to use for the total energy
- ETotal (energy) The energy cutoff to use for the total energy
- alphaGroup (energy) The alpha parameter to use for the boosted force group
- EGroup (energy) The energy cutoff to use for the boosted force group
"""
CustomIntegrator
.
__init__
(
self
,
dt
)
self
.
addGlobalVariable
(
"alphaTotal"
,
alphaTotal
)
self
.
addGlobalVariable
(
"ETotal"
,
ETotal
)
self
.
addGlobalVariable
(
"alphaGroup"
,
alphaGroup
)
self
.
addGlobalVariable
(
"EGroup"
,
EGroup
)
self
.
addGlobalVariable
(
"groupEnergy"
,
0
)
self
.
addPerDofVariable
(
"oldx"
,
0
)
self
.
addPerDofVariable
(
"fg"
,
0
)
self
.
addUpdateContextState
();
self
.
addComputeGlobal
(
"groupEnergy"
,
"energy"
+
str
(
group
))
self
.
addComputePerDof
(
"fg"
,
"f"
+
str
(
group
))
self
.
addComputePerDof
(
"v"
,
"""v+dt*fprime/m;
fprime=fprime1 + fprime2;
fprime2=fg*((1-modifyGroup) + modifyGroup*(alphaGroup/(alphaGroup+EGroup-groupEnergy))^2);
fprime1=fother*((1-modifyTotal) + modifyTotal*(alphaTotal/(alphaTotal+ETotal-energy))^2);
fother=f-fg;
modifyTotal=step(ETotal-energy); modifyGroup=step(EGroup-groupEnergy)"""
)
self
.
addComputePerDof
(
"oldx"
,
"x"
)
self
.
addComputePerDof
(
"x"
,
"x+dt*v"
)
self
.
addConstrainPositions
()
self
.
addComputePerDof
(
"v"
,
"(x-oldx)/dt"
)
def
getAlphaTotal
(
self
):
"""Get the value of alpha for the total energy."""
return
self
.
getGlobalVariable
(
0
)
*
kilojoules_per_mole
def
setAlphaTotal
(
self
,
alpha
):
"""Set the value of alpha for the total energy."""
self
.
setGlobalVariable
(
0
,
alpha
)
def
getETotal
(
self
):
"""Get the energy threshold E for the total energy."""
return
self
.
getGlobalVariable
(
1
)
*
kilojoules_per_mole
def
setETotal
(
self
,
E
):
"""Set the energy threshold E for the total energy."""
self
.
setGlobalVariable
(
1
,
E
)
def
getAlphaGroup
(
self
):
"""Get the value of alpha for the boosted force group."""
return
self
.
getGlobalVariable
(
2
)
*
kilojoules_per_mole
def
setAlphaGroup
(
self
,
alpha
):
"""Set the value of alpha for the boosted force group."""
self
.
setGlobalVariable
(
2
,
alpha
)
def
getEGroup
(
self
):
"""Get the energy threshold E for the boosted force group."""
return
self
.
getGlobalVariable
(
3
)
*
kilojoules_per_mole
def
setEGroup
(
self
,
E
):
"""Set the energy threshold E for the boosted force group."""
self
.
setGlobalVariable
(
3
,
E
)
def
getEffectiveEnergy
(
self
,
totalEnergy
,
groupEnergy
):
"""Given the actual potential energy of the system, return the value of the effective potential.
Parameters:
- totalEnergy (energy): the actual potential energy of the whole system
- groupEnergy (energy): the actual potential energy of the boosted force group
Returns: the value of the effective potential
"""
alphaTotal
=
self
.
getAlphaTotal
()
ETotal
=
self
.
getETotal
()
alphaGroup
=
self
.
getAlphaGroup
()
EGroup
=
self
.
getEGroup
()
if
not
is_quantity
(
totalEnergy
):
totalEnergy
=
totalEnergy
*
kilojoules_per_mole
# Assume kJ/mole
if
not
is_quantity
(
groupEnergy
):
groupEnergy
=
groupEnergy
*
kilojoules_per_mole
# Assume kJ/mole
dE
=
0.0
*
kilojoules_per_mole
if
(
totalEnergy
<
ETotal
):
dE
=
dE
+
(
ETotal
-
totalEnergy
)
*
(
ETotal
-
totalEnergy
)
/
(
alphaTotal
+
ETotal
-
totalEnergy
)
if
(
groupEnergy
<
EGroup
):
dE
=
dE
+
(
EGroup
-
groupEnergy
)
*
(
EGroup
-
groupEnergy
)
/
(
alphaGroup
+
EGroup
-
groupEnergy
)
return
totalEnergy
+
dE
wrappers/python/simtk/openmm/app/__init__.py
View file @
bf6d95c2
...
@@ -4,7 +4,7 @@ OpenMM Application
...
@@ -4,7 +4,7 @@ OpenMM Application
__docformat__
=
"epytext en"
__docformat__
=
"epytext en"
__author__
=
"Peter Eastman"
__author__
=
"Peter Eastman"
__copyright__
=
"Copyright 201
1
, Stanford University and Peter Eastman"
__copyright__
=
"Copyright 201
5
, Stanford University and Peter Eastman"
__credits__
=
[]
__credits__
=
[]
__license__
=
"MIT"
__license__
=
"MIT"
__maintainer__
=
"Peter Eastman"
__maintainer__
=
"Peter Eastman"
...
...
wrappers/python/simtk/openmm/app/gromacstopfile.py
View file @
bf6d95c2
...
@@ -8,7 +8,7 @@ Medical Research, grant U54 GM072970. See https://simtk.org.
...
@@ -8,7 +8,7 @@ Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2012-2015 Stanford University and the Authors.
Portions copyright (c) 2012-2015 Stanford University and the Authors.
Authors: Peter Eastman
Authors: Peter Eastman
Contributors:
Contributors:
Jason Swails
Permission is hereby granted, free of charge, to any person obtaining a
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
copy of this software and associated documentation files (the "Software"),
...
@@ -40,7 +40,9 @@ import simtk.unit as unit
...
@@ -40,7 +40,9 @@ import simtk.unit as unit
import
simtk.openmm
as
mm
import
simtk.openmm
as
mm
import
math
import
math
import
os
import
os
import
re
import
distutils.spawn
import
distutils.spawn
from
collections
import
OrderedDict
HBonds
=
ff
.
HBonds
HBonds
=
ff
.
HBonds
AllBonds
=
ff
.
AllBonds
AllBonds
=
ff
.
AllBonds
...
@@ -48,6 +50,57 @@ HAngles = ff.HAngles
...
@@ -48,6 +50,57 @@ HAngles = ff.HAngles
OBC2
=
prmtop
.
OBC2
OBC2
=
prmtop
.
OBC2
novarcharre
=
re
.
compile
(
r
'\W'
)
def
_find_all_instances_in_string
(
string
,
substr
):
""" Find indices of all instances of substr in string """
indices
=
[]
idx
=
string
.
find
(
substr
,
0
)
while
idx
>
-
1
:
indices
.
append
(
idx
)
idx
=
string
.
find
(
substr
,
idx
+
1
)
return
indices
def
_replace_defines
(
line
,
defines
):
""" Replaces defined tokens in a given line """
if
not
defines
:
return
line
for
define
in
reversed
(
defines
):
value
=
defines
[
define
]
indices
=
_find_all_instances_in_string
(
line
,
define
)
if
not
indices
:
continue
# Check to see if it's inside of quotes
inside
=
''
idx
=
0
n_to_skip
=
0
new_line
=
[]
for
i
,
char
in
enumerate
(
line
):
if
n_to_skip
:
n_to_skip
-=
1
continue
if
char
in
(
'
\'
"'
):
if
not
inside
:
inside
=
char
else
:
if
inside
==
char
:
inside
=
''
if
idx
<
len
(
indices
)
and
i
==
indices
[
idx
]:
if
inside
:
new_line
.
append
(
char
)
idx
+=
1
continue
if
i
==
0
or
novarcharre
.
match
(
line
[
i
-
1
]):
endidx
=
indices
[
idx
]
+
len
(
define
)
if
endidx
>=
len
(
line
)
or
novarcharre
.
match
(
line
[
endidx
]):
new_line
.
extend
(
list
(
value
))
n_to_skip
=
len
(
define
)
-
1
idx
+=
1
continue
idx
+=
1
new_line
.
append
(
char
)
line
=
''
.
join
(
new_line
)
return
line
class
GromacsTopFile
(
object
):
class
GromacsTopFile
(
object
):
"""GromacsTopFile parses a Gromacs top file and constructs a Topology and (optionally) an OpenMM System from it."""
"""GromacsTopFile parses a Gromacs top file and constructs a Topology and (optionally) an OpenMM System from it."""
...
@@ -61,6 +114,7 @@ class GromacsTopFile(object):
...
@@ -61,6 +114,7 @@ class GromacsTopFile(object):
self
.
exclusions
=
[]
self
.
exclusions
=
[]
self
.
pairs
=
[]
self
.
pairs
=
[]
self
.
cmaps
=
[]
self
.
cmaps
=
[]
self
.
has_virtual_sites
=
False
def
_processFile
(
self
,
file
):
def
_processFile
(
self
,
file
):
append
=
''
append
=
''
...
@@ -113,6 +167,7 @@ class GromacsTopFile(object):
...
@@ -113,6 +167,7 @@ class GromacsTopFile(object):
name
=
fields
[
1
]
name
=
fields
[
1
]
valueStart
=
stripped
.
find
(
name
,
len
(
command
))
+
len
(
name
)
+
1
valueStart
=
stripped
.
find
(
name
,
len
(
command
))
+
len
(
name
)
+
1
value
=
line
[
valueStart
:].
strip
()
value
=
line
[
valueStart
:].
strip
()
value
=
value
or
'1'
# Default define is 1
self
.
_defines
[
name
]
=
value
self
.
_defines
[
name
]
=
value
elif
command
==
'#ifdef'
:
elif
command
==
'#ifdef'
:
# See whether this block should be ignored.
# See whether this block should be ignored.
...
@@ -121,6 +176,12 @@ class GromacsTopFile(object):
...
@@ -121,6 +176,12 @@ class GromacsTopFile(object):
name
=
fields
[
1
]
name
=
fields
[
1
]
self
.
_ifStack
.
append
(
name
in
self
.
_defines
)
self
.
_ifStack
.
append
(
name
in
self
.
_defines
)
self
.
_elseStack
.
append
(
False
)
self
.
_elseStack
.
append
(
False
)
elif
command
==
'#undef'
:
# Un-define a variable
if
len
(
fields
)
<
2
:
raise
ValueError
(
'Illegal line in .top file: '
+
line
)
if
fields
[
1
]
in
self
.
_defines
:
self
.
_defines
.
pop
(
fields
[
1
])
elif
command
==
'#ifndef'
:
elif
command
==
'#ifndef'
:
# See whether this block should be ignored.
# See whether this block should be ignored.
if
len
(
fields
)
<
2
:
if
len
(
fields
)
<
2
:
...
@@ -145,6 +206,11 @@ class GromacsTopFile(object):
...
@@ -145,6 +206,11 @@ class GromacsTopFile(object):
self
.
_elseStack
[
-
1
]
=
True
self
.
_elseStack
[
-
1
]
=
True
elif
not
ignore
:
elif
not
ignore
:
# Gromacs occasionally uses #define's to introduce specific
# parameters for individual terms (for instance, this is how
# ff99SB-ILDN is implemented). So make sure we do the appropriate
# pre-processor replacements necessary
line
=
_replace_defines
(
line
,
self
.
_defines
)
# A line of data for the current category
# A line of data for the current category
if
self
.
_currentCategory
is
None
:
if
self
.
_currentCategory
is
None
:
raise
ValueError
(
'Unexpected line in .top file: '
+
line
)
raise
ValueError
(
'Unexpected line in .top file: '
+
line
)
...
@@ -182,6 +248,11 @@ class GromacsTopFile(object):
...
@@ -182,6 +248,11 @@ class GromacsTopFile(object):
self
.
_processPairType
(
line
)
self
.
_processPairType
(
line
)
elif
self
.
_currentCategory
==
'cmaptypes'
:
elif
self
.
_currentCategory
==
'cmaptypes'
:
self
.
_processCmapType
(
line
)
self
.
_processCmapType
(
line
)
elif
self
.
_currentCategory
.
startswith
(
'virtual_sites'
):
if
self
.
_currentMoleculeType
is
None
:
raise
ValueError
(
'Found %s before [ moleculetype ]'
%
self
.
_currentCategory
)
self
.
_currentMoleculeType
.
has_virtual_sites
=
True
def
_processDefaults
(
self
,
line
):
def
_processDefaults
(
self
,
line
):
"""Process the [ defaults ] line."""
"""Process the [ defaults ] line."""
...
@@ -193,7 +264,7 @@ class GromacsTopFile(object):
...
@@ -193,7 +264,7 @@ class GromacsTopFile(object):
if
fields
[
1
]
!=
'2'
:
if
fields
[
1
]
!=
'2'
:
raise
ValueError
(
'Unsupported combination rule: '
+
fields
[
1
])
raise
ValueError
(
'Unsupported combination rule: '
+
fields
[
1
])
if
fields
[
2
].
lower
()
==
'no'
:
if
fields
[
2
].
lower
()
==
'no'
:
raise
ValueError
(
'
gen
_
pairs
=no is not supported'
)
self
.
_
genpairs
=
False
self
.
_defaults
=
fields
self
.
_defaults
=
fields
def
_processMoleculeType
(
self
,
line
):
def
_processMoleculeType
(
self
,
line
):
...
@@ -379,9 +450,12 @@ class GromacsTopFile(object):
...
@@ -379,9 +450,12 @@ class GromacsTopFile(object):
# unless the preprocessor #define FLEXIBLE is given, don't define
# unless the preprocessor #define FLEXIBLE is given, don't define
# bonds between the water hydrogen and oxygens, but only give the
# bonds between the water hydrogen and oxygens, but only give the
# constraint distances and exclusions.
# constraint distances and exclusions.
self
.
_defines
=
{
'FLEXIBLE'
:
True
}
self
.
_defines
=
OrderedDict
()
self
.
_defines
[
'FLEXIBLE'
]
=
True
self
.
_genpairs
=
True
if
defines
is
not
None
:
if
defines
is
not
None
:
self
.
_defines
.
update
(
defines
)
for
define
,
value
in
defines
.
iteritems
():
self
.
_defines
[
define
]
=
value
# Parse the file.
# Parse the file.
...
@@ -416,6 +490,8 @@ class GromacsTopFile(object):
...
@@ -416,6 +490,8 @@ class GromacsTopFile(object):
if
moleculeName
not
in
self
.
_moleculeTypes
:
if
moleculeName
not
in
self
.
_moleculeTypes
:
raise
ValueError
(
"Unknown molecule type: "
+
moleculeName
)
raise
ValueError
(
"Unknown molecule type: "
+
moleculeName
)
moleculeType
=
self
.
_moleculeTypes
[
moleculeName
]
moleculeType
=
self
.
_moleculeTypes
[
moleculeName
]
if
moleculeCount
>
0
and
moleculeType
.
has_virtual_sites
:
raise
ValueError
(
'Virtual sites not yet supported by Gromacs parsers'
)
# Create the specified number of molecules of this type.
# Create the specified number of molecules of this type.
...
@@ -747,6 +823,9 @@ class GromacsTopFile(object):
...
@@ -747,6 +823,9 @@ class GromacsTopFile(object):
params
=
self
.
_pairTypes
[
types
][
3
:
5
]
params
=
self
.
_pairTypes
[
types
][
3
:
5
]
elif
types
[::
-
1
]
in
self
.
_pairTypes
:
elif
types
[::
-
1
]
in
self
.
_pairTypes
:
params
=
self
.
_pairTypes
[
types
[::
-
1
]][
3
:
5
]
params
=
self
.
_pairTypes
[
types
[::
-
1
]][
3
:
5
]
elif
not
self
.
_genpairs
:
raise
ValueError
(
'No pair parameters defined for atom '
'types %s and gen-pairs is "no"'
%
types
)
else
:
else
:
continue
# We'll use the automatically generated parameters
continue
# We'll use the automatically generated parameters
atom1params
=
nb
.
getParticleParameters
(
baseAtomIndex
+
atoms
[
0
])
atom1params
=
nb
.
getParticleParameters
(
baseAtomIndex
+
atoms
[
0
])
...
@@ -796,8 +875,8 @@ class GromacsTopFile(object):
...
@@ -796,8 +875,8 @@ class GromacsTopFile(object):
def
_defaultGromacsIncludeDir
():
def
_defaultGromacsIncludeDir
():
"""Find the location where gromacs #include files are referenced from, by
"""Find the location where gromacs #include files are referenced from, by
searching for (1) gromacs environment variables, (2) for the gromacs binary
searching for (1) gromacs environment variables, (2) for the gromacs binary
'pdb2gmx' in the PATH, or (3) just using the default gromacs
install
'pdb2gmx'
or 'gmx'
in the PATH, or (3) just using the default gromacs
location, /usr/local/gromacs/share/gromacs/top """
install
location, /usr/local/gromacs/share/gromacs/top """
if
'GMXDATA'
in
os
.
environ
:
if
'GMXDATA'
in
os
.
environ
:
return
os
.
path
.
join
(
os
.
environ
[
'GMXDATA'
],
'top'
)
return
os
.
path
.
join
(
os
.
environ
[
'GMXDATA'
],
'top'
)
if
'GMXBIN'
in
os
.
environ
:
if
'GMXBIN'
in
os
.
environ
:
...
@@ -806,5 +885,9 @@ def _defaultGromacsIncludeDir():
...
@@ -806,5 +885,9 @@ def _defaultGromacsIncludeDir():
pdb2gmx_path
=
distutils
.
spawn
.
find_executable
(
'pdb2gmx'
)
pdb2gmx_path
=
distutils
.
spawn
.
find_executable
(
'pdb2gmx'
)
if
pdb2gmx_path
is
not
None
:
if
pdb2gmx_path
is
not
None
:
return
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
pdb2gmx_path
),
'..'
,
'share'
,
'gromacs'
,
'top'
))
return
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
pdb2gmx_path
),
'..'
,
'share'
,
'gromacs'
,
'top'
))
else
:
gmx_path
=
distutils
.
spawn
.
find_executable
(
'gmx'
)
if
gmx_path
is
not
None
:
return
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
gmx_path
),
'..'
,
'share'
,
'gromacs'
,
'top'
))
return
'/usr/local/gromacs/share/gromacs/top'
return
'/usr/local/gromacs/share/gromacs/top'
wrappers/python/simtk/openmm/app/internal/amber_file_parser.py
View file @
bf6d95c2
...
@@ -1363,6 +1363,7 @@ class AmberNetcdfRestart(object):
...
@@ -1363,6 +1363,7 @@ class AmberNetcdfRestart(object):
if
'velocities'
in
ncfile
.
variables
:
if
'velocities'
in
ncfile
.
variables
:
vels
=
ncfile
.
variables
[
'velocities'
]
vels
=
ncfile
.
variables
[
'velocities'
]
self
.
velocities
=
np
.
array
(
vels
[:])
*
vels
.
scale_factor
self
.
velocities
=
np
.
array
(
vels
[:])
*
vels
.
scale_factor
del
vels
# Get rid of reference to variable to avoid warnings
if
(
'cell_lengths'
in
ncfile
.
variables
and
if
(
'cell_lengths'
in
ncfile
.
variables
and
'cell_angles'
in
ncfile
.
variables
):
'cell_angles'
in
ncfile
.
variables
):
self
.
boxVectors
=
np
.
zeros
((
3
,
3
),
np
.
float32
)
self
.
boxVectors
=
np
.
zeros
((
3
,
3
),
np
.
float32
)
...
@@ -1372,6 +1373,7 @@ class AmberNetcdfRestart(object):
...
@@ -1372,6 +1373,7 @@ class AmberNetcdfRestart(object):
units
.
degrees
)
units
.
degrees
)
self
.
boxVectors
=
computePeriodicBoxVectors
(
leng
[
0
],
leng
[
1
],
self
.
boxVectors
=
computePeriodicBoxVectors
(
leng
[
0
],
leng
[
1
],
leng
[
2
],
angl
[
0
],
angl
[
1
],
angl
[
2
])
leng
[
2
],
angl
[
0
],
angl
[
1
],
angl
[
2
])
del
leng
,
angl
# Avoid warnings
if
'time'
in
ncfile
.
variables
:
if
'time'
in
ncfile
.
variables
:
self
.
time
=
ncfile
.
variables
[
'time'
].
getValue
()
self
.
time
=
ncfile
.
variables
[
'time'
].
getValue
()
finally
:
finally
:
...
...
wrappers/python/simtk/openmm/app/internal/customgbforces.py
View file @
bf6d95c2
...
@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of
...
@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.
Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2012-201
4
University of Virginia and the Authors.
Portions copyright (c) 2012-201
5
University of Virginia and the Authors.
Authors: Christoph Klein, Michael R. Shirts
Authors: Christoph Klein, Michael R. Shirts
Contributors: Jason M. Swails, Peter Eastman
Contributors: Jason M. Swails, Peter Eastman
...
@@ -31,7 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
...
@@ -31,7 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
from
__future__
import
division
from
__future__
import
division
from
simtk.openmm
import
CustomGBForce
,
Discrete1
DFunction
from
simtk.openmm
import
CustomGBForce
,
Continuous2
DFunction
d0
=
[
2.26685
,
2.32548
,
2.38397
,
2.44235
,
2.50057
,
2.55867
,
2.61663
,
2.67444
,
d0
=
[
2.26685
,
2.32548
,
2.38397
,
2.44235
,
2.50057
,
2.55867
,
2.61663
,
2.67444
,
2.73212
,
2.78965
,
2.84705
,
2.9043
,
2.96141
,
3.0184
,
3.07524
,
3.13196
,
2.73212
,
2.78965
,
2.84705
,
2.9043
,
2.96141
,
3.0184
,
3.07524
,
3.13196
,
...
@@ -308,12 +308,11 @@ def GBSAGBnForce(solventDielectric=78.5, soluteDielectric=1, SA=None,
...
@@ -308,12 +308,11 @@ def GBSAGBnForce(solventDielectric=78.5, soluteDielectric=1, SA=None,
custom
.
addPerParticleParameter
(
"or"
)
# Offset radius
custom
.
addPerParticleParameter
(
"or"
)
# Offset radius
custom
.
addPerParticleParameter
(
"sr"
)
# Scaled offset radius
custom
.
addPerParticleParameter
(
"sr"
)
# Scaled offset radius
custom
.
addTabulatedFunction
(
"getd0"
,
Discrete1DFunction
(
d0
))
custom
.
addTabulatedFunction
(
"getd0"
,
Continuous2DFunction
(
21
,
21
,
d0
,
0.1
,
0.2
,
0.1
,
0.2
))
custom
.
addTabulatedFunction
(
"getm0"
,
Discrete1DFunction
(
m0
))
custom
.
addTabulatedFunction
(
"getm0"
,
Continuous2DFunction
(
21
,
21
,
m0
,
0.1
,
0.2
,
0.1
,
0.2
))
custom
.
addComputedValue
(
"I"
,
"Ivdw+neckScale*Ineck;"
custom
.
addComputedValue
(
"I"
,
"Ivdw+neckScale*Ineck;"
"Ineck=step(radius1+radius2+neckCut-r)*getm0(index)/(1+100*(r-getd0(index))^2+0.3*1000000*(r-getd0(index))^6);"
"Ineck=step(radius1+radius2+neckCut-r)*getm0(radius1,radius2)/(1+100*(r-getd0(radius1,radius2))^2+0.3*1000000*(r-getd0(radius1,radius2))^6);"
"index = (radius2*200-20)*21 + (radius1*200-20);"
"Ivdw=step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(r-sr2^2/r)*(1/(U^2)-1/(L^2))+0.5*log(L/U)/r);"
"Ivdw=step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(r-sr2^2/r)*(1/(U^2)-1/(L^2))+0.5*log(L/U)/r);"
"U=r+sr2;"
"U=r+sr2;"
"L=max(or1, D);"
"L=max(or1, D);"
...
@@ -350,12 +349,11 @@ def GBSAGBn2Force(solventDielectric=78.5, soluteDielectric=1, SA=None,
...
@@ -350,12 +349,11 @@ def GBSAGBn2Force(solventDielectric=78.5, soluteDielectric=1, SA=None,
custom
.
addPerParticleParameter
(
"beta"
)
custom
.
addPerParticleParameter
(
"beta"
)
custom
.
addPerParticleParameter
(
"gamma"
)
custom
.
addPerParticleParameter
(
"gamma"
)
custom
.
addTabulatedFunction
(
"getd0"
,
Discrete1DFunction
(
d0
))
custom
.
addTabulatedFunction
(
"getd0"
,
Continuous2DFunction
(
21
,
21
,
d0
,
0.1
,
0.2
,
0.1
,
0.2
))
custom
.
addTabulatedFunction
(
"getm0"
,
Discrete1DFunction
(
m0
))
custom
.
addTabulatedFunction
(
"getm0"
,
Continuous2DFunction
(
21
,
21
,
m0
,
0.1
,
0.2
,
0.1
,
0.2
))
custom
.
addComputedValue
(
"I"
,
"Ivdw+neckScale*Ineck;"
custom
.
addComputedValue
(
"I"
,
"Ivdw+neckScale*Ineck;"
"Ineck=step(radius1+radius2+neckCut-r)*getm0(index)/(1+100*(r-getd0(index))^2+0.3*1000000*(r-getd0(index))^6);"
"Ineck=step(radius1+radius2+neckCut-r)*getm0(radius1,radius2)/(1+100*(r-getd0(radius1,radius2))^2+0.3*1000000*(r-getd0(radius1,radius2))^6);"
"index = (radius2*200-20)*21 + (radius1*200-20);"
"Ivdw=step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(r-sr2^2/r)*(1/(U^2)-1/(L^2))+0.5*log(L/U)/r);"
"Ivdw=step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(r-sr2^2/r)*(1/(U^2)-1/(L^2))+0.5*log(L/U)/r);"
"U=r+sr2;"
"U=r+sr2;"
"L=max(or1, D);"
"L=max(or1, D);"
...
...
wrappers/python/simtk/openmm/app/simulation.py
View file @
bf6d95c2
This diff is collapsed.
Click to expand it.
wrappers/python/simtk/openmm/app/statedatareporter.py
View file @
bf6d95c2
...
@@ -55,7 +55,7 @@ class StateDataReporter(object):
...
@@ -55,7 +55,7 @@ class StateDataReporter(object):
"""
"""
def
__init__
(
self
,
file
,
reportInterval
,
step
=
False
,
time
=
False
,
potentialEnergy
=
False
,
kineticEnergy
=
False
,
totalEnergy
=
False
,
temperature
=
False
,
volume
=
False
,
density
=
False
,
def
__init__
(
self
,
file
,
reportInterval
,
step
=
False
,
time
=
False
,
potentialEnergy
=
False
,
kineticEnergy
=
False
,
totalEnergy
=
False
,
temperature
=
False
,
volume
=
False
,
density
=
False
,
progress
=
False
,
remainingTime
=
False
,
speed
=
False
,
separator
=
','
,
systemMass
=
None
,
totalSteps
=
None
):
progress
=
False
,
remainingTime
=
False
,
speed
=
False
,
elapsedTime
=
False
,
separator
=
','
,
systemMass
=
None
,
totalSteps
=
None
):
"""Create a StateDataReporter.
"""Create a StateDataReporter.
Parameters:
Parameters:
...
@@ -74,6 +74,7 @@ class StateDataReporter(object):
...
@@ -74,6 +74,7 @@ class StateDataReporter(object):
- remainingTime (boolean=False) Whether to write an estimate of the remaining clock time until
- remainingTime (boolean=False) Whether to write an estimate of the remaining clock time until
completion to the file. If this is True, you must also specify totalSteps.
completion to the file. If this is True, you must also specify totalSteps.
- speed (bool=False) Whether to write an estimate of the simulation speed in ns/day to the file
- speed (bool=False) Whether to write an estimate of the simulation speed in ns/day to the file
- elapsedTime (bool=False) Whether to write the elapsed time of the simulation in seconds to the file.
- separator (string=',') The separator to use between columns in the file
- separator (string=',') The separator to use between columns in the file
- systemMass (mass=None) The total mass to use for the system when reporting density. If this is
- systemMass (mass=None) The total mass to use for the system when reporting density. If this is
None (the default), the system mass is computed by summing the masses of all particles. This
None (the default), the system mass is computed by summing the masses of all particles. This
...
@@ -113,6 +114,7 @@ class StateDataReporter(object):
...
@@ -113,6 +114,7 @@ class StateDataReporter(object):
self
.
_progress
=
progress
self
.
_progress
=
progress
self
.
_remainingTime
=
remainingTime
self
.
_remainingTime
=
remainingTime
self
.
_speed
=
speed
self
.
_speed
=
speed
self
.
_elapsedTime
=
elapsedTime
self
.
_separator
=
separator
self
.
_separator
=
separator
self
.
_totalMass
=
systemMass
self
.
_totalMass
=
systemMass
self
.
_totalSteps
=
totalSteps
self
.
_totalSteps
=
totalSteps
...
@@ -207,6 +209,8 @@ class StateDataReporter(object):
...
@@ -207,6 +209,8 @@ class StateDataReporter(object):
values
.
append
(
'%.3g'
%
(
elapsedNs
/
elapsedDays
))
values
.
append
(
'%.3g'
%
(
elapsedNs
/
elapsedDays
))
else
:
else
:
values
.
append
(
'--'
)
values
.
append
(
'--'
)
if
self
.
_elapsedTime
:
values
.
append
(
time
.
time
()
-
self
.
_initialClockTime
)
if
self
.
_remainingTime
:
if
self
.
_remainingTime
:
elapsedSeconds
=
clockTime
-
self
.
_initialClockTime
elapsedSeconds
=
clockTime
-
self
.
_initialClockTime
elapsedSteps
=
simulation
.
currentStep
-
self
.
_initialSteps
elapsedSteps
=
simulation
.
currentStep
-
self
.
_initialSteps
...
@@ -284,6 +288,8 @@ class StateDataReporter(object):
...
@@ -284,6 +288,8 @@ class StateDataReporter(object):
headers
.
append
(
'Density (g/mL)'
)
headers
.
append
(
'Density (g/mL)'
)
if
self
.
_speed
:
if
self
.
_speed
:
headers
.
append
(
'Speed (ns/day)'
)
headers
.
append
(
'Speed (ns/day)'
)
if
self
.
_elapsedTime
:
headers
.
append
(
'Elapsed Time (s)'
)
if
self
.
_remainingTime
:
if
self
.
_remainingTime
:
headers
.
append
(
'Time Remaining'
)
headers
.
append
(
'Time Remaining'
)
return
headers
return
headers
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment