Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
669e314e
Commit
669e314e
authored
Oct 15, 2016
by
John Chodera (MSKCC)
Browse files
Merge remote-tracking branch 'upstream/master' into fix-mts
parents
283b1acd
388bf972
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
310 additions
and
115 deletions
+310
-115
.travis.yml
.travis.yml
+1
-1
README.md
README.md
+3
-12
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+19
-10
platforms/cuda/include/CudaParallelKernels.h
platforms/cuda/include/CudaParallelKernels.h
+1
-1
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+7
-5
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+2
-2
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+45
-23
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+15
-14
platforms/cuda/src/kernels/findInteractingBlocks.cu
platforms/cuda/src/kernels/findInteractingBlocks.cu
+88
-19
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+59
-5
platforms/reference/src/SimTKReference/ReferenceVariableStochasticDynamics.cpp
...rc/SimTKReference/ReferenceVariableStochasticDynamics.cpp
+3
-3
wrappers/python/src/swig_doxygen/swig_lib/python/extend.i
wrappers/python/src/swig_doxygen/swig_lib/python/extend.i
+36
-17
wrappers/python/src/swig_doxygen/swig_lib/python/header.i
wrappers/python/src/swig_doxygen/swig_lib/python/header.i
+16
-2
wrappers/python/src/swig_doxygen/swig_lib/python/pythoncode.i
...pers/python/src/swig_doxygen/swig_lib/python/pythoncode.i
+15
-1
No files found.
.travis.yml
View file @
669e314e
...
...
@@ -109,7 +109,7 @@ before_install:
sudo easy_install pytest;
fi
-
if [[ "$OPENCL" == "true" ]]; then
wget http
s
://
jenkins.choderalab.org/userContent
/AMD-APP-SDKInstaller-v3.0.130.135-GA-linux64.tar.bz2;
wget http://
s3.amazonaws.com/omnia-ci
/AMD-APP-SDKInstaller-v3.0.130.135-GA-linux64.tar.bz2;
tar -xjf AMD-APP-SDK*.tar.bz2;
AMDAPPSDK=${HOME}/AMDAPPSDK;
export OPENCL_VENDOR_PATH=${AMDAPPSDK}/etc/OpenCL/vendors;
...
...
README.md
View file @
669e314e
[

](https://travis-ci.org/pandegroup/openmm)
[

](https://anaconda.org/omnia/openmm)
## OpenMM: A High Performance Molecular Dynamics Library
Introduction
...
...
@@ -10,15 +13,3 @@ Getting Help
------------
Need Help? Check out the
[
documentation
](
http://docs.openmm.org/
)
and
[
discussion forums
](
https://simtk.org/forums/viewforum.php?f=161
)
.
[
C++ API Reference
](
http://docs.openmm.org/6.3.0/api-c++/namespaceOpenMM.html
)
[
Python API Reference
](
http://docs.openmm.org/6.3.0/api-python/annotated.html
)
Badges
------
*
Travis CI
`linux`
and
`osx`
integration tests:
*
GitHub master
[

](https://travis-ci.org/pandegroup/openmm)
*
`openmm-dev`
recipe
[

](https://travis-ci.org/omnia-md/conda-dev-recipes)
*
Anaconda Cloud
`openmm`
conda release:
[

](https://anaconda.org/omnia/openmm)
*
Anaconda Cloud
`openmm-dev`
conda package:
[

](https://anaconda.org/omnia/openmm-dev)
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
669e314e
...
...
@@ -78,8 +78,9 @@ public:
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated
* @param supportsPairList specifies whether this interaction can work with a neighbor list that uses a separate pair list
*/
void
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
exclusionList
,
const
std
::
string
&
kernel
,
int
forceGroup
);
void
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
exclusionList
,
const
std
::
string
&
kernel
,
int
forceGroup
,
bool
supportsPairList
=
false
);
/**
* Add a per-atom parameter that the default interaction kernel may depend on.
*/
...
...
@@ -189,6 +190,12 @@ public:
CudaArray
&
getInteractingAtoms
()
{
return
*
interactingAtoms
;
}
/**
* Get the array containing single pairs in the neighbor list.
*/
CudaArray
&
getSinglePairs
()
{
return
*
singlePairs
;
}
/**
* Get the array containing exclusion flags.
*/
...
...
@@ -270,6 +277,8 @@ private:
CudaArray
*
interactingTiles
;
CudaArray
*
interactingAtoms
;
CudaArray
*
interactionCount
;
CudaArray
*
singlePairs
;
CudaArray
*
singlePairCount
;
CudaArray
*
blockCenter
;
CudaArray
*
blockBoundingBox
;
CudaArray
*
sortedBlocks
;
...
...
@@ -288,8 +297,8 @@ private:
std
::
map
<
int
,
double
>
groupCutoff
;
std
::
map
<
int
,
std
::
string
>
groupKernelSource
;
double
lastCutoff
;
bool
useCutoff
,
usePeriodic
,
anyExclusions
,
usePadding
,
forceRebuildNeighborList
;
int
startTileIndex
,
numTiles
,
startBlockIndex
,
numBlocks
,
maxTiles
,
maxExclusions
,
numForceThreadBlocks
,
forceThreadBlockSize
,
numAtoms
,
groupFlags
;
bool
useCutoff
,
usePeriodic
,
anyExclusions
,
usePadding
,
forceRebuildNeighborList
,
canUsePairList
;
int
startTileIndex
,
numTiles
,
startBlockIndex
,
numBlocks
,
maxTiles
,
maxSinglePairs
,
maxExclusions
,
numForceThreadBlocks
,
forceThreadBlockSize
,
numAtoms
,
groupFlags
;
};
/**
...
...
platforms/cuda/include/CudaParallelKernels.h
View file @
669e314e
...
...
@@ -83,7 +83,7 @@ private:
std
::
vector
<
Kernel
>
kernels
;
std
::
vector
<
long
long
>
completionTimes
;
std
::
vector
<
double
>
contextNonbondedFractions
;
int
*
tile
Counts
;
int
2
*
interaction
Counts
;
CudaArray
*
contextForces
;
void
*
pinnedPositionBuffer
;
long
long
*
pinnedForceBuffer
;
...
...
platforms/cuda/src/CudaContext.cpp
View file @
669e314e
...
...
@@ -130,8 +130,10 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
#endif
struct
stat
info
;
isNvccAvailable
=
(
res
==
0
&&
stat
(
tempDir
.
c_str
(),
&
info
)
==
0
);
int
cudaDriverVersion
;
cuDriverGetVersion
(
&
cudaDriverVersion
);
static
bool
hasShownNvccWarning
=
false
;
if
(
hasCompilerKernel
&&
!
isNvccAvailable
&&
!
hasShownNvccWarning
)
{
if
(
hasCompilerKernel
&&
!
isNvccAvailable
&&
!
hasShownNvccWarning
&&
cudaDriverVersion
<
8000
)
{
hasShownNvccWarning
=
true
;
printf
(
"Could not find nvcc. Using runtime compiler, which may produce slower performance. "
);
#ifdef WIN32
...
...
@@ -208,14 +210,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int
major
,
minor
;
CHECK_RESULT
(
cuDeviceComputeCapability
(
&
major
,
&
minor
,
device
));
int
numThreadBlocksPerComputeUnit
=
(
major
>=
6
?
4
:
6
);
#if __CUDA_API_VERSION
< 7000
if
(
cudaDriverVersion
<
7000
)
{
// This is a workaround to support GTX 980 with CUDA 6.5. It reports
// its compute capability as 5.2, but the compiler doesn't support
// anything beyond 5.0.
if
(
major
==
5
)
minor
=
0
;
#endif
#if __CUDA_API_VERSION
< 8000
}
if
(
cudaDriverVersion
<
8000
)
{
// This is a workaround to support Pascal with CUDA 7.5. It reports
// its compute capability as 6.x, but the compiler doesn't support
// anything beyond 5.3.
...
...
@@ -223,7 +225,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
major
=
5
;
minor
=
3
;
}
#endif
}
gpuArchitecture
=
intToString
(
major
)
+
intToString
(
minor
);
computeCapability
=
major
+
0.1
*
minor
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
669e314e
...
...
@@ -1920,7 +1920,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Add the interaction to the default nonbonded kernel.
string
source
=
cu
.
replaceStrings
(
CudaKernelSources
::
coulombLennardJones
,
defines
);
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
()
,
true
);
if
(
hasLJ
)
cu
.
getNonbondedUtilities
().
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
"sigmaEpsilon"
,
"float"
,
2
,
sizeof
(
float2
),
sigmaEpsilon
->
getDevicePointer
()));
...
...
@@ -2314,7 +2314,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
if
(
force
.
getNumInteractionGroups
()
>
0
)
initInteractionGroups
(
force
,
source
,
tableTypes
);
else
{
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
()
,
true
);
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
cu
.
getNonbondedUtilities
().
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
cu
.
intToString
(
i
+
1
),
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
669e314e
...
...
@@ -64,15 +64,16 @@ private:
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
interactionCount
(
NULL
),
singlePairs
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
),
canUsePairList
(
true
)
{
// Decide how many thread blocks to use.
string
errorMessage
=
"Error initializing nonbonded utilities"
;
int
multiprocessors
;
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
context
.
getDevice
()));
CHECK_RESULT
(
cuEventCreate
(
&
downloadCountEvent
,
0
));
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
pinnedCountBuffer
,
sizeof
(
int
),
CU_MEMHOSTALLOC_PORTABLE
));
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
pinnedCountBuffer
,
2
*
sizeof
(
int
),
CU_MEMHOSTALLOC_PORTABLE
));
numForceThreadBlocks
=
4
*
multiprocessors
;
forceThreadBlockSize
=
(
context
.
getComputeCapability
()
<
2.0
?
128
:
256
);
}
...
...
@@ -92,6 +93,8 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
delete
interactingAtoms
;
if
(
interactionCount
!=
NULL
)
delete
interactionCount
;
if
(
singlePairs
!=
NULL
)
delete
singlePairs
;
if
(
blockCenter
!=
NULL
)
delete
blockCenter
;
if
(
blockBoundingBox
!=
NULL
)
...
...
@@ -113,7 +116,7 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
cuEventDestroy
(
downloadCountEvent
);
}
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
,
bool
supportsPairList
)
{
if
(
groupCutoff
.
size
()
>
0
)
{
if
(
usesCutoff
!=
useCutoff
)
throw
OpenMMException
(
"All Forces must agree on whether to use a cutoff"
);
...
...
@@ -128,6 +131,7 @@ void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic,
usePeriodic
=
usesPeriodic
;
groupCutoff
[
forceGroup
]
=
cutoffDistance
;
groupFlags
|=
1
<<
forceGroup
;
canUsePairList
&=
supportsPairList
;
if
(
kernel
.
size
()
>
0
)
{
if
(
groupKernelSource
.
find
(
forceGroup
)
==
groupKernelSource
.
end
())
groupKernelSource
[
forceGroup
]
=
""
;
...
...
@@ -279,9 +283,11 @@ void CudaNonbondedUtilities::initialize(const System& system) {
maxTiles
=
numTiles
;
if
(
maxTiles
<
1
)
maxTiles
=
1
;
maxSinglePairs
=
5
*
numAtoms
;
interactingTiles
=
CudaArray
::
create
<
int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
CudaArray
::
create
<
int
>
(
context
,
CudaContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactionCount
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
1
,
"interactionCount"
);
interactionCount
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
2
,
"interactionCount"
);
singlePairs
=
CudaArray
::
create
<
int2
>
(
context
,
maxSinglePairs
,
"singlePairs"
);
int
elementSize
=
(
context
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
blockCenter
=
new
CudaArray
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockCenter"
);
blockBoundingBox
=
new
CudaArray
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockBoundingBox"
);
...
...
@@ -291,7 +297,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
oldPositions
=
new
CudaArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
rebuildNeighborList
=
CudaArray
::
create
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockSorter
=
new
CudaSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
unsigned
int
>
count
(
1
,
0
);
vector
<
unsigned
int
>
count
(
2
,
0
);
interactionCount
->
upload
(
count
);
}
...
...
@@ -316,6 +322,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
forceArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
forceArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactingAtoms
->
getDevicePointer
());
forceArgs
.
push_back
(
&
maxSinglePairs
);
forceArgs
.
push_back
(
&
singlePairs
->
getDevicePointer
());
}
for
(
int
i
=
0
;
i
<
(
int
)
parameters
.
size
();
i
++
)
forceArgs
.
push_back
(
&
parameters
[
i
].
getMemory
());
...
...
@@ -353,8 +361,10 @@ void CudaNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactingTiles
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactingAtoms
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
singlePairs
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
maxTiles
);
findInteractingBlocksArgs
.
push_back
(
&
maxSinglePairs
);
findInteractingBlocksArgs
.
push_back
(
&
startBlockIndex
);
findInteractingBlocksArgs
.
push_back
(
&
numBlocks
);
findInteractingBlocksArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
...
...
@@ -424,12 +434,13 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo
bool
CudaNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
return
false
;
if
(
pinnedCountBuffer
[
0
]
<=
(
unsigned
int
)
maxTile
s
)
if
(
pinnedCountBuffer
[
0
]
<=
maxTiles
&&
pinnedCountBuffer
[
1
]
<=
maxSinglePair
s
)
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
if
(
pinnedCountBuffer
[
0
]
>
maxTiles
)
{
maxTiles
=
(
int
)
(
1.2
*
pinnedCountBuffer
[
0
]);
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
...
...
@@ -446,6 +457,16 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
}
if
(
pinnedCountBuffer
[
1
]
>
maxSinglePairs
)
{
maxSinglePairs
=
(
int
)
(
1.2
*
pinnedCountBuffer
[
1
]);
delete
singlePairs
;
singlePairs
=
NULL
;
// Avoid an error in the destructor if the following allocation fails
singlePairs
=
CudaArray
::
create
<
int2
>
(
context
,
maxSinglePairs
,
"singlePairs"
);
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
19
]
=
&
singlePairs
->
getDevicePointer
();
findInteractingBlocksArgs
[
8
]
=
&
singlePairs
->
getDevicePointer
();
}
forceRebuildNeighborList
=
true
;
context
.
setForcesValid
(
false
);
return
true
;
...
...
@@ -493,6 +514,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
if
(
usePeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"MAX_EXCLUSIONS"
]
=
context
.
intToString
(
maxExclusions
);
defines
[
"MAX_BITS_FOR_PAIRS"
]
=
(
canUsePairList
?
"2"
:
"0"
);
CUmodule
interactingBlocksProgram
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
findInteractingBlocks
,
defines
);
kernels
.
findBlockBoundsKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
kernels
.
sortBoxDataKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"sortBoxData"
);
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
669e314e
...
...
@@ -63,8 +63,8 @@ if (result != CUDA_SUCCESS) { \
class
CudaParallelCalcForcesAndEnergyKernel
::
BeginComputationTask
:
public
CudaContext
::
WorkTask
{
public:
BeginComputationTask
(
ContextImpl
&
context
,
CudaContext
&
cu
,
CudaCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
void
*
pinnedMemory
,
CUevent
event
,
int
&
numTiles
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
pinnedMemory
(
pinnedMemory
),
event
(
event
),
numTiles
(
numTiles
)
{
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
void
*
pinnedMemory
,
CUevent
event
,
int
2
&
interactionCount
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
pinnedMemory
(
pinnedMemory
),
event
(
event
),
interactionCount
(
interactionCount
)
{
}
void
execute
()
{
// Copy coordinates over to this device and execute the kernel.
...
...
@@ -77,7 +77,7 @@ public:
}
kernel
.
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
if
(
cu
.
getNonbondedUtilities
().
getUsePeriodic
())
cu
.
getNonbondedUtilities
().
getInteractionCount
().
download
(
&
numTiles
,
false
);
cu
.
getNonbondedUtilities
().
getInteractionCount
().
download
(
&
interactionCount
,
false
);
}
private:
ContextImpl
&
context
;
...
...
@@ -87,15 +87,15 @@ private:
int
groups
;
void
*
pinnedMemory
;
CUevent
event
;
int
&
numTiles
;
int
2
&
interactionCount
;
};
class
CudaParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
CudaContext
::
WorkTask
{
public:
FinishComputationTask
(
ContextImpl
&
context
,
CudaContext
&
cu
,
CudaCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
long
long
*
pinnedMemory
,
CudaArray
&
contextForces
,
bool
&
valid
,
int
&
numTiles
)
:
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
long
long
*
pinnedMemory
,
CudaArray
&
contextForces
,
bool
&
valid
,
int
2
&
interactionCount
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
contextForces
(
contextForces
),
valid
(
valid
),
numTiles
(
numTiles
)
{
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
contextForces
(
contextForces
),
valid
(
valid
),
interactionCount
(
interactionCount
)
{
}
void
execute
()
{
// Execute the kernel, then download forces.
...
...
@@ -120,7 +120,8 @@ public:
cu
.
getForce
().
download
(
&
pinnedMemory
[(
cu
.
getContextIndex
()
-
1
)
*
numAtoms
*
3
]);
}
}
if
(
cu
.
getNonbondedUtilities
().
getUsePeriodic
()
&&
numTiles
>
cu
.
getNonbondedUtilities
().
getInteractingTiles
().
getSize
())
{
if
(
cu
.
getNonbondedUtilities
().
getUsePeriodic
()
&&
(
interactionCount
.
x
>
cu
.
getNonbondedUtilities
().
getInteractingTiles
().
getSize
()
||
interactionCount
.
y
>
cu
.
getNonbondedUtilities
().
getSinglePairs
().
getSize
()))
{
valid
=
false
;
cu
.
getNonbondedUtilities
().
updateNeighborListSize
();
}
...
...
@@ -136,12 +137,12 @@ private:
long
long
*
pinnedMemory
;
CudaArray
&
contextForces
;
bool
&
valid
;
int
&
numTiles
;
int
2
&
interactionCount
;
};
CudaParallelCalcForcesAndEnergyKernel
::
CudaParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
)
:
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
),
completionTimes
(
data
.
contexts
.
size
()),
contextNonbondedFractions
(
data
.
contexts
.
size
()),
tile
Counts
(
NULL
),
contextForces
(
NULL
),
pinnedPositionBuffer
(
NULL
),
pinnedForceBuffer
(
NULL
)
{
interaction
Counts
(
NULL
),
contextForces
(
NULL
),
pinnedPositionBuffer
(
NULL
),
pinnedForceBuffer
(
NULL
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
CudaCalcForcesAndEnergyKernel
(
name
,
platform
,
*
data
.
contexts
[
i
])));
}
...
...
@@ -156,8 +157,8 @@ CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel()
cuMemFreeHost
(
pinnedForceBuffer
);
cuEventDestroy
(
event
);
cuStreamDestroy
(
peerCopyStream
);
if
(
tile
Counts
!=
NULL
)
cuMemFreeHost
(
tile
Counts
);
if
(
interaction
Counts
!=
NULL
)
cuMemFreeHost
(
interaction
Counts
);
}
void
CudaParallelCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
...
...
@@ -172,7 +173,7 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
contextNonbondedFractions
[
i
]
=
1
/
(
double
)
numContexts
;
CHECK_RESULT
(
cuEventCreate
(
&
event
,
0
),
"Error creating event"
);
CHECK_RESULT
(
cuStreamCreate
(
&
peerCopyStream
,
CU_STREAM_NON_BLOCKING
),
"Error creating stream"
);
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
tile
Counts
,
numContexts
*
sizeof
(
int
),
0
),
"Error creating
tile
count buffer"
);
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
interaction
Counts
,
numContexts
*
sizeof
(
int
2
),
0
),
"Error creating
interaction
count
s
buffer"
);
}
void
CudaParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
...
...
@@ -202,7 +203,7 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
data
.
contextEnergy
[
i
]
=
0.0
;
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
BeginComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
pinnedPositionBuffer
,
event
,
tile
Counts
[
i
]));
thread
.
addTask
(
new
BeginComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
pinnedPositionBuffer
,
event
,
interaction
Counts
[
i
]));
}
}
...
...
@@ -210,7 +211,7 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
*
contextForces
,
valid
,
tile
Counts
[
i
]));
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
*
contextForces
,
valid
,
interaction
Counts
[
i
]));
}
data
.
syncContexts
();
double
energy
=
0.0
;
...
...
platforms/cuda/src/kernels/findInteractingBlocks.cu
View file @
669e314e
...
...
@@ -71,9 +71,60 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
if
(
rebuild
)
{
rebuildNeighborList
[
0
]
=
1
;
interactionCount
[
0
]
=
0
;
interactionCount
[
1
]
=
0
;
}
}
__device__
int
saveSinglePairs
(
int
x
,
int
*
atoms
,
int
*
flags
,
int
length
,
unsigned
int
maxSinglePairs
,
unsigned
int
*
singlePairCount
,
int2
*
singlePairs
,
int
*
sumBuffer
,
volatile
int
&
pairStartIndex
)
{
// Record interactions that should be computed as single pairs rather than in blocks.
const
int
indexInWarp
=
threadIdx
.
x
%
32
;
int
sum
=
0
;
for
(
int
i
=
indexInWarp
;
i
<
length
;
i
+=
32
)
{
int
count
=
__popc
(
flags
[
i
]);
sum
+=
(
count
<=
MAX_BITS_FOR_PAIRS
?
count
:
0
);
}
sumBuffer
[
indexInWarp
]
=
sum
;
for
(
int
step
=
1
;
step
<
32
;
step
*=
2
)
{
int
add
=
(
indexInWarp
>=
step
?
sumBuffer
[
indexInWarp
-
step
]
:
0
);
sumBuffer
[
indexInWarp
]
+=
add
;
}
int
pairsToStore
=
sumBuffer
[
31
];
if
(
indexInWarp
==
0
)
pairStartIndex
=
atomicAdd
(
singlePairCount
,
pairsToStore
);
int
pairIndex
=
pairStartIndex
+
(
indexInWarp
>
0
?
sumBuffer
[
indexInWarp
-
1
]
:
0
);
for
(
int
i
=
indexInWarp
;
i
<
length
;
i
+=
32
)
{
int
count
=
__popc
(
flags
[
i
]);
if
(
count
<=
MAX_BITS_FOR_PAIRS
&&
pairIndex
+
count
<
maxSinglePairs
)
{
int
f
=
flags
[
i
];
while
(
f
!=
0
)
{
singlePairs
[
pairIndex
]
=
make_int2
(
atoms
[
i
],
x
*
TILE_SIZE
+
__ffs
(
f
)
-
1
);
f
&=
f
-
1
;
pairIndex
++
;
}
}
}
// Compact the remaining interactions.
const
int
warpMask
=
(
1
<<
indexInWarp
)
-
1
;
int
numCompacted
=
0
;
for
(
int
start
=
0
;
start
<
length
;
start
+=
32
)
{
int
i
=
start
+
indexInWarp
;
int
atom
=
atoms
[
i
];
int
flag
=
flags
[
i
];
bool
include
=
(
i
<
length
&&
__popc
(
flags
[
i
])
>
MAX_BITS_FOR_PAIRS
);
int
includeFlags
=
__ballot
(
include
);
if
(
include
)
{
int
index
=
numCompacted
+
__popc
(
includeFlags
&
warpMask
);
atoms
[
index
]
=
atom
;
flags
[
index
]
=
flag
;
}
numCompacted
+=
__popc
(
includeFlags
);
}
return
numCompacted
;
}
/**
* Compare the bounding boxes for each pair of atom blocks (comprised of 32 atoms each), forming a tile. If the two
* atom blocks are sufficiently far apart, mark them as non-interacting. There are two stages in the algorithm.
...
...
@@ -124,8 +175,9 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
*
*/
extern
"C"
__global__
void
findBlocksWithInteractions
(
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
*
__restrict__
interactionCount
,
int
*
__restrict__
interactingTiles
,
unsigned
int
*
__restrict__
interactingAtoms
,
const
real4
*
__restrict__
posq
,
unsigned
int
maxTiles
,
unsigned
int
startBlockIndex
,
unsigned
int
numBlocks
,
real2
*
__restrict__
sortedBlocks
,
const
real4
*
__restrict__
sortedBlockCenter
,
unsigned
int
*
__restrict__
interactionCount
,
int
*
__restrict__
interactingTiles
,
unsigned
int
*
__restrict__
interactingAtoms
,
int2
*
__restrict__
singlePairs
,
const
real4
*
__restrict__
posq
,
unsigned
int
maxTiles
,
unsigned
int
maxSinglePairs
,
unsigned
int
startBlockIndex
,
unsigned
int
numBlocks
,
real2
*
__restrict__
sortedBlocks
,
const
real4
*
__restrict__
sortedBlockCenter
,
const
real4
*
__restrict__
sortedBlockBoundingBox
,
const
unsigned
int
*
__restrict__
exclusionIndices
,
const
unsigned
int
*
__restrict__
exclusionRowIndices
,
real4
*
__restrict__
oldPositions
,
const
int
*
__restrict__
rebuildNeighborList
)
{
...
...
@@ -138,12 +190,17 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
const
int
warpIndex
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
32
;
const
int
warpMask
=
(
1
<<
indexInWarp
)
-
1
;
__shared__
int
workgroupBuffer
[
BUFFER_SIZE
*
(
GROUP_SIZE
/
32
)];
__shared__
int
workgroupFlagsBuffer
[
BUFFER_SIZE
*
(
GROUP_SIZE
/
32
)];
__shared__
int
warpExclusions
[
MAX_EXCLUSIONS
*
(
GROUP_SIZE
/
32
)];
__shared__
real3
posBuffer
[
GROUP_SIZE
];
__shared__
volatile
int
workgroupTileIndex
[
GROUP_SIZE
/
32
];
__shared__
int
sumBuffer
[
GROUP_SIZE
];
__shared__
int
worksgroupPairStartIndex
[
GROUP_SIZE
/
32
];
int
*
buffer
=
workgroupBuffer
+
BUFFER_SIZE
*
(
warpStart
/
32
);
int
*
flagsBuffer
=
workgroupFlagsBuffer
+
BUFFER_SIZE
*
(
warpStart
/
32
);
int
*
exclusionsForX
=
warpExclusions
+
MAX_EXCLUSIONS
*
(
warpStart
/
32
);
volatile
int
&
tileStartIndex
=
workgroupTileIndex
[
warpStart
/
32
];
volatile
int
&
pairStartIndex
=
worksgroupPairStartIndex
[
warpStart
/
32
];
// Loop over blocks.
...
...
@@ -227,7 +284,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
APPLY_PERIODIC_TO_DELTA
(
atomDelta
)
#endif
int
atomFlags
=
ballot
(
atomDelta
.
x
*
atomDelta
.
x
+
atomDelta
.
y
*
atomDelta
.
y
+
atomDelta
.
z
*
atomDelta
.
z
<
(
PADDED_CUTOFF
+
blockCenterY
.
w
)
*
(
PADDED_CUTOFF
+
blockCenterY
.
w
));
bool
interacts
=
false
;
int
interacts
=
0
;
if
(
atom2
<
NUM_ATOMS
&&
atomFlags
!=
0
)
{
int
first
=
__ffs
(
atomFlags
)
-
1
;
int
last
=
32
-
__clz
(
atomFlags
);
...
...
@@ -236,14 +293,14 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
for
(
int
j
=
first
;
j
<
last
;
j
++
)
{
real3
delta
=
pos2
-
posBuffer
[
warpStart
+
j
];
APPLY_PERIODIC_TO_DELTA
(
delta
)
interacts
|=
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
<
PADDED_CUTOFF_SQUARED
);
interacts
|=
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
<
PADDED_CUTOFF_SQUARED
?
1
<<
j
:
0
);
}
}
else
{
#endif
for
(
int
j
=
first
;
j
<
last
;
j
++
)
{
real3
delta
=
pos2
-
posBuffer
[
warpStart
+
j
];
interacts
|=
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
<
PADDED_CUTOFF_SQUARED
);
interacts
|=
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
<
PADDED_CUTOFF_SQUARED
?
1
<<
j
:
0
);
}
#ifdef USE_PERIODIC
}
...
...
@@ -253,15 +310,22 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
// Add any interacting atoms to the buffer.
int
includeAtomFlags
=
__ballot
(
interacts
);
if
(
interacts
)
buffer
[
neighborsInBuffer
+
__popc
(
includeAtomFlags
&
warpMask
)]
=
atom2
;
if
(
interacts
)
{
int
index
=
neighborsInBuffer
+
__popc
(
includeAtomFlags
&
warpMask
);
buffer
[
index
]
=
atom2
;
flagsBuffer
[
index
]
=
interacts
;
}
neighborsInBuffer
+=
__popc
(
includeAtomFlags
);
if
(
neighborsInBuffer
>
BUFFER_SIZE
-
TILE_SIZE
)
{
// Store the new tiles to memory.
#if MAX_BITS_FOR_PAIRS > 0
neighborsInBuffer
=
saveSinglePairs
(
x
,
buffer
,
flagsBuffer
,
neighborsInBuffer
,
maxSinglePairs
,
&
interactionCount
[
1
],
singlePairs
,
sumBuffer
+
warpStart
,
pairStartIndex
);
#endif
int
tilesToStore
=
neighborsInBuffer
/
TILE_SIZE
;
if
(
tilesToStore
>
0
)
{
if
(
indexInWarp
==
0
)
tileStartIndex
=
atomicAdd
(
interactionCount
,
tilesToStore
);
tileStartIndex
=
atomicAdd
(
&
interactionCount
[
0
]
,
tilesToStore
);
int
newTileStartIndex
=
tileStartIndex
;
if
(
newTileStartIndex
+
tilesToStore
<=
maxTiles
)
{
if
(
indexInWarp
<
tilesToStore
)
...
...
@@ -274,13 +338,18 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
}
}
}
}
// If we have a partially filled buffer, store it to memory.
#if MAX_BITS_FOR_PAIRS > 0
if
(
neighborsInBuffer
>
32
)
neighborsInBuffer
=
saveSinglePairs
(
x
,
buffer
,
flagsBuffer
,
neighborsInBuffer
,
maxSinglePairs
,
&
interactionCount
[
1
],
singlePairs
,
sumBuffer
+
warpStart
,
pairStartIndex
);
#endif
if
(
neighborsInBuffer
>
0
)
{
int
tilesToStore
=
(
neighborsInBuffer
+
TILE_SIZE
-
1
)
/
TILE_SIZE
;
if
(
indexInWarp
==
0
)
tileStartIndex
=
atomicAdd
(
interactionCount
,
tilesToStore
);
tileStartIndex
=
atomicAdd
(
&
interactionCount
[
0
]
,
tilesToStore
);
int
newTileStartIndex
=
tileStartIndex
;
if
(
newTileStartIndex
+
tilesToStore
<=
maxTiles
)
{
if
(
indexInWarp
<
tilesToStore
)
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
669e314e
...
...
@@ -103,9 +103,10 @@ extern "C" __global__ void computeNonbonded(
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
#ifdef USE_CUTOFF
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
unsigned
int
maxSinglePairs
,
const
int2
*
__restrict__
singlePairs
#endif
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
...
...
@@ -278,10 +279,10 @@ extern "C" __global__ void computeNonbonded(
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif // end USE_SYMMETRIC
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
#endif
...
...
@@ -484,9 +485,9 @@ extern "C" __global__ void computeNonbonded(
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif // end USE_SYMMETRIC
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
@@ -555,9 +556,9 @@ extern "C" __global__ void computeNonbonded(
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif // end USE_SYMMETRIC
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
@@ -588,6 +589,59 @@ extern "C" __global__ void computeNonbonded(
}
pos
++
;
}
// Third loop: single pairs that aren't part of a tile.
#if USE_CUTOFF
const
unsigned
int
numPairs
=
interactionCount
[
1
];
if
(
numPairs
>
maxSinglePairs
)
return
;
// There wasn't enough memory for the neighbor list.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
numPairs
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int2
pair
=
singlePairs
[
i
];
int
atom1
=
pair
.
x
;
int
atom2
=
pair
.
y
;
real4
posq1
=
posq
[
atom1
];
real4
posq2
=
posq
[
atom2
];
LOAD_ATOM1_PARAMETERS
int
j
=
atom2
;
atom2
=
threadIdx
.
x
;
DECLARE_LOCAL_PARAMETERS
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_ATOM2_PARAMETERS
atom2
=
pair
.
y
;
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
real
r
=
r2
*
invR
;
#ifdef USE_SYMMETRIC
real
dEdR
=
0.0
f
;
#else
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
#endif
bool
hasExclusions
=
false
;
bool
isExcluded
=
false
;
real
tempEnergy
=
0.0
f
;
const
real
interactionScale
=
1.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
real3
dEdR1
=
delta
*
dEdR
;
real3
dEdR2
=
-
dEdR1
;
#endif
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
dEdR1
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
dEdR1
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
dEdR1
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
dEdR2
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
dEdR2
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
dEdR2
.
z
*
0x100000000
)));
#endif
}
#endif
#ifdef INCLUDE_ENERGY
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
#endif
...
...
platforms/reference/src/SimTKReference/ReferenceVariableStochasticDynamics.cpp
View file @
669e314e
...
...
@@ -266,11 +266,11 @@ void ReferenceVariableStochasticDynamics::update(const OpenMM::System& system, v
// copy xPrime -> atomCoordinates
RealOpenMM
invStepSize
=
1.0
/
getDeltaT
();
for
(
int
ii
=
0
;
ii
<
numberOfAtoms
;
ii
++
)
{
if
(
masses
[
ii
]
!=
0.0
)
{
atomCoordinates
[
ii
][
0
]
=
xPrime
[
ii
][
0
];
atomCoordinates
[
ii
][
1
]
=
xPrime
[
ii
][
1
];
atomCoordinates
[
ii
][
2
]
=
xPrime
[
ii
][
2
];
velocities
[
ii
]
=
(
xPrime
[
ii
]
-
atomCoordinates
[
ii
])
*
invStepSize
;
atomCoordinates
[
ii
]
=
xPrime
[
ii
];
}
}
...
...
wrappers/python/src/swig_doxygen/swig_lib/python/extend.i
View file @
669e314e
...
...
@@ -28,6 +28,7 @@
int
getForces
,
int
getEnergy
,
int
getParameters
,
int
getParameterDerivatives
,
int
enforcePeriodic
,
bitmask32t
groups
)
{
State
state
;
...
...
@@ -38,6 +39,7 @@
if
(
getForces
)
types
|=
State
::
Forces
;
if
(
getEnergy
)
types
|=
State
::
Energy
;
if
(
getParameters
)
types
|=
State
::
Parameters
;
if
(
getParameterDerivatives
)
types
|=
State
::
ParameterDerivatives
;
try
{
state
=
self-
>
getState
(
types
,
enforcePeriodic
,
groups
)
;
}
...
...
@@ -53,7 +55,7 @@
%
pythoncode
%
{
def
getState
(
self
,
getPositions
=
False
,
getVelocities
=
False
,
getForces
=
False
,
getEnergy
=
False
,
getParameters
=
False
,
enforcePeriodicBox
=
False
,
groups
=-
1
)
:
getParameterDerivatives
=
False
,
enforcePeriodicBox
=
False
,
groups
=-
1
)
:
"""Get a State object recording the current state information stored in this context.
Parameters
...
...
@@ -66,8 +68,10 @@
whether to store the forces acting on particles in the State
getEnergy : bool=False
whether to store potential and kinetic energy in the State
getParameter : bool=False
getParameter
s
: bool=False
whether to store context parameters in the State
getParameterDerivatives : bool=False
whether to store parameter derivatives in the State
enforcePeriodicBox : bool=False
if false, the position of each particle will be whatever position
is stored in the Context, regardless of periodic boundary conditions.
...
...
@@ -79,9 +83,9 @@
can also be passed as an unsigned integer interpreted as a bitmask,
in which case group i will be included if (groups&(1<<i)) != 0.
"""
getP
,
getV
,
getF
,
getE
,
getPa
,
enforcePeriodic
=
map
(
bool
,
getP
,
getV
,
getF
,
getE
,
getPa
,
getPd
,
enforcePeriodic
=
map
(
bool
,
(
getPositions
,
getVelocities
,
getForces
,
getEnergy
,
getParameters
,
enforcePeriodicBox
))
getParameterDerivatives
,
enforcePeriodicBox
))
try
:
#
is
the
input
integer-like
?
...
...
@@ -95,8 +99,8 @@
raise
TypeError
(
'
%
s
is
neither
an
int
nor
set
'
%
groups
)
(
simTime
,
periodicBoxVectorsList
,
energy
,
coordList
,
velList
,
forceList
,
paramMap
)
=
\
self
.
_getStateAsLists
(
getP
,
getV
,
getF
,
getE
,
getPa
,
enforcePeriodic
,
groups_mask
)
forceList
,
paramMap
,
paramDerivMap
)
=
\
self
.
_getStateAsLists
(
getP
,
getV
,
getF
,
getE
,
getPa
,
getPd
,
enforcePeriodic
,
groups_mask
)
state
=
State
(
simTime
=
simTime
,
energy
=
energy
,
...
...
@@ -104,7 +108,8 @@
velList
=
velList
,
forceList
=
forceList
,
periodicBoxVectorsList
=
periodicBoxVectorsList
,
paramMap
=
paramMap
)
paramMap
=
paramMap
,
paramDerivMap
=
paramDerivMap
)
return
state
def
setState
(
self
,
state
)
:
...
...
@@ -176,6 +181,7 @@ Parameters:
int
getForces
,
int
getEnergy
,
int
getParameters
,
int
getParameterDerivatives
,
int
enforcePeriodic
,
int
groups
)
{
State
state
;
...
...
@@ -186,6 +192,7 @@ Parameters:
if
(
getForces
)
types
|=
State
::
Forces
;
if
(
getEnergy
)
types
|=
State
::
Energy
;
if
(
getParameters
)
types
|=
State
::
Parameters
;
if
(
getParameterDerivatives
)
types
|=
State
::
ParameterDerivatives
;
try
{
state
=
self-
>
getState
(
copy
,
types
,
enforcePeriodic
,
groups
)
;
}
...
...
@@ -206,6 +213,7 @@ Parameters:
getForces
=
False
,
getEnergy
=
False
,
getParameters
=
False
,
getParameterDerivatives
=
False
,
enforcePeriodicBox
=
False
,
groups
=-
1
)
:
"""Get a State object recording the current state information about one copy of the system.
...
...
@@ -222,8 +230,10 @@ Parameters:
whether to store the forces acting on particles in the State
getEnergy : bool=False
whether to store potential and kinetic energy in the State
getParameter : bool=False
getParameter
s
: bool=False
whether to store context parameters in the State
getParameterDerivatives : bool=False
whether to store parameter derivatives in the State
enforcePeriodicBox : bool=False
if false, the position of each particle will be whatever position
is stored in the Context, regardless of periodic boundary conditions.
...
...
@@ -235,9 +245,9 @@ Parameters:
can also be passed as an unsigned integer interpreted as a bitmask,
in which case group i will be included if (groups&(1<<i)) != 0.
"""
getP
,
getV
,
getF
,
getE
,
getPa
,
enforcePeriodic
=
map
(
bool
,
getP
,
getV
,
getF
,
getE
,
getPa
,
getPd
,
enforcePeriodic
=
map
(
bool
,
(
getPositions
,
getVelocities
,
getForces
,
getEnergy
,
getParameters
,
enforcePeriodicBox
))
getParameterDerivatives
,
enforcePeriodicBox
))
try
:
#
is
the
input
integer-like
?
...
...
@@ -250,8 +260,8 @@ Parameters:
raise
TypeError
(
'
%
s
is
neither
an
int
nor
set
'
%
groups
)
(
simTime
,
periodicBoxVectorsList
,
energy
,
coordList
,
velList
,
forceList
,
paramMap
)
=
\
self
.
_getStateAsLists
(
copy
,
getP
,
getV
,
getF
,
getE
,
getPa
,
enforcePeriodic
,
groups_mask
)
forceList
,
paramMap
,
paramDerivMap
)
=
\
self
.
_getStateAsLists
(
getP
,
getV
,
getF
,
getE
,
getPa
,
getPd
,
enforcePeriodic
,
groups_mask
)
state
=
State
(
simTime
=
simTime
,
energy
=
energy
,
...
...
@@ -259,7 +269,8 @@ Parameters:
velList
=
velList
,
forceList
=
forceList
,
periodicBoxVectorsList
=
periodicBoxVectorsList
,
paramMap
=
paramMap
)
paramMap
=
paramMap
,
paramDerivMap
=
paramDerivMap
)
return
state
%
}
}
...
...
@@ -361,8 +372,9 @@ Parameters:
double
time
,
const
std
::
vector
<
Vec3
>
&
boxVectors
,
const
std
::
map
<
string
,
double
>
&
params
,
const
std
::
map
<
string
,
double
>
&
paramDerivs
,
int
types
)
{
OpenMM
::
State
myState
=
_convertListsToState
(
pos
,
vel
,
forces
,
kineticEnergy
,
potentialEnergy
,
time
,
boxVectors
,
params
,
types
)
;
OpenMM
::
State
myState
=
_convertListsToState
(
pos
,
vel
,
forces
,
kineticEnergy
,
potentialEnergy
,
time
,
boxVectors
,
params
,
paramDerivs
,
types
)
;
std
::
stringstream
buffer
;
OpenMM
::
XmlSerializer
::
serialize
<
OpenMM
::
State
>
(
&
myState,
"State"
,
buffer);
return
buffer
.
str
()
;
...
...
@@ -386,6 +398,7 @@ Parameters:
kineticEnergy
=
0.0
potentialEnergy
=
0.0
params
=
{}
paramDerivs
=
{}
types
=
0
try
:
positions
=
pythonState
.
getPositions
()
.
value_in_unit
(
unit
.
nanometers
)
...
...
@@ -413,16 +426,21 @@ Parameters:
types
|=
16
except
:
pass
try
:
params
=
pythonState
.
getEnergyParameterDerivatives
()
types
|=
32
except
:
pass
time
=
pythonState
.
getTime
()
.
value_in_unit
(
unit
.
picoseconds
)
boxVectors
=
pythonState
.
getPeriodicBoxVectors
()
.
value_in_unit
(
unit
.
nanometers
)
string
=
XmlSerializer
.
_serializeStateAsLists
(
positions
,
velocities
,
forces
,
kineticEnergy
,
potentialEnergy
,
time
,
boxVectors
,
params
,
types
)
string
=
XmlSerializer
.
_serializeStateAsLists
(
positions
,
velocities
,
forces
,
kineticEnergy
,
potentialEnergy
,
time
,
boxVectors
,
params
,
paramDerivs
,
types
)
return
string
@
staticmethod
def
_deserializeState
(
pythonString
)
:
(
simTime
,
periodicBoxVectorsList
,
energy
,
coordList
,
velList
,
forceList
,
paramMap
)
=
XmlSerializer
.
_deserializeStringIntoLists
(
pythonString
)
forceList
,
paramMap
,
paramDerivMap
)
=
XmlSerializer
.
_deserializeStringIntoLists
(
pythonString
)
state
=
State
(
simTime
=
simTime
,
energy
=
energy
,
...
...
@@ -430,7 +448,8 @@ Parameters:
velList
=
velList
,
forceList
=
forceList
,
periodicBoxVectorsList
=
periodicBoxVectorsList
,
paramMap
=
paramMap
)
paramMap
=
paramMap
,
paramDerivMap
=
paramDerivMap
)
return
state
@
staticmethod
...
...
wrappers/python/src/swig_doxygen/swig_lib/python/header.i
View file @
669e314e
...
...
@@ -28,6 +28,7 @@ State _convertListsToState( const std::vector<Vec3> &pos,
double
time
,
const
std
::
vector
<
Vec3
>
&
boxVectors,
const
std
::
map
<
std
::
string
,
double
>
&
params,
const
std
::
map
<
std
::
string
,
double
>
&
paramDerivs,
int
types
)
{
State
::
StateBuilder
sb
(
time
)
;
if
(
types
&
State
::
Positions
)
...
...
@@ -40,6 +41,8 @@ State _convertListsToState( const std::vector<Vec3> &pos,
sb
.
setEnergy
(
kineticEnergy
,
potentialEnergy
)
;
if
(
types
&
State
::
Parameters
)
sb
.
setParameters
(
params
)
;
if
(
types
&
State
::
ParameterDerivatives
)
sb
.
setEnergyParameterDerivatives
(
paramDerivs
)
;
sb
.
setPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
])
;
return
sb
.
getState
()
;
}
...
...
@@ -53,6 +56,7 @@ PyObject *_convertStateToLists(const State& state) {
PyObject
*
pForces
;
PyObject
*
pyTuple
;
PyObject
*
pParameters
;
PyObject
*
pParameterDerivs
;
simTime
=
state
.
getTime
()
;
OpenMM
::
Vec3
myVecA
;
...
...
@@ -112,11 +116,21 @@ PyObject *_convertStateToLists(const State& state) {
pParameters
=
Py_None
;
Py_INCREF
(
Py_None
)
;
}
try
{
pParameterDerivs
=
PyDict_New
()
;
const
std
::
map
<
std
::
string
,
double
>
&
params
=
state
.
getEnergyParameterDerivatives
()
;
for
(
std
::
map
<
std
::
string
,
double
>::
const_iterator
iter
=
params
.
begin
()
;
iter
!=
params
.
end
()
;
++
iter
)
PyDict_SetItemString
(
pParameterDerivs
,
iter-
>
first
.
c_str
(),
Py_BuildValue
(
"d"
,
iter-
>
second
))
;
}
catch
(
std
::
exception&
ex
)
{
pParameterDerivs
=
Py_None
;
Py_INCREF
(
Py_None
)
;
}
pyTuple
=
Py_BuildValue
(
"(d,N,N,N,N,N,N)"
,
pyTuple
=
Py_BuildValue
(
"(d,N,N,N,N,N,N
,N
)"
,
simTime
,
pPeriodicBoxVectorsList
,
pEnergy
,
pPositions
,
pVelocities
,
pForces
,
pParameters
)
;
pForces
,
pParameters
,
pParameterDerivs
)
;
return
pyTuple
;
}
...
...
wrappers/python/src/swig_doxygen/swig_lib/python/pythoncode.i
View file @
669e314e
...
...
@@ -63,7 +63,8 @@ class State(_object):
velList
=
None
,
forceList
=
None
,
periodicBoxVectorsList
=
None
,
paramMap
=
None
)
:
paramMap
=
None
,
paramDerivMap
=
None
)
:
self
.
_simTime
=
simTime
self
.
_periodicBoxVectorsList
=
periodicBoxVectorsList
self
.
_periodicBoxVectorsListNumpy
=
None
...
...
@@ -80,6 +81,7 @@ class State(_object):
self
.
_forceList
=
forceList
self
.
_forceListNumpy
=
None
self
.
_paramMap
=
paramMap
self
.
_paramDerivMap
=
paramDerivMap
def
__getstate__
(
self
)
:
serializationString
=
XmlSerializer
.
serialize
(
self
)
...
...
@@ -221,6 +223,18 @@ class State(_object):
raise
TypeError
(
'
Parameters
were
not
requested
in
getState
()
call
,
so
are
not
available
.'
)
return
self
.
_paramMap
def
getEnergyParameterDerivatives
(
self
)
:
"""Get a map containing derivatives of the potential energy with respect to context parameters.
In most cases derivatives are only calculated if the corresponding Force objects have been
specifically told to compute them. Otherwise, the values in the map will be zero. Likewise,
if multiple Forces depend on the same parameter but only some have been told to compute
derivatives with respect to it, the returned value will include only the contributions from
the Forces that were told to compute it."""
if
self
.
_paramDerivMap
is
None
:
raise
TypeError
(
'
Parameter
derivatives
were
not
requested
in
getState
()
call
,
so
are
not
available
.'
)
return
self
.
_paramDerivMap
%
}
%
pythonappend
OpenMM
::
Context
::
Context
%
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment