Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b8c86406
Commit
b8c86406
authored
Feb 12, 2018
by
Peter Eastman
Browse files
Began converting CudaArrays.
parent
b33ee3b0
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
466 additions
and
539 deletions
+466
-539
platforms/cuda/include/CudaArray.h
platforms/cuda/include/CudaArray.h
+37
-2
platforms/cuda/include/CudaBondedUtilities.h
platforms/cuda/include/CudaBondedUtilities.h
+2
-3
platforms/cuda/include/CudaContext.h
platforms/cuda/include/CudaContext.h
+18
-18
platforms/cuda/include/CudaIntegrationUtilities.h
platforms/cuda/include/CudaIntegrationUtilities.h
+33
-33
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+27
-27
platforms/cuda/include/CudaSort.h
platforms/cuda/include/CudaSort.h
+6
-6
platforms/cuda/src/CudaArray.cpp
platforms/cuda/src/CudaArray.cpp
+47
-13
platforms/cuda/src/CudaBondedUtilities.cpp
platforms/cuda/src/CudaBondedUtilities.cpp
+7
-13
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+54
-73
platforms/cuda/src/CudaIntegrationUtilities.cpp
platforms/cuda/src/CudaIntegrationUtilities.cpp
+154
-220
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+66
-105
platforms/cuda/src/CudaSort.cpp
platforms/cuda/src/CudaSort.cpp
+15
-26
No files found.
platforms/cuda/include/CudaArray.h
View file @
b8c86406
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -57,6 +57,11 @@ public:
static
CudaArray
*
create
(
CudaContext
&
context
,
int
size
,
const
std
::
string
&
name
)
{
return
new
CudaArray
(
context
,
size
,
sizeof
(
T
),
name
);
}
/**
* Create an uninitialized CudaArray object. It does not point to any device memory,
* and cannot be used until initialize() is called on it.
*/
CudaArray
();
/**
* Create a CudaArray object.
*
...
...
@@ -67,6 +72,36 @@ public:
*/
CudaArray
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
~
CudaArray
();
/**
* Initialize this object.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void
initialize
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
/**
* Initialize this object. The template argument is the data type of each array element.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param name the name of the array
*/
template
<
class
T
>
void
initialize
(
CudaContext
&
context
,
int
size
,
const
std
::
string
&
name
)
{
initialize
(
context
,
size
,
sizeof
(
T
),
name
);
}
/**
* Recreate the internal storage to have a different size.
*/
void
resize
(
int
size
);
/**
* Get whether this array has been initialized.
*/
bool
isInitialized
()
const
{
return
(
pointer
!=
0
);
}
/**
* Get the number of elements in the array.
*/
...
...
@@ -134,7 +169,7 @@ public:
*/
void
copyTo
(
CudaArray
&
dest
)
const
;
private:
CudaContext
&
context
;
CudaContext
*
context
;
CUdeviceptr
pointer
;
int
size
,
elementSize
;
bool
ownsMemory
;
...
...
platforms/cuda/include/CudaBondedUtilities.h
View file @
b8c86406
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -81,7 +81,6 @@ namespace OpenMM {
class
OPENMM_EXPORT_CUDA
CudaBondedUtilities
{
public:
CudaBondedUtilities
(
CudaContext
&
context
);
~
CudaBondedUtilities
();
/**
* Add a bonded interaction.
*
...
...
@@ -136,7 +135,7 @@ private:
std
::
vector
<
int
>
forceGroup
;
std
::
vector
<
CUdeviceptr
>
arguments
;
std
::
vector
<
std
::
string
>
argTypes
;
std
::
vector
<
std
::
vector
<
CudaArray
*
>
>
atomIndices
;
std
::
vector
<
std
::
vector
<
CudaArray
>
>
atomIndices
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
std
::
vector
<
void
*>
kernelArgs
;
...
...
platforms/cuda/include/CudaContext.h
View file @
b8c86406
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -41,6 +41,7 @@
#include <builtin_types.h>
#include <vector_functions.h>
#include "windowsExportCuda.h"
#include "CudaArray.h"
#include "CudaPlatform.h"
#include "openmm/Kernel.h"
...
...
@@ -48,7 +49,6 @@ typedef unsigned int tileflags;
namespace
OpenMM
{
class
CudaArray
;
class
CudaForceInfo
;
class
CudaExpressionUtilities
;
class
CudaIntegrationUtilities
;
...
...
@@ -152,37 +152,37 @@ public:
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/
CudaArray
&
getPosq
()
{
return
*
posq
;
return
posq
;
}
/**
* Get the array which contains a correction to the position of each atom. This only exists if getUseMixedPrecision() returns true.
*/
CudaArray
&
getPosqCorrection
()
{
return
*
posqCorrection
;
return
posqCorrection
;
}
/**
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
*/
CudaArray
&
getVelm
()
{
return
*
velm
;
return
velm
;
}
/**
* Get the array which contains the force on each atom (represented as three long longs in 64 bit fixed point).
*/
CudaArray
&
getForce
()
{
return
*
force
;
return
force
;
}
/**
* Get the array which contains the buffer in which energy is computed.
*/
CudaArray
&
getEnergyBuffer
()
{
return
*
energyBuffer
;
return
energyBuffer
;
}
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
CudaArray
&
getEnergyParamDerivBuffer
()
{
return
*
energyParamDerivBuffer
;
return
energyParamDerivBuffer
;
}
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
...
...
@@ -201,7 +201,7 @@ public:
* Get the array which contains the index of each atom.
*/
CudaArray
&
getAtomIndexArray
()
{
return
*
atomIndexDevice
;
return
atomIndexDevice
;
}
/**
* Get the number of cells by which the positions are offset.
...
...
@@ -649,15 +649,15 @@ private:
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
std
::
vector
<
int4
>
posCellOffsets
;
void
*
pinnedBuffer
;
CudaArray
*
posq
;
CudaArray
*
posqCorrection
;
CudaArray
*
velm
;
CudaArray
*
force
;
CudaArray
*
energyBuffer
;
CudaArray
*
energySum
;
CudaArray
*
energyParamDerivBuffer
;
CudaArray
*
atomIndexDevice
;
CudaArray
*
chargeBuffer
;
CudaArray
posq
;
CudaArray
posqCorrection
;
CudaArray
velm
;
CudaArray
force
;
CudaArray
energyBuffer
;
CudaArray
energySum
;
CudaArray
energyParamDerivBuffer
;
CudaArray
atomIndexDevice
;
CudaArray
chargeBuffer
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
vector
<
int
>
atomIndex
;
...
...
platforms/cuda/include/CudaIntegrationUtilities.h
View file @
b8c86406
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -47,20 +47,20 @@ public:
* Get the array which contains position deltas.
*/
CudaArray
&
getPosDelta
()
{
return
*
posDelta
;
return
posDelta
;
}
/**
* Get the array which contains random values. Each element is a float4, whose components
* are independent, normally distributed random numbers with mean 0 and variance 1.
*/
CudaArray
&
getRandom
()
{
return
*
random
;
return
random
;
}
/**
* Get the array which contains the current step size.
*/
CudaArray
&
getStepSize
()
{
return
*
stepSize
;
return
stepSize
;
}
/**
* Set the size to use for the next step.
...
...
@@ -131,38 +131,38 @@ private:
CUfunction
ccmaUpdateKernel
;
CUfunction
vsitePositionKernel
,
vsiteForceKernel
;
CUfunction
randomKernel
,
timeShiftKernel
;
CudaArray
*
posDelta
;
CudaArray
*
settleAtoms
;
CudaArray
*
settleParams
;
CudaArray
*
shakeAtoms
;
CudaArray
*
shakeParams
;
CudaArray
*
random
;
CudaArray
*
randomSeed
;
CudaArray
*
stepSize
;
CudaArray
*
ccmaAtoms
;
CudaArray
*
ccmaDistance
;
CudaArray
*
ccmaReducedMass
;
CudaArray
*
ccmaAtomConstraints
;
CudaArray
*
ccmaNumAtomConstraints
;
CudaArray
*
ccmaConstraintMatrixColumn
;
CudaArray
*
ccmaConstraintMatrixValue
;
CudaArray
*
ccmaDelta1
;
CudaArray
*
ccmaDelta2
;
CudaArray
*
ccmaConverged
;
CudaArray
posDelta
;
CudaArray
settleAtoms
;
CudaArray
settleParams
;
CudaArray
shakeAtoms
;
CudaArray
shakeParams
;
CudaArray
random
;
CudaArray
randomSeed
;
CudaArray
stepSize
;
CudaArray
ccmaAtoms
;
CudaArray
ccmaDistance
;
CudaArray
ccmaReducedMass
;
CudaArray
ccmaAtomConstraints
;
CudaArray
ccmaNumAtomConstraints
;
CudaArray
ccmaConstraintMatrixColumn
;
CudaArray
ccmaConstraintMatrixValue
;
CudaArray
ccmaDelta1
;
CudaArray
ccmaDelta2
;
CudaArray
ccmaConverged
;
int
*
ccmaConvergedMemory
;
CUdeviceptr
ccmaConvergedDeviceMemory
;
CUevent
ccmaEvent
;
CudaArray
*
vsite2AvgAtoms
;
CudaArray
*
vsite2AvgWeights
;
CudaArray
*
vsite3AvgAtoms
;
CudaArray
*
vsite3AvgWeights
;
CudaArray
*
vsiteOutOfPlaneAtoms
;
CudaArray
*
vsiteOutOfPlaneWeights
;
CudaArray
*
vsiteLocalCoordsIndex
;
CudaArray
*
vsiteLocalCoordsAtoms
;
CudaArray
*
vsiteLocalCoordsWeights
;
CudaArray
*
vsiteLocalCoordsPos
;
CudaArray
*
vsiteLocalCoordsStartIndex
;
CudaArray
vsite2AvgAtoms
;
CudaArray
vsite2AvgWeights
;
CudaArray
vsite3AvgAtoms
;
CudaArray
vsite3AvgWeights
;
CudaArray
vsiteOutOfPlaneAtoms
;
CudaArray
vsiteOutOfPlaneWeights
;
CudaArray
vsiteLocalCoordsIndex
;
CudaArray
vsiteLocalCoordsAtoms
;
CudaArray
vsiteLocalCoordsWeights
;
CudaArray
vsiteLocalCoordsPos
;
CudaArray
vsiteLocalCoordsStartIndex
;
int
randomPos
;
int
lastSeed
,
numVsites
;
double2
lastStepSize
;
...
...
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
b8c86406
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -164,61 +164,61 @@ public:
* Get the array containing the center of each atom block.
*/
CudaArray
&
getBlockCenters
()
{
return
*
blockCenter
;
return
blockCenter
;
}
/**
* Get the array containing the dimensions of each atom block.
*/
CudaArray
&
getBlockBoundingBoxes
()
{
return
*
blockBoundingBox
;
return
blockBoundingBox
;
}
/**
* Get the array whose first element contains the number of tiles with interactions.
*/
CudaArray
&
getInteractionCount
()
{
return
*
interactionCount
;
return
interactionCount
;
}
/**
* Get the array containing tiles with interactions.
*/
CudaArray
&
getInteractingTiles
()
{
return
*
interactingTiles
;
return
interactingTiles
;
}
/**
* Get the array containing the atoms in each tile with interactions.
*/
CudaArray
&
getInteractingAtoms
()
{
return
*
interactingAtoms
;
return
interactingAtoms
;
}
/**
* Get the array containing single pairs in the neighbor list.
*/
CudaArray
&
getSinglePairs
()
{
return
*
singlePairs
;
return
singlePairs
;
}
/**
* Get the array containing exclusion flags.
*/
CudaArray
&
getExclusions
()
{
return
*
exclusions
;
return
exclusions
;
}
/**
* Get the array containing tiles with exclusions.
*/
CudaArray
&
getExclusionTiles
()
{
return
*
exclusionTiles
;
return
exclusionTiles
;
}
/**
* Get the array containing the index into the exclusion array for each tile.
*/
CudaArray
&
getExclusionIndices
()
{
return
*
exclusionIndices
;
return
exclusionIndices
;
}
/**
* Get the array listing where the exclusion data starts for each row.
*/
CudaArray
&
getExclusionRowIndices
()
{
return
*
exclusionRowIndices
;
return
exclusionRowIndices
;
}
/**
* Get the index of the first tile this context is responsible for processing.
...
...
@@ -270,22 +270,22 @@ private:
class
BlockSortTrait
;
CudaContext
&
context
;
std
::
map
<
int
,
KernelSet
>
groupKernels
;
CudaArray
*
exclusionTiles
;
CudaArray
*
exclusions
;
CudaArray
*
exclusionIndices
;
CudaArray
*
exclusionRowIndices
;
CudaArray
*
interactingTiles
;
CudaArray
*
interactingAtoms
;
CudaArray
*
interactionCount
;
CudaArray
*
singlePairs
;
CudaArray
*
singlePairCount
;
CudaArray
*
blockCenter
;
CudaArray
*
blockBoundingBox
;
CudaArray
*
sortedBlocks
;
CudaArray
*
sortedBlockCenter
;
CudaArray
*
sortedBlockBoundingBox
;
CudaArray
*
oldPositions
;
CudaArray
*
rebuildNeighborList
;
CudaArray
exclusionTiles
;
CudaArray
exclusions
;
CudaArray
exclusionIndices
;
CudaArray
exclusionRowIndices
;
CudaArray
interactingTiles
;
CudaArray
interactingAtoms
;
CudaArray
interactionCount
;
CudaArray
singlePairs
;
CudaArray
singlePairCount
;
CudaArray
blockCenter
;
CudaArray
blockBoundingBox
;
CudaArray
sortedBlocks
;
CudaArray
sortedBlockCenter
;
CudaArray
sortedBlockBoundingBox
;
CudaArray
oldPositions
;
CudaArray
rebuildNeighborList
;
CudaSort
*
blockSorter
;
CUevent
downloadCountEvent
;
int
*
pinnedCountBuffer
;
...
...
platforms/cuda/include/CudaSort.h
View file @
b8c86406
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010-2012 Stanford University and the Authors. *
* Portions copyright (c) 2010-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -87,11 +87,11 @@ public:
private:
CudaContext
&
context
;
SortTrait
*
trait
;
CudaArray
*
dataRange
;
CudaArray
*
bucketOfElement
;
CudaArray
*
offsetInBucket
;
CudaArray
*
bucketOffset
;
CudaArray
*
buckets
;
CudaArray
dataRange
;
CudaArray
bucketOfElement
;
CudaArray
offsetInBucket
;
CudaArray
bucketOffset
;
CudaArray
buckets
;
CUfunction
shortListKernel
,
computeRangeKernel
,
assignElementsKernel
,
computeBucketPositionsKernel
,
copyToBucketsKernel
,
sortBucketsKernel
;
unsigned
int
dataLength
,
rangeKernelSize
,
positionsKernelSize
,
sortKernelSize
;
bool
isShortList
;
...
...
platforms/cuda/src/CudaArray.cpp
View file @
b8c86406
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Portions copyright (c) 2012-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -32,18 +32,15 @@
using
namespace
OpenMM
;
CudaArray
::
CudaArray
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
:
context
(
context
),
size
(
size
),
elementSize
(
elementSize
),
name
(
name
),
ownsMemory
(
true
)
{
CUresult
result
=
cuMemAlloc
(
&
pointer
,
size
*
elementSize
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error creating array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
CudaArray
::
CudaArray
()
:
pointer
(
0
),
ownsMemory
(
false
)
{
}
CudaArray
::
CudaArray
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
:
pointer
(
0
)
{
initialize
(
context
,
size
,
elementSize
,
name
);
}
CudaArray
::~
CudaArray
()
{
if
(
ownsMemory
&&
context
.
getContextIsValid
())
{
if
(
pointer
!=
0
&&
ownsMemory
&&
context
->
getContextIsValid
())
{
CUresult
result
=
cuMemFree
(
pointer
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
...
...
@@ -53,12 +50,45 @@ CudaArray::~CudaArray() {
}
}
void
CudaArray
::
initialize
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
{
if
(
this
->
pointer
!=
0
)
throw
OpenMMException
(
"CudaArray has already been initialized"
);
this
->
context
=
&
context
;
this
->
size
=
size
;
this
->
elementSize
=
elementSize
;
this
->
name
=
name
;
ownsMemory
=
true
;
CUresult
result
=
cuMemAlloc
(
&
pointer
,
size
*
elementSize
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error creating array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
}
void
CudaArray
::
resize
(
int
size
)
{
if
(
pointer
==
0
)
throw
OpenMMException
(
"CudaArray has not been initialized"
);
if
(
!
ownsMemory
)
throw
OpenMMException
(
"Cannot resize an array that does not own its storage"
);
CUresult
result
=
cuMemFree
(
pointer
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error deleting array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
pointer
=
0
;
initialize
(
*
context
,
size
,
elementSize
,
name
);
}
void
CudaArray
::
upload
(
const
void
*
data
,
bool
blocking
)
{
if
(
pointer
==
0
)
throw
OpenMMException
(
"CudaArray has not been initialized"
);
CUresult
result
;
if
(
blocking
)
result
=
cuMemcpyHtoD
(
pointer
,
data
,
size
*
elementSize
);
else
result
=
cuMemcpyHtoDAsync
(
pointer
,
data
,
size
*
elementSize
,
context
.
getCurrentStream
());
result
=
cuMemcpyHtoDAsync
(
pointer
,
data
,
size
*
elementSize
,
context
->
getCurrentStream
());
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
...
@@ -67,11 +97,13 @@ void CudaArray::upload(const void* data, bool blocking) {
}
void
CudaArray
::
download
(
void
*
data
,
bool
blocking
)
const
{
if
(
pointer
==
0
)
throw
OpenMMException
(
"CudaArray has not been initialized"
);
CUresult
result
;
if
(
blocking
)
result
=
cuMemcpyDtoH
(
data
,
pointer
,
size
*
elementSize
);
else
result
=
cuMemcpyDtoHAsync
(
data
,
pointer
,
size
*
elementSize
,
context
.
getCurrentStream
());
result
=
cuMemcpyDtoHAsync
(
data
,
pointer
,
size
*
elementSize
,
context
->
getCurrentStream
());
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error downloading array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
...
@@ -80,9 +112,11 @@ void CudaArray::download(void* data, bool blocking) const {
}
void
CudaArray
::
copyTo
(
CudaArray
&
dest
)
const
{
if
(
pointer
==
0
)
throw
OpenMMException
(
"CudaArray has not been initialized"
);
if
(
dest
.
getSize
()
!=
size
||
dest
.
getElementSize
()
!=
elementSize
)
throw
OpenMMException
(
"Error copying array "
+
name
+
" to "
+
dest
.
getName
()
+
": The destination array does not match the size of the array"
);
CUresult
result
=
cuMemcpyDtoDAsync
(
dest
.
getDevicePointer
(),
pointer
,
size
*
elementSize
,
context
.
getCurrentStream
());
CUresult
result
=
cuMemcpyDtoDAsync
(
dest
.
getDevicePointer
(),
pointer
,
size
*
elementSize
,
context
->
getCurrentStream
());
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error copying array "
<<
name
<<
" to "
<<
dest
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
...
platforms/cuda/src/CudaBondedUtilities.cpp
View file @
b8c86406
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -37,12 +37,6 @@ using namespace std;
CudaBondedUtilities
::
CudaBondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
numForceBuffers
(
0
),
maxBonds
(
0
),
allGroups
(
0
),
hasInitializedKernels
(
false
)
{
}
CudaBondedUtilities
::~
CudaBondedUtilities
()
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
(
int
)
atomIndices
[
i
].
size
();
j
++
)
delete
atomIndices
[
i
][
j
];
}
void
CudaBondedUtilities
::
addInteraction
(
const
vector
<
vector
<
int
>
>&
atoms
,
const
string
&
source
,
int
group
)
{
if
(
atoms
.
size
()
>
0
)
{
forceAtoms
.
push_back
(
atoms
);
...
...
@@ -99,9 +93,9 @@ void CudaBondedUtilities::initialize(const System& system) {
for
(
int
atom
=
0
;
atom
<
width
;
atom
++
)
indexVec
[
bond
*
paddedWidth
+
atom
]
=
forceAtoms
[
i
][
bond
][
startAtom
+
atom
];
}
CudaArray
*
indices
=
new
CudaArray
(
context
,
numBonds
,
4
*
paddedWidth
,
"bondedIndices"
);
i
ndices
->
upload
(
&
indexVec
[
0
]
);
atomIndices
[
i
].
push_back
(
indices
);
atomIndices
[
i
].
push_back
(
CudaArray
()
);
atomI
ndices
[
i
].
back
().
initialize
(
context
,
numBonds
,
4
*
paddedWidth
,
"bondedIndices"
);
atomIndices
[
i
].
back
().
upload
(
&
indexVec
[
0
]
);
startAtom
+=
width
;
}
}
...
...
@@ -115,7 +109,7 @@ void CudaBondedUtilities::initialize(const System& system) {
s
<<
"extern
\"
C
\"
__global__ void computeBondedForces(unsigned long long* __restrict__ forceBuffer, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
force
].
size
();
i
++
)
{
int
indexWidth
=
atomIndices
[
force
][
i
]
->
getElementSize
()
/
4
;
int
indexWidth
=
atomIndices
[
force
][
i
]
.
getElementSize
()
/
4
;
string
indexType
=
"uint"
+
context
.
intToString
(
indexWidth
);
s
<<
", const "
<<
indexType
<<
"* __restrict__ atomIndices"
<<
force
<<
"_"
<<
i
;
}
...
...
@@ -154,7 +148,7 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
s
<<
"for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < "
<<
numBonds
<<
"; index += blockDim.x*gridDim.x) {
\n
"
;
int
startAtom
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
forceIndex
].
size
();
i
++
)
{
int
indexWidth
=
atomIndices
[
forceIndex
][
i
]
->
getElementSize
()
/
4
;
int
indexWidth
=
atomIndices
[
forceIndex
][
i
]
.
getElementSize
()
/
4
;
string
indexType
=
"uint"
+
context
.
intToString
(
indexWidth
);
s
<<
" "
<<
indexType
<<
" atoms"
<<
i
<<
" = atomIndices"
<<
forceIndex
<<
"_"
<<
i
<<
"[index];
\n
"
;
int
atomsToLoad
=
min
(
indexWidth
,
numAtoms
-
startAtom
);
...
...
@@ -191,7 +185,7 @@ void CudaBondedUtilities::computeInteractions(int groups) {
kernelArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
(
int
)
atomIndices
[
i
].
size
();
j
++
)
kernelArgs
.
push_back
(
&
atomIndices
[
i
][
j
]
->
getDevicePointer
());
kernelArgs
.
push_back
(
&
atomIndices
[
i
][
j
]
.
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
kernelArgs
.
push_back
(
&
arguments
[
i
]);
if
(
energyParameterDerivatives
.
size
()
>
0
)
...
...
platforms/cuda/src/CudaContext.cpp
View file @
b8c86406
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -108,8 +108,7 @@ static int executeInWindows(const string &command) {
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
,
CudaContext
*
originalContext
)
:
system
(
system
),
currentStream
(
0
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
energySum
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
chargeBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
pinnedBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
// Determine what compiler to use.
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
...
...
@@ -268,8 +267,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"BALLOT(var)"
]
=
"__ballot(var);"
;
}
if
(
useDoublePrecision
)
{
posq
=
CudaArray
::
creat
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
creat
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
posq
.
initializ
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
.
initializ
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
compilationDefines
[
"make_real2"
]
=
"make_double2"
;
compilationDefines
[
"make_real3"
]
=
"make_double3"
;
...
...
@@ -279,9 +278,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"make_mixed4"
]
=
"make_double4"
;
}
else
if
(
useMixedPrecision
)
{
posq
=
CudaArray
::
creat
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posqCorrection
=
CudaArray
::
creat
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posqCorrection"
);
velm
=
CudaArray
::
creat
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
posq
.
initializ
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posqCorrection
.
initializ
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posqCorrection"
);
velm
.
initializ
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"USE_MIXED_PRECISION"
]
=
"1"
;
compilationDefines
[
"make_real2"
]
=
"make_float2"
;
compilationDefines
[
"make_real3"
]
=
"make_float3"
;
...
...
@@ -291,8 +290,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"make_mixed4"
]
=
"make_double4"
;
}
else
{
posq
=
CudaArray
::
creat
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
creat
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
posq
.
initializ
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
.
initializ
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"make_real2"
]
=
"make_float2"
;
compilationDefines
[
"make_real3"
]
=
"make_float3"
;
compilationDefines
[
"make_real4"
]
=
"make_float4"
;
...
...
@@ -415,24 +414,6 @@ CudaContext::~CudaContext() {
delete
computation
;
if
(
pinnedBuffer
!=
NULL
)
cuMemFreeHost
(
pinnedBuffer
);
if
(
posq
!=
NULL
)
delete
posq
;
if
(
posqCorrection
!=
NULL
)
delete
posqCorrection
;
if
(
velm
!=
NULL
)
delete
velm
;
if
(
force
!=
NULL
)
delete
force
;
if
(
energyBuffer
!=
NULL
)
delete
energyBuffer
;
if
(
energySum
!=
NULL
)
delete
energySum
;
if
(
energyParamDerivBuffer
!=
NULL
)
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
delete
atomIndexDevice
;
if
(
chargeBuffer
!=
NULL
)
delete
chargeBuffer
;
if
(
integration
!=
NULL
)
delete
integration
;
if
(
expression
!=
NULL
)
...
...
@@ -456,20 +437,20 @@ void CudaContext::initialize() {
string
errorMessage
=
"Error initializing Context"
;
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
if
(
useDoublePrecision
)
{
energyBuffer
=
CudaArray
::
creat
e
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
creat
e
<
double
>
(
*
this
,
1
,
"energySum"
);
energyBuffer
.
initializ
e
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
.
initializ
e
<
double
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
else
if
(
useMixedPrecision
)
{
energyBuffer
=
CudaArray
::
creat
e
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
creat
e
<
double
>
(
*
this
,
1
,
"energySum"
);
energyBuffer
.
initializ
e
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
.
initializ
e
<
double
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
else
{
energyBuffer
=
CudaArray
::
creat
e
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
creat
e
<
float
>
(
*
this
,
1
,
"energySum"
);
energyBuffer
.
initializ
e
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
.
initializ
e
<
float
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
}
...
...
@@ -480,24 +461,24 @@ void CudaContext::initialize() {
else
((
float4
*
)
pinnedBuffer
)[
i
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
mass
==
0.0
?
0.0
f
:
(
float
)
(
1.0
/
mass
));
}
velm
->
upload
(
pinnedBuffer
);
velm
.
upload
(
pinnedBuffer
);
bonded
->
initialize
(
system
);
force
=
CudaArray
::
creat
e
<
long
long
>
(
*
this
,
paddedNumAtoms
*
3
,
"force"
);
addAutoclearBuffer
(
force
->
getDevicePointer
(),
force
->
getSize
()
*
force
->
getElementSize
());
addAutoclearBuffer
(
energyBuffer
->
getDevicePointer
(),
energyBuffer
->
getSize
()
*
energyBuffer
->
getElementSize
());
force
.
initializ
e
<
long
long
>
(
*
this
,
paddedNumAtoms
*
3
,
"force"
);
addAutoclearBuffer
(
force
.
getDevicePointer
(),
force
.
getSize
()
*
force
.
getElementSize
());
addAutoclearBuffer
(
energyBuffer
.
getDevicePointer
(),
energyBuffer
.
getSize
()
*
energyBuffer
.
getElementSize
());
int
numEnergyParamDerivs
=
energyParamDerivNames
.
size
();
if
(
numEnergyParamDerivs
>
0
)
{
if
(
useDoublePrecision
||
useMixedPrecision
)
energyParamDerivBuffer
=
CudaArray
::
creat
e
<
double
>
(
*
this
,
numEnergyParamDerivs
*
numEnergyBuffers
,
"energyParamDerivBuffer"
);
energyParamDerivBuffer
.
initializ
e
<
double
>
(
*
this
,
numEnergyParamDerivs
*
numEnergyBuffers
,
"energyParamDerivBuffer"
);
else
energyParamDerivBuffer
=
CudaArray
::
creat
e
<
float
>
(
*
this
,
numEnergyParamDerivs
*
numEnergyBuffers
,
"energyParamDerivBuffer"
);
addAutoclearBuffer
(
*
energyParamDerivBuffer
);
energyParamDerivBuffer
.
initializ
e
<
float
>
(
*
this
,
numEnergyParamDerivs
*
numEnergyBuffers
,
"energyParamDerivBuffer"
);
addAutoclearBuffer
(
energyParamDerivBuffer
);
}
atomIndexDevice
=
CudaArray
::
creat
e
<
int
>
(
*
this
,
paddedNumAtoms
,
"atomIndex"
);
atomIndexDevice
.
initializ
e
<
int
>
(
*
this
,
paddedNumAtoms
,
"atomIndex"
);
atomIndex
.
resize
(
paddedNumAtoms
);
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
atomIndex
[
i
]
=
i
;
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
.
upload
(
atomIndex
);
findMoleculeGroups
();
nonbonded
->
initialize
(
system
);
}
...
...
@@ -890,11 +871,11 @@ void CudaContext::clearAutoclearBuffers() {
}
double
CudaContext
::
reduceEnergy
()
{
int
bufferSize
=
energyBuffer
->
getSize
();
int
bufferSize
=
energyBuffer
.
getSize
();
int
workGroupSize
=
512
;
void
*
args
[]
=
{
&
energyBuffer
->
getDevicePointer
(),
&
energySum
->
getDevicePointer
(),
&
bufferSize
,
&
workGroupSize
};
executeKernel
(
reduceEnergyKernel
,
args
,
workGroupSize
,
workGroupSize
,
workGroupSize
*
energyBuffer
->
getElementSize
());
energySum
->
download
(
pinnedBuffer
);
void
*
args
[]
=
{
&
energyBuffer
.
getDevicePointer
(),
&
energySum
.
getDevicePointer
(),
&
bufferSize
,
&
workGroupSize
};
executeKernel
(
reduceEnergyKernel
,
args
,
workGroupSize
,
workGroupSize
,
workGroupSize
*
energyBuffer
.
getElementSize
());
energySum
.
download
(
pinnedBuffer
);
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
return
*
((
double
*
)
pinnedBuffer
);
else
...
...
@@ -902,21 +883,21 @@ double CudaContext::reduceEnergy() {
}
void
CudaContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
if
(
chargeBuffer
==
NULL
)
chargeBuffer
=
new
CudaArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
if
(
!
chargeBuffer
.
isInitialized
()
)
chargeBuffer
.
initialize
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
if
(
getUseDoublePrecision
())
{
double
*
c
=
(
double
*
)
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
c
[
i
]
=
charges
[
i
];
chargeBuffer
->
upload
(
c
);
chargeBuffer
.
upload
(
c
);
}
else
{
float
*
c
=
(
float
*
)
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
c
[
i
]
=
(
float
)
charges
[
i
];
chargeBuffer
->
upload
(
c
);
chargeBuffer
.
upload
(
c
);
}
void
*
args
[]
=
{
&
chargeBuffer
->
getDevicePointer
(),
&
posq
->
getDevicePointer
(),
&
atomIndexDevice
->
getDevicePointer
(),
&
numAtoms
};
void
*
args
[]
=
{
&
chargeBuffer
.
getDevicePointer
(),
&
posq
.
getDevicePointer
(),
&
atomIndexDevice
.
getDevicePointer
(),
&
numAtoms
};
executeKernel
(
setChargesKernel
,
args
,
numAtoms
);
}
...
...
@@ -1178,16 +1159,16 @@ bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
vector
<
double4
>
newPosq
(
paddedNumAtoms
,
make_double4
(
0
,
0
,
0
,
0
));
vector
<
double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
double4
>
newVelm
(
paddedNumAtoms
,
make_double4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
posq
.
download
(
oldPosq
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
posq
.
upload
(
newPosq
);
velm
.
upload
(
newVelm
);
}
else
if
(
useMixedPrecision
)
{
vector
<
float4
>
oldPosq
(
paddedNumAtoms
);
...
...
@@ -1196,8 +1177,8 @@ bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
vector
<
float4
>
newPosqCorrection
(
paddedNumAtoms
,
make_float4
(
0
,
0
,
0
,
0
));
vector
<
double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
double4
>
newVelm
(
paddedNumAtoms
,
make_double4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
posq
.
download
(
oldPosq
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
...
...
@@ -1205,31 +1186,31 @@ bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
posqCorrection
->
upload
(
newPosqCorrection
);
velm
->
upload
(
newVelm
);
posq
.
upload
(
newPosq
);
posqCorrection
.
upload
(
newPosqCorrection
);
velm
.
upload
(
newVelm
);
}
else
{
vector
<
float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
float4
>
newPosq
(
paddedNumAtoms
,
make_float4
(
0
,
0
,
0
,
0
));
vector
<
float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
float4
>
newVelm
(
paddedNumAtoms
,
make_float4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
posq
.
download
(
oldPosq
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
posq
.
upload
(
newPosq
);
velm
.
upload
(
newVelm
);
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
atomIndex
[
i
]
=
i
;
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
.
upload
(
atomIndex
);
findMoleculeGroups
();
for
(
auto
listener
:
reorderListeners
)
listener
->
execute
();
...
...
@@ -1262,10 +1243,10 @@ void CudaContext::reorderAtomsImpl() {
vector
<
Real4
>
oldPosqCorrection
(
paddedNumAtoms
,
padding
);
Mixed4
paddingMixed
=
{
0
,
0
,
0
,
0
};
vector
<
Mixed4
>
oldVelm
(
paddedNumAtoms
,
paddingMixed
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
posq
.
download
(
oldPosq
);
velm
.
download
(
oldVelm
);
if
(
useMixedPrecision
)
posqCorrection
->
download
(
oldPosqCorrection
);
posqCorrection
.
download
(
oldPosqCorrection
);
Real
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
Real
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
Real
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
...
...
@@ -1409,11 +1390,11 @@ void CudaContext::reorderAtomsImpl() {
atomIndex
[
i
]
=
originalIndex
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
posq
.
upload
(
newPosq
);
if
(
useMixedPrecision
)
posqCorrection
->
upload
(
newPosqCorrection
);
velm
->
upload
(
newVelm
);
atomIndexDevice
->
upload
(
atomIndex
);
posqCorrection
.
upload
(
newPosqCorrection
);
velm
.
upload
(
newVelm
);
atomIndexDevice
.
upload
(
atomIndex
);
for
(
auto
listener
:
reorderListeners
)
listener
->
execute
();
}
...
...
platforms/cuda/src/CudaIntegrationUtilities.cpp
View file @
b8c86406
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -98,30 +98,24 @@ struct CudaIntegrationUtilities::ConstraintOrderer : public binary_function<int,
};
CudaIntegrationUtilities
::
CudaIntegrationUtilities
(
CudaContext
&
context
,
const
System
&
system
)
:
context
(
context
),
posDelta
(
NULL
),
settleAtoms
(
NULL
),
settleParams
(
NULL
),
shakeAtoms
(
NULL
),
shakeParams
(
NULL
),
random
(
NULL
),
randomSeed
(
NULL
),
randomPos
(
0
),
stepSize
(
NULL
),
ccmaAtoms
(
NULL
),
ccmaDistance
(
NULL
),
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedMemory
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
vsiteLocalCoordsIndex
(
NULL
),
vsiteLocalCoordsAtoms
(
NULL
),
vsiteLocalCoordsWeights
(
NULL
),
vsiteLocalCoordsPos
(
NULL
),
vsiteLocalCoordsStartIndex
(
NULL
)
{
randomPos
(
0
)
{
// Create workspace arrays.
lastStepSize
=
make_double2
(
0.0
,
0.0
);
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
{
posDelta
=
CudaArray
::
creat
e
<
double4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
double4
>
deltas
(
posDelta
->
getSize
(),
make_double4
(
0.0
,
0.0
,
0.0
,
0.0
));
posDelta
->
upload
(
deltas
);
stepSize
=
CudaArray
::
creat
e
<
double2
>
(
context
,
1
,
"stepSize"
);
stepSize
->
upload
(
&
lastStepSize
);
posDelta
.
initializ
e
<
double4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
double4
>
deltas
(
posDelta
.
getSize
(),
make_double4
(
0.0
,
0.0
,
0.0
,
0.0
));
posDelta
.
upload
(
deltas
);
stepSize
.
initializ
e
<
double2
>
(
context
,
1
,
"stepSize"
);
stepSize
.
upload
(
&
lastStepSize
);
}
else
{
posDelta
=
CudaArray
::
creat
e
<
float4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
float4
>
deltas
(
posDelta
->
getSize
(),
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
));
posDelta
->
upload
(
deltas
);
stepSize
=
CudaArray
::
creat
e
<
float2
>
(
context
,
1
,
"stepSize"
);
posDelta
.
initializ
e
<
float4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
float4
>
deltas
(
posDelta
.
getSize
(),
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
));
posDelta
.
upload
(
deltas
);
stepSize
.
initializ
e
<
float2
>
(
context
,
1
,
"stepSize"
);
float2
lastStepSizeFloat
=
make_float2
(
0.0
f
,
0.0
f
);
stepSize
->
upload
(
&
lastStepSizeFloat
);
stepSize
.
upload
(
&
lastStepSizeFloat
);
}
// Record the set of constraints and how many constraints each atom is involved in.
...
...
@@ -208,10 +202,10 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
isShakeAtom
[
atom3
]
=
true
;
}
if
(
atoms
.
size
()
>
0
)
{
settleAtoms
=
CudaArray
::
creat
e
<
int4
>
(
context
,
atoms
.
size
(),
"settleAtoms"
);
settleParams
=
CudaArray
::
creat
e
<
float2
>
(
context
,
params
.
size
(),
"settleParams"
);
settleAtoms
->
upload
(
atoms
);
settleParams
->
upload
(
params
);
settleAtoms
.
initializ
e
<
int4
>
(
context
,
atoms
.
size
(),
"settleAtoms"
);
settleParams
.
initializ
e
<
float2
>
(
context
,
params
.
size
(),
"settleParams"
);
settleAtoms
.
upload
(
atoms
);
settleParams
.
upload
(
params
);
}
}
...
...
@@ -291,10 +285,10 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
isShakeAtom
[
cluster
.
peripheralID
[
2
]]
=
true
;
++
index
;
}
shakeAtoms
=
CudaArray
::
creat
e
<
int4
>
(
context
,
atoms
.
size
(),
"shakeAtoms"
);
shakeParams
=
CudaArray
::
creat
e
<
float4
>
(
context
,
params
.
size
(),
"shakeParams"
);
shakeAtoms
->
upload
(
atoms
);
shakeParams
->
upload
(
params
);
shakeAtoms
.
initializ
e
<
int4
>
(
context
,
atoms
.
size
(),
"shakeAtoms"
);
shakeParams
.
initializ
e
<
float4
>
(
context
,
params
.
size
(),
"shakeParams"
);
shakeAtoms
.
upload
(
atoms
);
shakeParams
.
upload
(
params
);
}
// Find connected constraints for CCMA.
...
...
@@ -371,26 +365,26 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
// Record the CCMA data structures.
ccmaAtoms
=
CudaArray
::
creat
e
<
int2
>
(
context
,
numCCMA
,
"CcmaAtoms"
);
ccmaAtomConstraints
=
CudaArray
::
creat
e
<
int
>
(
context
,
numAtoms
*
maxAtomConstraints
,
"CcmaAtomConstraints"
);
ccmaNumAtomConstraints
=
CudaArray
::
creat
e
<
int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaConstraintMatrixColumn
=
CudaArray
::
creat
e
<
int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConverged
=
CudaArray
::
creat
e
<
int
>
(
context
,
2
,
"ccmaConverged"
);
ccmaAtoms
.
initializ
e
<
int2
>
(
context
,
numCCMA
,
"CcmaAtoms"
);
ccmaAtomConstraints
.
initializ
e
<
int
>
(
context
,
numAtoms
*
maxAtomConstraints
,
"CcmaAtomConstraints"
);
ccmaNumAtomConstraints
.
initializ
e
<
int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaConstraintMatrixColumn
.
initializ
e
<
int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConverged
.
initializ
e
<
int
>
(
context
,
2
,
"ccmaConverged"
);
CHECK_RESULT2
(
cuMemHostAlloc
((
void
**
)
&
ccmaConvergedMemory
,
sizeof
(
int
),
CU_MEMHOSTALLOC_DEVICEMAP
),
"Error allocating pinned memory"
);
CHECK_RESULT2
(
cuMemHostGetDevicePointer
(
&
ccmaConvergedDeviceMemory
,
ccmaConvergedMemory
,
0
),
"Error getting device address for pinned memory"
);
vector
<
int2
>
atomsVec
(
ccmaAtoms
->
getSize
());
vector
<
int
>
atomConstraintsVec
(
ccmaAtomConstraints
->
getSize
());
vector
<
int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
->
getSize
());
vector
<
int
>
constraintMatrixColumnVec
(
ccmaConstraintMatrixColumn
->
getSize
());
vector
<
int2
>
atomsVec
(
ccmaAtoms
.
getSize
());
vector
<
int
>
atomConstraintsVec
(
ccmaAtomConstraints
.
getSize
());
vector
<
int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
.
getSize
());
vector
<
int
>
constraintMatrixColumnVec
(
ccmaConstraintMatrixColumn
.
getSize
());
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
{
ccmaDistance
=
CudaArray
::
creat
e
<
double4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaDelta1
=
CudaArray
::
creat
e
<
double
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
=
CudaArray
::
creat
e
<
double
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaReducedMass
=
CudaArray
::
creat
e
<
double
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
=
CudaArray
::
creat
e
<
double
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
double4
>
distanceVec
(
ccmaDistance
->
getSize
());
vector
<
double
>
reducedMassVec
(
ccmaReducedMass
->
getSize
());
vector
<
double
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
->
getSize
());
ccmaDistance
.
initializ
e
<
double4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaDelta1
.
initializ
e
<
double
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
.
initializ
e
<
double
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaReducedMass
.
initializ
e
<
double
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
.
initializ
e
<
double
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
double4
>
distanceVec
(
ccmaDistance
.
getSize
());
vector
<
double
>
reducedMassVec
(
ccmaReducedMass
.
getSize
());
vector
<
double
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
.
getSize
());
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
int
index
=
constraintOrder
[
i
];
int
c
=
ccmaConstraints
[
index
];
...
...
@@ -404,19 +398,19 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
}
constraintMatrixColumnVec
[
i
+
matrix
[
index
].
size
()
*
numCCMA
]
=
numCCMA
;
}
ccmaDistance
->
upload
(
distanceVec
);
ccmaReducedMass
->
upload
(
reducedMassVec
);
ccmaConstraintMatrixValue
->
upload
(
constraintMatrixValueVec
);
ccmaDistance
.
upload
(
distanceVec
);
ccmaReducedMass
.
upload
(
reducedMassVec
);
ccmaConstraintMatrixValue
.
upload
(
constraintMatrixValueVec
);
}
else
{
ccmaDistance
=
CudaArray
::
creat
e
<
float4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaDelta1
=
CudaArray
::
creat
e
<
float
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
=
CudaArray
::
creat
e
<
float
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaReducedMass
=
CudaArray
::
creat
e
<
float
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
=
CudaArray
::
creat
e
<
float
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
float4
>
distanceVec
(
ccmaDistance
->
getSize
());
vector
<
float
>
reducedMassVec
(
ccmaReducedMass
->
getSize
());
vector
<
float
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
->
getSize
());
ccmaDistance
.
initializ
e
<
float4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaDelta1
.
initializ
e
<
float
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
.
initializ
e
<
float
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaReducedMass
.
initializ
e
<
float
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
.
initializ
e
<
float
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
float4
>
distanceVec
(
ccmaDistance
.
getSize
());
vector
<
float
>
reducedMassVec
(
ccmaReducedMass
.
getSize
());
vector
<
float
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
.
getSize
());
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
int
index
=
constraintOrder
[
i
];
int
c
=
ccmaConstraints
[
index
];
...
...
@@ -430,9 +424,9 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
}
constraintMatrixColumnVec
[
i
+
matrix
[
index
].
size
()
*
numCCMA
]
=
numCCMA
;
}
ccmaDistance
->
upload
(
distanceVec
);
ccmaReducedMass
->
upload
(
reducedMassVec
);
ccmaConstraintMatrixValue
->
upload
(
constraintMatrixValueVec
);
ccmaDistance
.
upload
(
distanceVec
);
ccmaReducedMass
.
upload
(
reducedMassVec
);
ccmaConstraintMatrixValue
.
upload
(
constraintMatrixValueVec
);
}
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
{
numAtomConstraintsVec
[
i
]
=
atomConstraints
[
i
].
size
();
...
...
@@ -441,10 +435,10 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
atomConstraintsVec
[
i
+
j
*
numAtoms
]
=
(
forward
?
inverseOrder
[
atomConstraints
[
i
][
j
]]
+
1
:
-
inverseOrder
[
atomConstraints
[
i
][
j
]]
-
1
);
}
}
ccmaAtoms
->
upload
(
atomsVec
);
ccmaAtomConstraints
->
upload
(
atomConstraintsVec
);
ccmaNumAtomConstraints
->
upload
(
numAtomConstraintsVec
);
ccmaConstraintMatrixColumn
->
upload
(
constraintMatrixColumnVec
);
ccmaAtoms
.
upload
(
atomsVec
);
ccmaAtomConstraints
.
upload
(
atomConstraintsVec
);
ccmaNumAtomConstraints
.
upload
(
numAtomConstraintsVec
);
ccmaConstraintMatrixColumn
.
upload
(
constraintMatrixColumnVec
);
}
// Build the list of virtual sites.
...
...
@@ -510,73 +504,73 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
int
num3Avg
=
vsite3AvgAtomVec
.
size
();
int
numOutOfPlane
=
vsiteOutOfPlaneAtomVec
.
size
();
int
numLocalCoords
=
vsiteLocalCoordsPosVec
.
size
();
vsite2AvgAtoms
=
CudaArray
::
creat
e
<
int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite3AvgAtoms
=
CudaArray
::
creat
e
<
int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsiteOutOfPlaneAtoms
=
CudaArray
::
creat
e
<
int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteLocalCoordsIndex
=
CudaArray
::
creat
e
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsIndexVec
.
size
()),
"vsiteLocalCoordsIndex"
);
vsiteLocalCoordsAtoms
=
CudaArray
::
creat
e
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsAtomVec
.
size
()),
"vsiteLocalCoordsAtoms"
);
vsiteLocalCoordsStartIndex
=
CudaArray
::
creat
e
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsStartVec
.
size
()),
"vsiteLocalCoordsStartIndex"
);
vsite2AvgAtoms
.
initializ
e
<
int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite3AvgAtoms
.
initializ
e
<
int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsiteOutOfPlaneAtoms
.
initializ
e
<
int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteLocalCoordsIndex
.
initializ
e
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsIndexVec
.
size
()),
"vsiteLocalCoordsIndex"
);
vsiteLocalCoordsAtoms
.
initializ
e
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsAtomVec
.
size
()),
"vsiteLocalCoordsAtoms"
);
vsiteLocalCoordsStartIndex
.
initializ
e
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsStartVec
.
size
()),
"vsiteLocalCoordsStartIndex"
);
if
(
num2Avg
>
0
)
vsite2AvgAtoms
->
upload
(
vsite2AvgAtomVec
);
vsite2AvgAtoms
.
upload
(
vsite2AvgAtomVec
);
if
(
num3Avg
>
0
)
vsite3AvgAtoms
->
upload
(
vsite3AvgAtomVec
);
vsite3AvgAtoms
.
upload
(
vsite3AvgAtomVec
);
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneAtoms
->
upload
(
vsiteOutOfPlaneAtomVec
);
vsiteOutOfPlaneAtoms
.
upload
(
vsiteOutOfPlaneAtomVec
);
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsIndex
->
upload
(
vsiteLocalCoordsIndexVec
);
vsiteLocalCoordsAtoms
->
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsStartIndex
->
upload
(
vsiteLocalCoordsStartVec
);
vsiteLocalCoordsIndex
.
upload
(
vsiteLocalCoordsIndexVec
);
vsiteLocalCoordsAtoms
.
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsStartIndex
.
upload
(
vsiteLocalCoordsStartVec
);
}
if
(
context
.
getUseDoublePrecision
())
{
vsite2AvgWeights
=
CudaArray
::
creat
e
<
double2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
CudaArray
::
creat
e
<
double4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
CudaArray
::
creat
e
<
double4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsWeights
=
CudaArray
::
creat
e
<
double
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
CudaArray
::
creat
e
<
double4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
vsite2AvgWeights
.
initializ
e
<
double2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
.
initializ
e
<
double4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
.
initializ
e
<
double4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsWeights
.
initializ
e
<
double
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
.
initializ
e
<
double4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
vsite2AvgWeights
->
upload
(
vsite2AvgWeightVec
);
vsite2AvgWeights
.
upload
(
vsite2AvgWeightVec
);
if
(
num3Avg
>
0
)
vsite3AvgWeights
->
upload
(
vsite3AvgWeightVec
);
vsite3AvgWeights
.
upload
(
vsite3AvgWeightVec
);
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneWeights
->
upload
(
vsiteOutOfPlaneWeightVec
);
vsiteOutOfPlaneWeights
.
upload
(
vsiteOutOfPlaneWeightVec
);
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsWeights
->
upload
(
vsiteLocalCoordsWeightVec
);
vsiteLocalCoordsPos
->
upload
(
vsiteLocalCoordsPosVec
);
vsiteLocalCoordsWeights
.
upload
(
vsiteLocalCoordsWeightVec
);
vsiteLocalCoordsPos
.
upload
(
vsiteLocalCoordsPosVec
);
}
}
else
{
vsite2AvgWeights
=
CudaArray
::
creat
e
<
float2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
CudaArray
::
creat
e
<
float4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
CudaArray
::
creat
e
<
float4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsWeights
=
CudaArray
::
creat
e
<
float
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
CudaArray
::
creat
e
<
float4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
vsite2AvgWeights
.
initializ
e
<
float2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
.
initializ
e
<
float4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
.
initializ
e
<
float4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsWeights
.
initializ
e
<
float
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
.
initializ
e
<
float4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
{
vector
<
float2
>
floatWeights
(
num2Avg
);
for
(
int
i
=
0
;
i
<
num2Avg
;
i
++
)
floatWeights
[
i
]
=
make_float2
((
float
)
vsite2AvgWeightVec
[
i
].
x
,
(
float
)
vsite2AvgWeightVec
[
i
].
y
);
vsite2AvgWeights
->
upload
(
floatWeights
);
vsite2AvgWeights
.
upload
(
floatWeights
);
}
if
(
num3Avg
>
0
)
{
vector
<
float4
>
floatWeights
(
num3Avg
);
for
(
int
i
=
0
;
i
<
num3Avg
;
i
++
)
floatWeights
[
i
]
=
make_float4
((
float
)
vsite3AvgWeightVec
[
i
].
x
,
(
float
)
vsite3AvgWeightVec
[
i
].
y
,
(
float
)
vsite3AvgWeightVec
[
i
].
z
,
0.0
f
);
vsite3AvgWeights
->
upload
(
floatWeights
);
vsite3AvgWeights
.
upload
(
floatWeights
);
}
if
(
numOutOfPlane
>
0
)
{
vector
<
float4
>
floatWeights
(
numOutOfPlane
);
for
(
int
i
=
0
;
i
<
numOutOfPlane
;
i
++
)
floatWeights
[
i
]
=
make_float4
((
float
)
vsiteOutOfPlaneWeightVec
[
i
].
x
,
(
float
)
vsiteOutOfPlaneWeightVec
[
i
].
y
,
(
float
)
vsiteOutOfPlaneWeightVec
[
i
].
z
,
0.0
f
);
vsiteOutOfPlaneWeights
->
upload
(
floatWeights
);
vsiteOutOfPlaneWeights
.
upload
(
floatWeights
);
}
if
(
numLocalCoords
>
0
)
{
vector
<
float
>
floatWeights
(
vsiteLocalCoordsWeightVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsWeightVec
.
size
();
i
++
)
floatWeights
[
i
]
=
(
float
)
vsiteLocalCoordsWeightVec
[
i
];
vsiteLocalCoordsWeights
->
upload
(
floatWeights
);
vsiteLocalCoordsWeights
.
upload
(
floatWeights
);
vector
<
float4
>
floatPos
(
vsiteLocalCoordsPosVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsPosVec
.
size
();
i
++
)
floatPos
[
i
]
=
make_float4
((
float
)
vsiteLocalCoordsPosVec
[
i
].
x
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
y
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
z
,
0.0
f
);
vsiteLocalCoordsPos
->
upload
(
floatPos
);
vsiteLocalCoordsPos
.
upload
(
floatPos
);
}
}
...
...
@@ -610,86 +604,28 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
CudaIntegrationUtilities
::~
CudaIntegrationUtilities
()
{
context
.
setAsCurrent
();
if
(
posDelta
!=
NULL
)
delete
posDelta
;
if
(
settleAtoms
!=
NULL
)
delete
settleAtoms
;
if
(
settleParams
!=
NULL
)
delete
settleParams
;
if
(
shakeAtoms
!=
NULL
)
delete
shakeAtoms
;
if
(
shakeParams
!=
NULL
)
delete
shakeParams
;
if
(
random
!=
NULL
)
delete
random
;
if
(
randomSeed
!=
NULL
)
delete
randomSeed
;
if
(
stepSize
!=
NULL
)
delete
stepSize
;
if
(
ccmaAtoms
!=
NULL
)
delete
ccmaAtoms
;
if
(
ccmaDistance
!=
NULL
)
delete
ccmaDistance
;
if
(
ccmaReducedMass
!=
NULL
)
delete
ccmaReducedMass
;
if
(
ccmaAtomConstraints
!=
NULL
)
delete
ccmaAtomConstraints
;
if
(
ccmaNumAtomConstraints
!=
NULL
)
delete
ccmaNumAtomConstraints
;
if
(
ccmaConstraintMatrixColumn
!=
NULL
)
delete
ccmaConstraintMatrixColumn
;
if
(
ccmaConstraintMatrixValue
!=
NULL
)
delete
ccmaConstraintMatrixValue
;
if
(
ccmaDelta1
!=
NULL
)
delete
ccmaDelta1
;
if
(
ccmaDelta2
!=
NULL
)
delete
ccmaDelta2
;
if
(
ccmaConverged
!=
NULL
)
delete
ccmaConverged
;
if
(
ccmaConvergedMemory
!=
NULL
)
cuMemFreeHost
(
ccmaConvergedMemory
);
if
(
vsite2AvgAtoms
!=
NULL
)
delete
vsite2AvgAtoms
;
if
(
vsite2AvgWeights
!=
NULL
)
delete
vsite2AvgWeights
;
if
(
vsite3AvgAtoms
!=
NULL
)
delete
vsite3AvgAtoms
;
if
(
vsite3AvgWeights
!=
NULL
)
delete
vsite3AvgWeights
;
if
(
vsiteOutOfPlaneAtoms
!=
NULL
)
delete
vsiteOutOfPlaneAtoms
;
if
(
vsiteOutOfPlaneWeights
!=
NULL
)
delete
vsiteOutOfPlaneWeights
;
if
(
vsiteLocalCoordsIndex
!=
NULL
)
delete
vsiteLocalCoordsIndex
;
if
(
vsiteLocalCoordsAtoms
!=
NULL
)
delete
vsiteLocalCoordsAtoms
;
if
(
vsiteLocalCoordsWeights
!=
NULL
)
delete
vsiteLocalCoordsWeights
;
if
(
vsiteLocalCoordsPos
!=
NULL
)
delete
vsiteLocalCoordsPos
;
if
(
vsiteLocalCoordsStartIndex
!=
NULL
)
delete
vsiteLocalCoordsStartIndex
;
}
void
CudaIntegrationUtilities
::
setNextStepSize
(
double
size
)
{
if
(
size
!=
lastStepSize
.
x
||
size
!=
lastStepSize
.
y
)
{
lastStepSize
=
make_double2
(
size
,
size
);
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
stepSize
->
upload
(
&
lastStepSize
);
stepSize
.
upload
(
&
lastStepSize
);
else
{
float2
lastStepSizeFloat
=
make_float2
((
float
)
size
,
(
float
)
size
);
stepSize
->
upload
(
&
lastStepSizeFloat
);
stepSize
.
upload
(
&
lastStepSizeFloat
);
}
}
}
double
CudaIntegrationUtilities
::
getLastStepSize
()
{
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
stepSize
->
download
(
&
lastStepSize
);
stepSize
.
download
(
&
lastStepSize
);
else
{
float2
lastStepSizeFloat
;
stepSize
->
download
(
&
lastStepSizeFloat
);
stepSize
.
download
(
&
lastStepSizeFloat
);
lastStepSize
=
make_double2
(
lastStepSizeFloat
.
x
,
lastStepSizeFloat
.
y
);
}
return
lastStepSize
.
y
;
...
...
@@ -718,41 +654,41 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
float
floatTol
=
(
float
)
tol
;
void
*
tolPointer
=
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
()
?
(
void
*
)
&
tol
:
(
void
*
)
&
floatTol
);
CUdeviceptr
posCorrection
=
(
context
.
getUseMixedPrecision
()
?
context
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
if
(
settleAtoms
!=
NULL
)
{
int
numClusters
=
settleAtoms
->
getSize
();
if
(
settleAtoms
.
isInitialized
()
)
{
int
numClusters
=
settleAtoms
.
getSize
();
void
*
args
[]
=
{
&
numClusters
,
tolPointer
,
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
posDelta
->
getDevicePointer
(),
&
context
.
getVelm
().
getDevicePointer
(),
&
settleAtoms
->
getDevicePointer
(),
&
settleParams
->
getDevicePointer
()};
context
.
executeKernel
(
settleKernel
,
args
,
settleAtoms
->
getSize
());
&
posDelta
.
getDevicePointer
(),
&
context
.
getVelm
().
getDevicePointer
(),
&
settleAtoms
.
getDevicePointer
(),
&
settleParams
.
getDevicePointer
()};
context
.
executeKernel
(
settleKernel
,
args
,
settleAtoms
.
getSize
());
}
if
(
shakeAtoms
!=
NULL
)
{
int
numClusters
=
shakeAtoms
->
getSize
();
if
(
shakeAtoms
.
isInitialized
()
)
{
int
numClusters
=
shakeAtoms
.
getSize
();
void
*
args
[]
=
{
&
numClusters
,
tolPointer
,
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
&
shakeAtoms
->
getDevicePointer
(),
&
shakeParams
->
getDevicePointer
()};
context
.
executeKernel
(
shakeKernel
,
args
,
shakeAtoms
->
getSize
());
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
.
getDevicePointer
(),
&
shakeAtoms
.
getDevicePointer
(),
&
shakeParams
.
getDevicePointer
()};
context
.
executeKernel
(
shakeKernel
,
args
,
shakeAtoms
.
getSize
());
}
if
(
ccmaAtoms
!=
NULL
)
{
void
*
directionsArgs
[]
=
{
&
ccmaAtoms
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
ccmaConverged
->
getDevicePointer
()};
context
.
executeKernel
(
ccmaDirectionsKernel
,
directionsArgs
,
ccmaAtoms
->
getSize
());
if
(
ccmaAtoms
.
isInitialized
()
)
{
void
*
directionsArgs
[]
=
{
&
ccmaAtoms
.
getDevicePointer
(),
&
ccmaDistance
.
getDevicePointer
(),
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
ccmaConverged
.
getDevicePointer
()};
context
.
executeKernel
(
ccmaDirectionsKernel
,
directionsArgs
,
ccmaAtoms
.
getSize
());
int
i
;
void
*
forceArgs
[]
=
{
&
ccmaAtoms
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
&
ccmaReducedMass
->
getDevicePointer
(),
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaConverged
->
getDevicePointer
(),
void
*
forceArgs
[]
=
{
&
ccmaAtoms
.
getDevicePointer
(),
&
ccmaDistance
.
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
.
getDevicePointer
(),
&
ccmaReducedMass
.
getDevicePointer
(),
&
ccmaDelta1
.
getDevicePointer
(),
&
ccmaConverged
.
getDevicePointer
(),
&
ccmaConvergedDeviceMemory
,
tolPointer
,
&
i
};
void
*
multiplyArgs
[]
=
{
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaDelta2
->
getDevicePointer
(),
&
ccmaConstraintMatrixColumn
->
getDevicePointer
(),
&
ccmaConstraintMatrixValue
->
getDevicePointer
(),
&
ccmaConverged
->
getDevicePointer
(),
&
i
};
void
*
updateArgs
[]
=
{
&
ccmaNumAtomConstraints
->
getDevicePointer
(),
&
ccmaAtomConstraints
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
&
context
.
getVelm
().
getDevicePointer
(),
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaDelta2
->
getDevicePointer
(),
&
ccmaConverged
->
getDevicePointer
(),
&
i
};
void
*
multiplyArgs
[]
=
{
&
ccmaDelta1
.
getDevicePointer
(),
&
ccmaDelta2
.
getDevicePointer
(),
&
ccmaConstraintMatrixColumn
.
getDevicePointer
(),
&
ccmaConstraintMatrixValue
.
getDevicePointer
(),
&
ccmaConverged
.
getDevicePointer
(),
&
i
};
void
*
updateArgs
[]
=
{
&
ccmaNumAtomConstraints
.
getDevicePointer
(),
&
ccmaAtomConstraints
.
getDevicePointer
(),
&
ccmaDistance
.
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
.
getDevicePointer
(),
&
context
.
getVelm
().
getDevicePointer
(),
&
ccmaDelta1
.
getDevicePointer
(),
&
ccmaDelta2
.
getDevicePointer
(),
&
ccmaConverged
.
getDevicePointer
(),
&
i
};
const
int
checkInterval
=
4
;
ccmaConvergedMemory
[
0
]
=
0
;
for
(
i
=
0
;
i
<
150
;
i
++
)
{
context
.
executeKernel
(
ccmaForceKernel
,
forceArgs
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaForceKernel
,
forceArgs
,
ccmaAtoms
.
getSize
());
if
((
i
+
1
)
%
checkInterval
==
0
)
CHECK_RESULT2
(
cuEventRecord
(
ccmaEvent
,
0
),
"Error recording event for CCMA"
);
context
.
executeKernel
(
ccmaMultiplyKernel
,
multiplyArgs
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaMultiplyKernel
,
multiplyArgs
,
ccmaAtoms
.
getSize
());
context
.
executeKernel
(
ccmaUpdateKernel
,
updateArgs
,
context
.
getNumAtoms
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
CHECK_RESULT2
(
cuEventSynchronize
(
ccmaEvent
),
"Error synchronizing on event for CCMA"
);
...
...
@@ -766,12 +702,12 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
void
CudaIntegrationUtilities
::
computeVirtualSites
()
{
if
(
numVsites
>
0
)
{
CUdeviceptr
posCorrection
=
(
context
.
getUseMixedPrecision
()
?
context
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args
[]
=
{
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
vsite2AvgAtoms
->
getDevicePointer
(),
&
vsite2AvgWeights
->
getDevicePointer
(),
&
vsite3AvgAtoms
->
getDevicePointer
(),
&
vsite3AvgWeights
->
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
->
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsIndex
->
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
->
getDevicePointer
(),
&
vsiteLocalCoordsWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsPos
->
getDevicePointer
(),
&
vsiteLocalCoordsStartIndex
->
getDevicePointer
()};
void
*
args
[]
=
{
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
vsite2AvgAtoms
.
getDevicePointer
(),
&
vsite2AvgWeights
.
getDevicePointer
(),
&
vsite3AvgAtoms
.
getDevicePointer
(),
&
vsite3AvgWeights
.
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
.
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
.
getDevicePointer
(),
&
vsiteLocalCoordsIndex
.
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
.
getDevicePointer
(),
&
vsiteLocalCoordsWeights
.
getDevicePointer
(),
&
vsiteLocalCoordsPos
.
getDevicePointer
(),
&
vsiteLocalCoordsStartIndex
.
getDevicePointer
()};
context
.
executeKernel
(
vsitePositionKernel
,
args
,
numVsites
);
}
}
...
...
@@ -780,18 +716,18 @@ void CudaIntegrationUtilities::distributeForcesFromVirtualSites() {
if
(
numVsites
>
0
)
{
CUdeviceptr
posCorrection
=
(
context
.
getUseMixedPrecision
()
?
context
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args
[]
=
{
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
context
.
getForce
().
getDevicePointer
(),
&
vsite2AvgAtoms
->
getDevicePointer
(),
&
vsite2AvgWeights
->
getDevicePointer
(),
&
vsite3AvgAtoms
->
getDevicePointer
(),
&
vsite3AvgWeights
->
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
->
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsIndex
->
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
->
getDevicePointer
(),
&
vsiteLocalCoordsWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsPos
->
getDevicePointer
(),
&
vsiteLocalCoordsStartIndex
->
getDevicePointer
()};
&
vsite2AvgAtoms
.
getDevicePointer
(),
&
vsite2AvgWeights
.
getDevicePointer
(),
&
vsite3AvgAtoms
.
getDevicePointer
(),
&
vsite3AvgWeights
.
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
.
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
.
getDevicePointer
(),
&
vsiteLocalCoordsIndex
.
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
.
getDevicePointer
(),
&
vsiteLocalCoordsWeights
.
getDevicePointer
(),
&
vsiteLocalCoordsPos
.
getDevicePointer
(),
&
vsiteLocalCoordsStartIndex
.
getDevicePointer
()};
context
.
executeKernel
(
vsiteForceKernel
,
args
,
numVsites
);
}
}
void
CudaIntegrationUtilities
::
initRandomNumberGenerator
(
unsigned
int
randomNumberSeed
)
{
if
(
random
!=
NULL
)
{
if
(
random
.
isInitialized
()
)
{
if
(
randomNumberSeed
!=
lastSeed
)
throw
OpenMMException
(
"CudaIntegrationUtilities::initRandomNumberGenerator(): Requested two different values for the random number seed"
);
return
;
...
...
@@ -800,63 +736,61 @@ void CudaIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumb
// Create the random number arrays.
lastSeed
=
randomNumberSeed
;
random
=
CudaArray
::
creat
e
<
float4
>
(
context
,
4
*
context
.
getPaddedNumAtoms
(),
"random"
);
randomSeed
=
CudaArray
::
creat
e
<
int4
>
(
context
,
context
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
randomPos
=
random
->
getSize
();
random
.
initializ
e
<
float4
>
(
context
,
4
*
context
.
getPaddedNumAtoms
(),
"random"
);
randomSeed
.
initializ
e
<
int4
>
(
context
,
context
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
randomPos
=
random
.
getSize
();
// Use a quick and dirty RNG to pick seeds for the real random number generator.
vector
<
int4
>
seed
(
randomSeed
->
getSize
());
vector
<
int4
>
seed
(
randomSeed
.
getSize
());
unsigned
int
r
=
randomNumberSeed
;
if
(
r
==
0
)
r
=
(
unsigned
int
)
osrngseed
();
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
for
(
int
i
=
0
;
i
<
randomSeed
.
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
randomSeed
.
upload
(
seed
);
}
int
CudaIntegrationUtilities
::
prepareRandomNumbers
(
int
numValues
)
{
if
(
randomPos
+
numValues
<=
random
->
getSize
())
{
if
(
randomPos
+
numValues
<=
random
.
getSize
())
{
int
oldPos
=
randomPos
;
randomPos
+=
numValues
;
return
oldPos
;
}
if
(
numValues
>
random
->
getSize
())
{
delete
random
;
random
=
CudaArray
::
create
<
float4
>
(
context
,
numValues
,
"random"
);
}
int
size
=
random
->
getSize
();
void
*
args
[]
=
{
&
size
,
&
random
->
getDevicePointer
(),
&
randomSeed
->
getDevicePointer
()};
context
.
executeKernel
(
randomKernel
,
args
,
random
->
getSize
());
if
(
numValues
>
random
.
getSize
())
random
.
resize
(
numValues
);
int
size
=
random
.
getSize
();
void
*
args
[]
=
{
&
size
,
&
random
.
getDevicePointer
(),
&
randomSeed
.
getDevicePointer
()};
context
.
executeKernel
(
randomKernel
,
args
,
random
.
getSize
());
randomPos
=
numValues
;
return
0
;
}
void
CudaIntegrationUtilities
::
createCheckpoint
(
ostream
&
stream
)
{
if
(
random
==
NULL
)
if
(
!
random
.
isInitialized
()
)
return
;
stream
.
write
((
char
*
)
&
randomPos
,
sizeof
(
int
));
vector
<
float4
>
randomVec
;
random
->
download
(
randomVec
);
stream
.
write
((
char
*
)
&
randomVec
[
0
],
sizeof
(
float4
)
*
random
->
getSize
());
random
.
download
(
randomVec
);
stream
.
write
((
char
*
)
&
randomVec
[
0
],
sizeof
(
float4
)
*
random
.
getSize
());
vector
<
int4
>
randomSeedVec
;
randomSeed
->
download
(
randomSeedVec
);
stream
.
write
((
char
*
)
&
randomSeedVec
[
0
],
sizeof
(
int4
)
*
randomSeed
->
getSize
());
randomSeed
.
download
(
randomSeedVec
);
stream
.
write
((
char
*
)
&
randomSeedVec
[
0
],
sizeof
(
int4
)
*
randomSeed
.
getSize
());
}
void
CudaIntegrationUtilities
::
loadCheckpoint
(
istream
&
stream
)
{
if
(
random
==
NULL
)
if
(
!
random
.
isInitialized
()
)
return
;
stream
.
read
((
char
*
)
&
randomPos
,
sizeof
(
int
));
vector
<
float4
>
randomVec
(
random
->
getSize
());
stream
.
read
((
char
*
)
&
randomVec
[
0
],
sizeof
(
float4
)
*
random
->
getSize
());
random
->
upload
(
randomVec
);
vector
<
int4
>
randomSeedVec
(
randomSeed
->
getSize
());
stream
.
read
((
char
*
)
&
randomSeedVec
[
0
],
sizeof
(
int4
)
*
randomSeed
->
getSize
());
randomSeed
->
upload
(
randomSeedVec
);
vector
<
float4
>
randomVec
(
random
.
getSize
());
stream
.
read
((
char
*
)
&
randomVec
[
0
],
sizeof
(
float4
)
*
random
.
getSize
());
random
.
upload
(
randomVec
);
vector
<
int4
>
randomSeedVec
(
randomSeed
.
getSize
());
stream
.
read
((
char
*
)
&
randomSeedVec
[
0
],
sizeof
(
int4
)
*
randomSeed
.
getSize
());
randomSeed
.
upload
(
randomSeedVec
);
}
double
CudaIntegrationUtilities
::
computeKineticEnergy
(
double
timeShift
)
{
...
...
@@ -867,7 +801,7 @@ double CudaIntegrationUtilities::computeKineticEnergy(double timeShift) {
// Copy the velocities into the posDelta array while we temporarily modify them.
context
.
getVelm
().
copyTo
(
*
posDelta
);
context
.
getVelm
().
copyTo
(
posDelta
);
// Apply the time shift.
...
...
@@ -901,6 +835,6 @@ double CudaIntegrationUtilities::computeKineticEnergy(double timeShift) {
// Restore the velocities.
if
(
timeShift
!=
0
)
posDelta
->
copyTo
(
context
.
getVelm
());
posDelta
.
copyTo
(
context
.
getVelm
());
return
0.5
*
energy
;
}
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
b8c86406
...
...
@@ -63,10 +63,7 @@ private:
};
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
singlePairs
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
),
canUsePairList
(
true
)
{
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
),
canUsePairList
(
true
)
{
// Decide how many thread blocks to use.
string
errorMessage
=
"Error initializing nonbonded utilities"
;
...
...
@@ -79,36 +76,6 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c
}
CudaNonbondedUtilities
::~
CudaNonbondedUtilities
()
{
if
(
exclusionIndices
!=
NULL
)
delete
exclusionIndices
;
if
(
exclusionRowIndices
!=
NULL
)
delete
exclusionRowIndices
;
if
(
exclusionTiles
!=
NULL
)
delete
exclusionTiles
;
if
(
exclusions
!=
NULL
)
delete
exclusions
;
if
(
interactingTiles
!=
NULL
)
delete
interactingTiles
;
if
(
interactingAtoms
!=
NULL
)
delete
interactingAtoms
;
if
(
interactionCount
!=
NULL
)
delete
interactionCount
;
if
(
singlePairs
!=
NULL
)
delete
singlePairs
;
if
(
blockCenter
!=
NULL
)
delete
blockCenter
;
if
(
blockBoundingBox
!=
NULL
)
delete
blockBoundingBox
;
if
(
sortedBlocks
!=
NULL
)
delete
sortedBlocks
;
if
(
sortedBlockCenter
!=
NULL
)
delete
sortedBlockCenter
;
if
(
sortedBlockBoundingBox
!=
NULL
)
delete
sortedBlockBoundingBox
;
if
(
oldPositions
!=
NULL
)
delete
oldPositions
;
if
(
rebuildNeighborList
!=
NULL
)
delete
rebuildNeighborList
;
if
(
blockSorter
!=
NULL
)
delete
blockSorter
;
if
(
pinnedCountBuffer
!=
NULL
)
...
...
@@ -220,8 +187,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
exclusionTilesVec
.
push_back
(
make_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
compareUshort2
);
exclusionTiles
=
CudaArray
::
creat
e
<
ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
->
upload
(
exclusionTilesVec
);
exclusionTiles
.
initializ
e
<
ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
.
upload
(
exclusionTilesVec
);
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
for
(
int
i
=
0
;
i
<
(
int
)
exclusionTilesVec
.
size
();
i
++
)
{
ushort2
tile
=
exclusionTilesVec
[
i
];
...
...
@@ -242,16 +209,16 @@ void CudaNonbondedUtilities::initialize(const System& system) {
maxExclusions
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
exclusionBlocksForBlock
.
size
();
i
++
)
maxExclusions
=
(
maxExclusions
>
exclusionBlocksForBlock
[
i
].
size
()
?
maxExclusions
:
exclusionBlocksForBlock
[
i
].
size
());
exclusionIndices
=
CudaArray
::
creat
e
<
unsigned
int
>
(
context
,
exclusionIndicesVec
.
size
(),
"exclusionIndices"
);
exclusionRowIndices
=
CudaArray
::
creat
e
<
unsigned
int
>
(
context
,
exclusionRowIndicesVec
.
size
(),
"exclusionRowIndices"
);
exclusionIndices
->
upload
(
exclusionIndicesVec
);
exclusionRowIndices
->
upload
(
exclusionRowIndicesVec
);
exclusionIndices
.
initializ
e
<
unsigned
int
>
(
context
,
exclusionIndicesVec
.
size
(),
"exclusionIndices"
);
exclusionRowIndices
.
initializ
e
<
unsigned
int
>
(
context
,
exclusionRowIndicesVec
.
size
(),
"exclusionRowIndices"
);
exclusionIndices
.
upload
(
exclusionIndicesVec
);
exclusionRowIndices
.
upload
(
exclusionRowIndicesVec
);
// Record the exclusion data.
exclusions
=
CudaArray
::
creat
e
<
tileflags
>
(
context
,
tilesWithExclusions
.
size
()
*
CudaContext
::
TileSize
,
"exclusions"
);
exclusions
.
initializ
e
<
tileflags
>
(
context
,
tilesWithExclusions
.
size
()
*
CudaContext
::
TileSize
,
"exclusions"
);
tileflags
allFlags
=
(
tileflags
)
-
1
;
vector
<
tileflags
>
exclusionVec
(
exclusions
->
getSize
(),
allFlags
);
vector
<
tileflags
>
exclusionVec
(
exclusions
.
getSize
(),
allFlags
);
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
CudaContext
::
TileSize
;
int
offset1
=
atom1
-
x
*
CudaContext
::
TileSize
;
...
...
@@ -270,7 +237,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
}
}
atomExclusions
.
clear
();
// We won't use this again, so free the memory it used
exclusions
->
upload
(
exclusionVec
);
exclusions
.
upload
(
exclusionVec
);
// Create data structures for the neighbor list.
...
...
@@ -284,21 +251,21 @@ void CudaNonbondedUtilities::initialize(const System& system) {
if
(
maxTiles
<
1
)
maxTiles
=
1
;
maxSinglePairs
=
5
*
numAtoms
;
interactingTiles
=
CudaArray
::
creat
e
<
int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
CudaArray
::
creat
e
<
int
>
(
context
,
CudaContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactionCount
=
CudaArray
::
creat
e
<
unsigned
int
>
(
context
,
2
,
"interactionCount"
);
singlePairs
=
CudaArray
::
creat
e
<
int2
>
(
context
,
maxSinglePairs
,
"singlePairs"
);
interactingTiles
.
initializ
e
<
int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
.
initializ
e
<
int
>
(
context
,
CudaContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactionCount
.
initializ
e
<
unsigned
int
>
(
context
,
2
,
"interactionCount"
);
singlePairs
.
initializ
e
<
int2
>
(
context
,
maxSinglePairs
,
"singlePairs"
);
int
elementSize
=
(
context
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
blockCenter
=
new
CudaArray
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockCenter"
);
blockBoundingBox
=
new
CudaArray
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockBoundingBox"
);
sortedBlocks
=
new
CudaArray
(
context
,
numAtomBlocks
,
2
*
elementSize
,
"sortedBlocks"
);
sortedBlockCenter
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockBoundingBox
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
oldPositions
=
new
CudaArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
rebuildNeighborList
=
CudaArray
::
creat
e
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockCenter
.
initialize
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockCenter"
);
blockBoundingBox
.
initialize
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockBoundingBox"
);
sortedBlocks
.
initialize
(
context
,
numAtomBlocks
,
2
*
elementSize
,
"sortedBlocks"
);
sortedBlockCenter
.
initialize
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockBoundingBox
.
initialize
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
oldPositions
.
initialize
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
rebuildNeighborList
.
initializ
e
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockSorter
=
new
CudaSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
unsigned
int
>
count
(
2
,
0
);
interactionCount
->
upload
(
count
);
interactionCount
.
upload
(
count
);
}
// Record arguments for kernels.
...
...
@@ -306,24 +273,24 @@ void CudaNonbondedUtilities::initialize(const System& system) {
forceArgs
.
push_back
(
&
context
.
getForce
().
getDevicePointer
());
forceArgs
.
push_back
(
&
context
.
getEnergyBuffer
().
getDevicePointer
());
forceArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusions
->
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusionTiles
->
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusions
.
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusionTiles
.
getDevicePointer
());
forceArgs
.
push_back
(
&
startTileIndex
);
forceArgs
.
push_back
(
&
numTiles
);
if
(
useCutoff
)
{
forceArgs
.
push_back
(
&
interactingTiles
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactingTiles
.
getDevicePointer
());
forceArgs
.
push_back
(
&
interactionCount
.
getDevicePointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecXPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
forceArgs
.
push_back
(
&
maxTiles
);
forceArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
forceArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactingAtoms
->
getDevicePointer
());
forceArgs
.
push_back
(
&
blockCenter
.
getDevicePointer
());
forceArgs
.
push_back
(
&
blockBoundingBox
.
getDevicePointer
());
forceArgs
.
push_back
(
&
interactingAtoms
.
getDevicePointer
());
forceArgs
.
push_back
(
&
maxSinglePairs
);
forceArgs
.
push_back
(
&
singlePairs
->
getDevicePointer
());
forceArgs
.
push_back
(
&
singlePairs
.
getDevicePointer
());
}
for
(
int
i
=
0
;
i
<
(
int
)
parameters
.
size
();
i
++
)
forceArgs
.
push_back
(
&
parameters
[
i
].
getMemory
());
...
...
@@ -339,41 +306,41 @@ void CudaNonbondedUtilities::initialize(const System& system) {
findBlockBoundsArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
findBlockBoundsArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
findBlockBoundsArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlockCenter
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlockBoundingBox
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
blockCenter
.
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
blockBoundingBox
.
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
rebuildNeighborList
.
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
sortedBlocks
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlocks
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockCenter
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockBoundingBox
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlockCenter
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlockBoundingBox
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
oldPositions
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
oldPositions
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
interactionCount
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
rebuildNeighborList
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
forceRebuildNeighborList
);
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecXPointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactingTiles
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactingAtoms
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
singlePairs
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactionCount
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactingTiles
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactingAtoms
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
singlePairs
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
maxTiles
);
findInteractingBlocksArgs
.
push_back
(
&
maxSinglePairs
);
findInteractingBlocksArgs
.
push_back
(
&
startBlockIndex
);
findInteractingBlocksArgs
.
push_back
(
&
numBlocks
);
findInteractingBlocksArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
sortedBlockCenter
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
sortedBlockBoundingBox
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
exclusionIndices
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
exclusionRowIndices
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
oldPositions
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
sortedBlocks
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
sortedBlockCenter
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
sortedBlockBoundingBox
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
exclusionIndices
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
exclusionRowIndices
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
oldPositions
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
rebuildNeighborList
.
getDevicePointer
());
}
}
...
...
@@ -406,12 +373,12 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
blockSorter
->
sort
(
sortedBlocks
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
forceRebuildNeighborList
=
false
;
lastCutoff
=
kernels
.
cutoffDistance
;
interactionCount
->
download
(
pinnedCountBuffer
,
false
);
interactionCount
.
download
(
pinnedCountBuffer
,
false
);
cuEventRecord
(
downloadCountEvent
,
context
.
getCurrentStream
());
}
...
...
@@ -445,27 +412,21 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
maxTiles
=
totalTiles
;
delete
interactingTiles
;
delete
interactingAtoms
;
interactingTiles
=
NULL
;
// Avoid an error in the destructor if the following allocation fails
interactingAtoms
=
NULL
;
interactingTiles
=
CudaArray
::
create
<
int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
CudaArray
::
create
<
int
>
(
context
,
CudaContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactingTiles
.
resize
(
maxTiles
);
interactingAtoms
.
resize
(
CudaContext
::
TileSize
*
maxTiles
);
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
7
]
=
&
interactingTiles
->
getDevicePointer
();
findInteractingBlocksArgs
[
6
]
=
&
interactingTiles
->
getDevicePointer
();
forceArgs
[
7
]
=
&
interactingTiles
.
getDevicePointer
();
findInteractingBlocksArgs
[
6
]
=
&
interactingTiles
.
getDevicePointer
();
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
forceArgs
[
17
]
=
&
interactingAtoms
.
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
.
getDevicePointer
();
}
if
(
pinnedCountBuffer
[
1
]
>
maxSinglePairs
)
{
maxSinglePairs
=
(
int
)
(
1.2
*
pinnedCountBuffer
[
1
]);
delete
singlePairs
;
singlePairs
=
NULL
;
// Avoid an error in the destructor if the following allocation fails
singlePairs
=
CudaArray
::
create
<
int2
>
(
context
,
maxSinglePairs
,
"singlePairs"
);
singlePairs
.
resize
(
maxSinglePairs
);
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
19
]
=
&
singlePairs
->
getDevicePointer
();
findInteractingBlocksArgs
[
8
]
=
&
singlePairs
->
getDevicePointer
();
forceArgs
[
19
]
=
&
singlePairs
.
getDevicePointer
();
findInteractingBlocksArgs
[
8
]
=
&
singlePairs
.
getDevicePointer
();
}
forceRebuildNeighborList
=
true
;
context
.
setForcesValid
(
false
);
...
...
@@ -510,7 +471,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padding
);
defines
[
"PADDED_CUTOFF"
]
=
context
.
doubleToString
(
paddedCutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
->
getSize
());
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
.
getSize
());
if
(
usePeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
if
(
context
.
getBoxIsTriclinic
())
...
...
@@ -735,7 +696,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"TILE_SIZE"
]
=
context
.
intToString
(
CudaContext
::
TileSize
);
int
numExclusionTiles
=
exclusionTiles
->
getSize
();
int
numExclusionTiles
=
exclusionTiles
.
getSize
();
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
numExclusionTiles
);
int
numContexts
=
context
.
getPlatformData
().
contexts
.
size
();
int
startExclusionIndex
=
context
.
getContextIndex
()
*
numExclusionTiles
/
numContexts
;
...
...
platforms/cuda/src/CudaSort.cpp
View file @
b8c86406
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2010-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -31,8 +31,7 @@
using
namespace
OpenMM
;
using
namespace
std
;
CudaSort
::
CudaSort
(
CudaContext
&
context
,
SortTrait
*
trait
,
unsigned
int
length
)
:
context
(
context
),
trait
(
trait
),
dataRange
(
NULL
),
bucketOfElement
(
NULL
),
offsetInBucket
(
NULL
),
bucketOffset
(
NULL
),
buckets
(
NULL
),
dataLength
(
length
)
{
CudaSort
::
CudaSort
(
CudaContext
&
context
,
SortTrait
*
trait
,
unsigned
int
length
)
:
context
(
context
),
trait
(
trait
),
dataLength
(
length
)
{
// Create kernels.
map
<
string
,
string
>
replacements
;
...
...
@@ -76,26 +75,16 @@ CudaSort::CudaSort(CudaContext& context, SortTrait* trait, unsigned int length)
// Create workspace arrays.
if
(
!
isShortList
)
{
dataRange
=
new
CudaArray
(
context
,
2
,
trait
->
getKeySize
(),
"sortDataRange"
);
bucketOffset
=
CudaArray
::
creat
e
<
uint1
>
(
context
,
numBuckets
,
"bucketOffset"
);
bucketOfElement
=
CudaArray
::
creat
e
<
uint1
>
(
context
,
length
,
"bucketOfElement"
);
offsetInBucket
=
CudaArray
::
creat
e
<
uint1
>
(
context
,
length
,
"offsetInBucket"
);
buckets
=
new
CudaArray
(
context
,
length
,
trait
->
getDataSize
(),
"buckets"
);
dataRange
.
initialize
(
context
,
2
,
trait
->
getKeySize
(),
"sortDataRange"
);
bucketOffset
.
initializ
e
<
uint1
>
(
context
,
numBuckets
,
"bucketOffset"
);
bucketOfElement
.
initializ
e
<
uint1
>
(
context
,
length
,
"bucketOfElement"
);
offsetInBucket
.
initializ
e
<
uint1
>
(
context
,
length
,
"offsetInBucket"
);
buckets
.
initialize
(
context
,
length
,
trait
->
getDataSize
(),
"buckets"
);
}
}
CudaSort
::~
CudaSort
()
{
delete
trait
;
if
(
dataRange
!=
NULL
)
delete
dataRange
;
if
(
bucketOfElement
!=
NULL
)
delete
bucketOfElement
;
if
(
offsetInBucket
!=
NULL
)
delete
offsetInBucket
;
if
(
bucketOffset
!=
NULL
)
delete
bucketOffset
;
if
(
buckets
!=
NULL
)
delete
buckets
;
}
void
CudaSort
::
sort
(
CudaArray
&
data
)
{
...
...
@@ -112,30 +101,30 @@ void CudaSort::sort(CudaArray& data) {
else
{
// Compute the range of data values.
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
void
*
rangeArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
dataRange
->
getDevicePointer
(),
&
numBuckets
,
&
bucketOffset
->
getDevicePointer
()};
unsigned
int
numBuckets
=
bucketOffset
.
getSize
();
void
*
rangeArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
dataRange
.
getDevicePointer
(),
&
numBuckets
,
&
bucketOffset
.
getDevicePointer
()};
context
.
executeKernel
(
computeRangeKernel
,
rangeArgs
,
rangeKernelSize
,
rangeKernelSize
,
2
*
rangeKernelSize
*
trait
->
getKeySize
());
// Assign array elements to buckets.
void
*
elementsArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
numBuckets
,
&
dataRange
->
getDevicePointer
(),
&
bucketOffset
->
getDevicePointer
(),
&
bucketOfElement
->
getDevicePointer
(),
&
offsetInBucket
->
getDevicePointer
()};
void
*
elementsArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
numBuckets
,
&
dataRange
.
getDevicePointer
(),
&
bucketOffset
.
getDevicePointer
(),
&
bucketOfElement
.
getDevicePointer
(),
&
offsetInBucket
.
getDevicePointer
()};
context
.
executeKernel
(
assignElementsKernel
,
elementsArgs
,
data
.
getSize
(),
128
);
// Compute the position of each bucket.
void
*
computeArgs
[]
=
{
&
numBuckets
,
&
bucketOffset
->
getDevicePointer
()};
void
*
computeArgs
[]
=
{
&
numBuckets
,
&
bucketOffset
.
getDevicePointer
()};
context
.
executeKernel
(
computeBucketPositionsKernel
,
computeArgs
,
positionsKernelSize
,
positionsKernelSize
,
positionsKernelSize
*
sizeof
(
int
));
// Copy the data into the buckets.
void
*
copyArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
buckets
->
getDevicePointer
(),
&
dataLength
,
&
bucketOffset
->
getDevicePointer
(),
&
bucketOfElement
->
getDevicePointer
(),
&
offsetInBucket
->
getDevicePointer
()};
void
*
copyArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
buckets
.
getDevicePointer
(),
&
dataLength
,
&
bucketOffset
.
getDevicePointer
(),
&
bucketOfElement
.
getDevicePointer
(),
&
offsetInBucket
.
getDevicePointer
()};
context
.
executeKernel
(
copyToBucketsKernel
,
copyArgs
,
data
.
getSize
());
// Sort each bucket.
void
*
sortArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
buckets
->
getDevicePointer
(),
&
numBuckets
,
&
bucketOffset
->
getDevicePointer
()};
void
*
sortArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
buckets
.
getDevicePointer
(),
&
numBuckets
,
&
bucketOffset
.
getDevicePointer
()};
context
.
executeKernel
(
sortBucketsKernel
,
sortArgs
,
((
data
.
getSize
()
+
sortKernelSize
-
1
)
/
sortKernelSize
)
*
sortKernelSize
,
sortKernelSize
,
sortKernelSize
*
trait
->
getDataSize
());
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment