Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b8c86406
Commit
b8c86406
authored
Feb 12, 2018
by
Peter Eastman
Browse files
Began converting CudaArrays.
parent
b33ee3b0
Changes
12
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
466 additions
and
539 deletions
+466
-539
platforms/cuda/include/CudaArray.h
platforms/cuda/include/CudaArray.h
+37
-2
platforms/cuda/include/CudaBondedUtilities.h
platforms/cuda/include/CudaBondedUtilities.h
+2
-3
platforms/cuda/include/CudaContext.h
platforms/cuda/include/CudaContext.h
+18
-18
platforms/cuda/include/CudaIntegrationUtilities.h
platforms/cuda/include/CudaIntegrationUtilities.h
+33
-33
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+27
-27
platforms/cuda/include/CudaSort.h
platforms/cuda/include/CudaSort.h
+6
-6
platforms/cuda/src/CudaArray.cpp
platforms/cuda/src/CudaArray.cpp
+47
-13
platforms/cuda/src/CudaBondedUtilities.cpp
platforms/cuda/src/CudaBondedUtilities.cpp
+7
-13
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+54
-73
platforms/cuda/src/CudaIntegrationUtilities.cpp
platforms/cuda/src/CudaIntegrationUtilities.cpp
+154
-220
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+66
-105
platforms/cuda/src/CudaSort.cpp
platforms/cuda/src/CudaSort.cpp
+15
-26
No files found.
platforms/cuda/include/CudaArray.h
View file @
b8c86406
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -57,6 +57,11 @@ public:
...
@@ -57,6 +57,11 @@ public:
static
CudaArray
*
create
(
CudaContext
&
context
,
int
size
,
const
std
::
string
&
name
)
{
static
CudaArray
*
create
(
CudaContext
&
context
,
int
size
,
const
std
::
string
&
name
)
{
return
new
CudaArray
(
context
,
size
,
sizeof
(
T
),
name
);
return
new
CudaArray
(
context
,
size
,
sizeof
(
T
),
name
);
}
}
/**
* Create an uninitialized CudaArray object. It does not point to any device memory,
* and cannot be used until initialize() is called on it.
*/
CudaArray
();
/**
/**
* Create a CudaArray object.
* Create a CudaArray object.
*
*
...
@@ -67,6 +72,36 @@ public:
...
@@ -67,6 +72,36 @@ public:
*/
*/
CudaArray
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
CudaArray
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
~
CudaArray
();
~
CudaArray
();
/**
* Initialize this object.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void
initialize
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
/**
* Initialize this object. The template argument is the data type of each array element.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param name the name of the array
*/
template
<
class
T
>
void
initialize
(
CudaContext
&
context
,
int
size
,
const
std
::
string
&
name
)
{
initialize
(
context
,
size
,
sizeof
(
T
),
name
);
}
/**
* Recreate the internal storage to have a different size.
*/
void
resize
(
int
size
);
/**
* Get whether this array has been initialized.
*/
bool
isInitialized
()
const
{
return
(
pointer
!=
0
);
}
/**
/**
* Get the number of elements in the array.
* Get the number of elements in the array.
*/
*/
...
@@ -134,7 +169,7 @@ public:
...
@@ -134,7 +169,7 @@ public:
*/
*/
void
copyTo
(
CudaArray
&
dest
)
const
;
void
copyTo
(
CudaArray
&
dest
)
const
;
private:
private:
CudaContext
&
context
;
CudaContext
*
context
;
CUdeviceptr
pointer
;
CUdeviceptr
pointer
;
int
size
,
elementSize
;
int
size
,
elementSize
;
bool
ownsMemory
;
bool
ownsMemory
;
...
...
platforms/cuda/include/CudaBondedUtilities.h
View file @
b8c86406
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -81,7 +81,6 @@ namespace OpenMM {
...
@@ -81,7 +81,6 @@ namespace OpenMM {
class
OPENMM_EXPORT_CUDA
CudaBondedUtilities
{
class
OPENMM_EXPORT_CUDA
CudaBondedUtilities
{
public:
public:
CudaBondedUtilities
(
CudaContext
&
context
);
CudaBondedUtilities
(
CudaContext
&
context
);
~
CudaBondedUtilities
();
/**
/**
* Add a bonded interaction.
* Add a bonded interaction.
*
*
...
@@ -136,7 +135,7 @@ private:
...
@@ -136,7 +135,7 @@ private:
std
::
vector
<
int
>
forceGroup
;
std
::
vector
<
int
>
forceGroup
;
std
::
vector
<
CUdeviceptr
>
arguments
;
std
::
vector
<
CUdeviceptr
>
arguments
;
std
::
vector
<
std
::
string
>
argTypes
;
std
::
vector
<
std
::
string
>
argTypes
;
std
::
vector
<
std
::
vector
<
CudaArray
*
>
>
atomIndices
;
std
::
vector
<
std
::
vector
<
CudaArray
>
>
atomIndices
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
std
::
vector
<
void
*>
kernelArgs
;
std
::
vector
<
void
*>
kernelArgs
;
...
...
platforms/cuda/include/CudaContext.h
View file @
b8c86406
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -41,6 +41,7 @@
...
@@ -41,6 +41,7 @@
#include <builtin_types.h>
#include <builtin_types.h>
#include <vector_functions.h>
#include <vector_functions.h>
#include "windowsExportCuda.h"
#include "windowsExportCuda.h"
#include "CudaArray.h"
#include "CudaPlatform.h"
#include "CudaPlatform.h"
#include "openmm/Kernel.h"
#include "openmm/Kernel.h"
...
@@ -48,7 +49,6 @@ typedef unsigned int tileflags;
...
@@ -48,7 +49,6 @@ typedef unsigned int tileflags;
namespace
OpenMM
{
namespace
OpenMM
{
class
CudaArray
;
class
CudaForceInfo
;
class
CudaForceInfo
;
class
CudaExpressionUtilities
;
class
CudaExpressionUtilities
;
class
CudaIntegrationUtilities
;
class
CudaIntegrationUtilities
;
...
@@ -152,37 +152,37 @@ public:
...
@@ -152,37 +152,37 @@ public:
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/
*/
CudaArray
&
getPosq
()
{
CudaArray
&
getPosq
()
{
return
*
posq
;
return
posq
;
}
}
/**
/**
* Get the array which contains a correction to the position of each atom. This only exists if getUseMixedPrecision() returns true.
* Get the array which contains a correction to the position of each atom. This only exists if getUseMixedPrecision() returns true.
*/
*/
CudaArray
&
getPosqCorrection
()
{
CudaArray
&
getPosqCorrection
()
{
return
*
posqCorrection
;
return
posqCorrection
;
}
}
/**
/**
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
*/
*/
CudaArray
&
getVelm
()
{
CudaArray
&
getVelm
()
{
return
*
velm
;
return
velm
;
}
}
/**
/**
* Get the array which contains the force on each atom (represented as three long longs in 64 bit fixed point).
* Get the array which contains the force on each atom (represented as three long longs in 64 bit fixed point).
*/
*/
CudaArray
&
getForce
()
{
CudaArray
&
getForce
()
{
return
*
force
;
return
force
;
}
}
/**
/**
* Get the array which contains the buffer in which energy is computed.
* Get the array which contains the buffer in which energy is computed.
*/
*/
CudaArray
&
getEnergyBuffer
()
{
CudaArray
&
getEnergyBuffer
()
{
return
*
energyBuffer
;
return
energyBuffer
;
}
}
/**
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
*/
CudaArray
&
getEnergyParamDerivBuffer
()
{
CudaArray
&
getEnergyParamDerivBuffer
()
{
return
*
energyParamDerivBuffer
;
return
energyParamDerivBuffer
;
}
}
/**
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
...
@@ -201,7 +201,7 @@ public:
...
@@ -201,7 +201,7 @@ public:
* Get the array which contains the index of each atom.
* Get the array which contains the index of each atom.
*/
*/
CudaArray
&
getAtomIndexArray
()
{
CudaArray
&
getAtomIndexArray
()
{
return
*
atomIndexDevice
;
return
atomIndexDevice
;
}
}
/**
/**
* Get the number of cells by which the positions are offset.
* Get the number of cells by which the positions are offset.
...
@@ -649,15 +649,15 @@ private:
...
@@ -649,15 +649,15 @@ private:
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
std
::
vector
<
int4
>
posCellOffsets
;
std
::
vector
<
int4
>
posCellOffsets
;
void
*
pinnedBuffer
;
void
*
pinnedBuffer
;
CudaArray
*
posq
;
CudaArray
posq
;
CudaArray
*
posqCorrection
;
CudaArray
posqCorrection
;
CudaArray
*
velm
;
CudaArray
velm
;
CudaArray
*
force
;
CudaArray
force
;
CudaArray
*
energyBuffer
;
CudaArray
energyBuffer
;
CudaArray
*
energySum
;
CudaArray
energySum
;
CudaArray
*
energyParamDerivBuffer
;
CudaArray
energyParamDerivBuffer
;
CudaArray
*
atomIndexDevice
;
CudaArray
atomIndexDevice
;
CudaArray
*
chargeBuffer
;
CudaArray
chargeBuffer
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
vector
<
int
>
atomIndex
;
std
::
vector
<
int
>
atomIndex
;
...
...
platforms/cuda/include/CudaIntegrationUtilities.h
View file @
b8c86406
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -47,20 +47,20 @@ public:
...
@@ -47,20 +47,20 @@ public:
* Get the array which contains position deltas.
* Get the array which contains position deltas.
*/
*/
CudaArray
&
getPosDelta
()
{
CudaArray
&
getPosDelta
()
{
return
*
posDelta
;
return
posDelta
;
}
}
/**
/**
* Get the array which contains random values. Each element is a float4, whose components
* Get the array which contains random values. Each element is a float4, whose components
* are independent, normally distributed random numbers with mean 0 and variance 1.
* are independent, normally distributed random numbers with mean 0 and variance 1.
*/
*/
CudaArray
&
getRandom
()
{
CudaArray
&
getRandom
()
{
return
*
random
;
return
random
;
}
}
/**
/**
* Get the array which contains the current step size.
* Get the array which contains the current step size.
*/
*/
CudaArray
&
getStepSize
()
{
CudaArray
&
getStepSize
()
{
return
*
stepSize
;
return
stepSize
;
}
}
/**
/**
* Set the size to use for the next step.
* Set the size to use for the next step.
...
@@ -131,38 +131,38 @@ private:
...
@@ -131,38 +131,38 @@ private:
CUfunction
ccmaUpdateKernel
;
CUfunction
ccmaUpdateKernel
;
CUfunction
vsitePositionKernel
,
vsiteForceKernel
;
CUfunction
vsitePositionKernel
,
vsiteForceKernel
;
CUfunction
randomKernel
,
timeShiftKernel
;
CUfunction
randomKernel
,
timeShiftKernel
;
CudaArray
*
posDelta
;
CudaArray
posDelta
;
CudaArray
*
settleAtoms
;
CudaArray
settleAtoms
;
CudaArray
*
settleParams
;
CudaArray
settleParams
;
CudaArray
*
shakeAtoms
;
CudaArray
shakeAtoms
;
CudaArray
*
shakeParams
;
CudaArray
shakeParams
;
CudaArray
*
random
;
CudaArray
random
;
CudaArray
*
randomSeed
;
CudaArray
randomSeed
;
CudaArray
*
stepSize
;
CudaArray
stepSize
;
CudaArray
*
ccmaAtoms
;
CudaArray
ccmaAtoms
;
CudaArray
*
ccmaDistance
;
CudaArray
ccmaDistance
;
CudaArray
*
ccmaReducedMass
;
CudaArray
ccmaReducedMass
;
CudaArray
*
ccmaAtomConstraints
;
CudaArray
ccmaAtomConstraints
;
CudaArray
*
ccmaNumAtomConstraints
;
CudaArray
ccmaNumAtomConstraints
;
CudaArray
*
ccmaConstraintMatrixColumn
;
CudaArray
ccmaConstraintMatrixColumn
;
CudaArray
*
ccmaConstraintMatrixValue
;
CudaArray
ccmaConstraintMatrixValue
;
CudaArray
*
ccmaDelta1
;
CudaArray
ccmaDelta1
;
CudaArray
*
ccmaDelta2
;
CudaArray
ccmaDelta2
;
CudaArray
*
ccmaConverged
;
CudaArray
ccmaConverged
;
int
*
ccmaConvergedMemory
;
int
*
ccmaConvergedMemory
;
CUdeviceptr
ccmaConvergedDeviceMemory
;
CUdeviceptr
ccmaConvergedDeviceMemory
;
CUevent
ccmaEvent
;
CUevent
ccmaEvent
;
CudaArray
*
vsite2AvgAtoms
;
CudaArray
vsite2AvgAtoms
;
CudaArray
*
vsite2AvgWeights
;
CudaArray
vsite2AvgWeights
;
CudaArray
*
vsite3AvgAtoms
;
CudaArray
vsite3AvgAtoms
;
CudaArray
*
vsite3AvgWeights
;
CudaArray
vsite3AvgWeights
;
CudaArray
*
vsiteOutOfPlaneAtoms
;
CudaArray
vsiteOutOfPlaneAtoms
;
CudaArray
*
vsiteOutOfPlaneWeights
;
CudaArray
vsiteOutOfPlaneWeights
;
CudaArray
*
vsiteLocalCoordsIndex
;
CudaArray
vsiteLocalCoordsIndex
;
CudaArray
*
vsiteLocalCoordsAtoms
;
CudaArray
vsiteLocalCoordsAtoms
;
CudaArray
*
vsiteLocalCoordsWeights
;
CudaArray
vsiteLocalCoordsWeights
;
CudaArray
*
vsiteLocalCoordsPos
;
CudaArray
vsiteLocalCoordsPos
;
CudaArray
*
vsiteLocalCoordsStartIndex
;
CudaArray
vsiteLocalCoordsStartIndex
;
int
randomPos
;
int
randomPos
;
int
lastSeed
,
numVsites
;
int
lastSeed
,
numVsites
;
double2
lastStepSize
;
double2
lastStepSize
;
...
...
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
b8c86406
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -164,61 +164,61 @@ public:
...
@@ -164,61 +164,61 @@ public:
* Get the array containing the center of each atom block.
* Get the array containing the center of each atom block.
*/
*/
CudaArray
&
getBlockCenters
()
{
CudaArray
&
getBlockCenters
()
{
return
*
blockCenter
;
return
blockCenter
;
}
}
/**
/**
* Get the array containing the dimensions of each atom block.
* Get the array containing the dimensions of each atom block.
*/
*/
CudaArray
&
getBlockBoundingBoxes
()
{
CudaArray
&
getBlockBoundingBoxes
()
{
return
*
blockBoundingBox
;
return
blockBoundingBox
;
}
}
/**
/**
* Get the array whose first element contains the number of tiles with interactions.
* Get the array whose first element contains the number of tiles with interactions.
*/
*/
CudaArray
&
getInteractionCount
()
{
CudaArray
&
getInteractionCount
()
{
return
*
interactionCount
;
return
interactionCount
;
}
}
/**
/**
* Get the array containing tiles with interactions.
* Get the array containing tiles with interactions.
*/
*/
CudaArray
&
getInteractingTiles
()
{
CudaArray
&
getInteractingTiles
()
{
return
*
interactingTiles
;
return
interactingTiles
;
}
}
/**
/**
* Get the array containing the atoms in each tile with interactions.
* Get the array containing the atoms in each tile with interactions.
*/
*/
CudaArray
&
getInteractingAtoms
()
{
CudaArray
&
getInteractingAtoms
()
{
return
*
interactingAtoms
;
return
interactingAtoms
;
}
}
/**
/**
* Get the array containing single pairs in the neighbor list.
* Get the array containing single pairs in the neighbor list.
*/
*/
CudaArray
&
getSinglePairs
()
{
CudaArray
&
getSinglePairs
()
{
return
*
singlePairs
;
return
singlePairs
;
}
}
/**
/**
* Get the array containing exclusion flags.
* Get the array containing exclusion flags.
*/
*/
CudaArray
&
getExclusions
()
{
CudaArray
&
getExclusions
()
{
return
*
exclusions
;
return
exclusions
;
}
}
/**
/**
* Get the array containing tiles with exclusions.
* Get the array containing tiles with exclusions.
*/
*/
CudaArray
&
getExclusionTiles
()
{
CudaArray
&
getExclusionTiles
()
{
return
*
exclusionTiles
;
return
exclusionTiles
;
}
}
/**
/**
* Get the array containing the index into the exclusion array for each tile.
* Get the array containing the index into the exclusion array for each tile.
*/
*/
CudaArray
&
getExclusionIndices
()
{
CudaArray
&
getExclusionIndices
()
{
return
*
exclusionIndices
;
return
exclusionIndices
;
}
}
/**
/**
* Get the array listing where the exclusion data starts for each row.
* Get the array listing where the exclusion data starts for each row.
*/
*/
CudaArray
&
getExclusionRowIndices
()
{
CudaArray
&
getExclusionRowIndices
()
{
return
*
exclusionRowIndices
;
return
exclusionRowIndices
;
}
}
/**
/**
* Get the index of the first tile this context is responsible for processing.
* Get the index of the first tile this context is responsible for processing.
...
@@ -270,22 +270,22 @@ private:
...
@@ -270,22 +270,22 @@ private:
class
BlockSortTrait
;
class
BlockSortTrait
;
CudaContext
&
context
;
CudaContext
&
context
;
std
::
map
<
int
,
KernelSet
>
groupKernels
;
std
::
map
<
int
,
KernelSet
>
groupKernels
;
CudaArray
*
exclusionTiles
;
CudaArray
exclusionTiles
;
CudaArray
*
exclusions
;
CudaArray
exclusions
;
CudaArray
*
exclusionIndices
;
CudaArray
exclusionIndices
;
CudaArray
*
exclusionRowIndices
;
CudaArray
exclusionRowIndices
;
CudaArray
*
interactingTiles
;
CudaArray
interactingTiles
;
CudaArray
*
interactingAtoms
;
CudaArray
interactingAtoms
;
CudaArray
*
interactionCount
;
CudaArray
interactionCount
;
CudaArray
*
singlePairs
;
CudaArray
singlePairs
;
CudaArray
*
singlePairCount
;
CudaArray
singlePairCount
;
CudaArray
*
blockCenter
;
CudaArray
blockCenter
;
CudaArray
*
blockBoundingBox
;
CudaArray
blockBoundingBox
;
CudaArray
*
sortedBlocks
;
CudaArray
sortedBlocks
;
CudaArray
*
sortedBlockCenter
;
CudaArray
sortedBlockCenter
;
CudaArray
*
sortedBlockBoundingBox
;
CudaArray
sortedBlockBoundingBox
;
CudaArray
*
oldPositions
;
CudaArray
oldPositions
;
CudaArray
*
rebuildNeighborList
;
CudaArray
rebuildNeighborList
;
CudaSort
*
blockSorter
;
CudaSort
*
blockSorter
;
CUevent
downloadCountEvent
;
CUevent
downloadCountEvent
;
int
*
pinnedCountBuffer
;
int
*
pinnedCountBuffer
;
...
...
platforms/cuda/include/CudaSort.h
View file @
b8c86406
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2010-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2010-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -87,11 +87,11 @@ public:
...
@@ -87,11 +87,11 @@ public:
private:
private:
CudaContext
&
context
;
CudaContext
&
context
;
SortTrait
*
trait
;
SortTrait
*
trait
;
CudaArray
*
dataRange
;
CudaArray
dataRange
;
CudaArray
*
bucketOfElement
;
CudaArray
bucketOfElement
;
CudaArray
*
offsetInBucket
;
CudaArray
offsetInBucket
;
CudaArray
*
bucketOffset
;
CudaArray
bucketOffset
;
CudaArray
*
buckets
;
CudaArray
buckets
;
CUfunction
shortListKernel
,
computeRangeKernel
,
assignElementsKernel
,
computeBucketPositionsKernel
,
copyToBucketsKernel
,
sortBucketsKernel
;
CUfunction
shortListKernel
,
computeRangeKernel
,
assignElementsKernel
,
computeBucketPositionsKernel
,
copyToBucketsKernel
,
sortBucketsKernel
;
unsigned
int
dataLength
,
rangeKernelSize
,
positionsKernelSize
,
sortKernelSize
;
unsigned
int
dataLength
,
rangeKernelSize
,
positionsKernelSize
,
sortKernelSize
;
bool
isShortList
;
bool
isShortList
;
...
...
platforms/cuda/src/CudaArray.cpp
View file @
b8c86406
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2012 Stanford University and the Authors.
*
* Portions copyright (c) 2012
-2018
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -32,18 +32,15 @@
...
@@ -32,18 +32,15 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
CudaArray
::
CudaArray
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
:
CudaArray
::
CudaArray
()
:
pointer
(
0
),
ownsMemory
(
false
)
{
context
(
context
),
size
(
size
),
elementSize
(
elementSize
),
name
(
name
),
ownsMemory
(
true
)
{
}
CUresult
result
=
cuMemAlloc
(
&
pointer
,
size
*
elementSize
);
if
(
result
!=
CUDA_SUCCESS
)
{
CudaArray
::
CudaArray
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
:
pointer
(
0
)
{
std
::
stringstream
str
;
initialize
(
context
,
size
,
elementSize
,
name
);
str
<<
"Error creating array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
}
}
CudaArray
::~
CudaArray
()
{
CudaArray
::~
CudaArray
()
{
if
(
ownsMemory
&&
context
.
getContextIsValid
())
{
if
(
pointer
!=
0
&&
ownsMemory
&&
context
->
getContextIsValid
())
{
CUresult
result
=
cuMemFree
(
pointer
);
CUresult
result
=
cuMemFree
(
pointer
);
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
...
@@ -53,12 +50,45 @@ CudaArray::~CudaArray() {
...
@@ -53,12 +50,45 @@ CudaArray::~CudaArray() {
}
}
}
}
void
CudaArray
::
initialize
(
CudaContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
{
if
(
this
->
pointer
!=
0
)
throw
OpenMMException
(
"CudaArray has already been initialized"
);
this
->
context
=
&
context
;
this
->
size
=
size
;
this
->
elementSize
=
elementSize
;
this
->
name
=
name
;
ownsMemory
=
true
;
CUresult
result
=
cuMemAlloc
(
&
pointer
,
size
*
elementSize
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error creating array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
}
void
CudaArray
::
resize
(
int
size
)
{
if
(
pointer
==
0
)
throw
OpenMMException
(
"CudaArray has not been initialized"
);
if
(
!
ownsMemory
)
throw
OpenMMException
(
"Cannot resize an array that does not own its storage"
);
CUresult
result
=
cuMemFree
(
pointer
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error deleting array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
pointer
=
0
;
initialize
(
*
context
,
size
,
elementSize
,
name
);
}
void
CudaArray
::
upload
(
const
void
*
data
,
bool
blocking
)
{
void
CudaArray
::
upload
(
const
void
*
data
,
bool
blocking
)
{
if
(
pointer
==
0
)
throw
OpenMMException
(
"CudaArray has not been initialized"
);
CUresult
result
;
CUresult
result
;
if
(
blocking
)
if
(
blocking
)
result
=
cuMemcpyHtoD
(
pointer
,
data
,
size
*
elementSize
);
result
=
cuMemcpyHtoD
(
pointer
,
data
,
size
*
elementSize
);
else
else
result
=
cuMemcpyHtoDAsync
(
pointer
,
data
,
size
*
elementSize
,
context
.
getCurrentStream
());
result
=
cuMemcpyHtoDAsync
(
pointer
,
data
,
size
*
elementSize
,
context
->
getCurrentStream
());
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
str
<<
"Error uploading array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
@@ -67,11 +97,13 @@ void CudaArray::upload(const void* data, bool blocking) {
...
@@ -67,11 +97,13 @@ void CudaArray::upload(const void* data, bool blocking) {
}
}
void
CudaArray
::
download
(
void
*
data
,
bool
blocking
)
const
{
void
CudaArray
::
download
(
void
*
data
,
bool
blocking
)
const
{
if
(
pointer
==
0
)
throw
OpenMMException
(
"CudaArray has not been initialized"
);
CUresult
result
;
CUresult
result
;
if
(
blocking
)
if
(
blocking
)
result
=
cuMemcpyDtoH
(
data
,
pointer
,
size
*
elementSize
);
result
=
cuMemcpyDtoH
(
data
,
pointer
,
size
*
elementSize
);
else
else
result
=
cuMemcpyDtoHAsync
(
data
,
pointer
,
size
*
elementSize
,
context
.
getCurrentStream
());
result
=
cuMemcpyDtoHAsync
(
data
,
pointer
,
size
*
elementSize
,
context
->
getCurrentStream
());
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
str
<<
"Error downloading array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
str
<<
"Error downloading array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
@@ -80,9 +112,11 @@ void CudaArray::download(void* data, bool blocking) const {
...
@@ -80,9 +112,11 @@ void CudaArray::download(void* data, bool blocking) const {
}
}
void
CudaArray
::
copyTo
(
CudaArray
&
dest
)
const
{
void
CudaArray
::
copyTo
(
CudaArray
&
dest
)
const
{
if
(
pointer
==
0
)
throw
OpenMMException
(
"CudaArray has not been initialized"
);
if
(
dest
.
getSize
()
!=
size
||
dest
.
getElementSize
()
!=
elementSize
)
if
(
dest
.
getSize
()
!=
size
||
dest
.
getElementSize
()
!=
elementSize
)
throw
OpenMMException
(
"Error copying array "
+
name
+
" to "
+
dest
.
getName
()
+
": The destination array does not match the size of the array"
);
throw
OpenMMException
(
"Error copying array "
+
name
+
" to "
+
dest
.
getName
()
+
": The destination array does not match the size of the array"
);
CUresult
result
=
cuMemcpyDtoDAsync
(
dest
.
getDevicePointer
(),
pointer
,
size
*
elementSize
,
context
.
getCurrentStream
());
CUresult
result
=
cuMemcpyDtoDAsync
(
dest
.
getDevicePointer
(),
pointer
,
size
*
elementSize
,
context
->
getCurrentStream
());
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
str
<<
"Error copying array "
<<
name
<<
" to "
<<
dest
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
str
<<
"Error copying array "
<<
name
<<
" to "
<<
dest
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
...
...
platforms/cuda/src/CudaBondedUtilities.cpp
View file @
b8c86406
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -37,12 +37,6 @@ using namespace std;
...
@@ -37,12 +37,6 @@ using namespace std;
CudaBondedUtilities
::
CudaBondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
numForceBuffers
(
0
),
maxBonds
(
0
),
allGroups
(
0
),
hasInitializedKernels
(
false
)
{
CudaBondedUtilities
::
CudaBondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
numForceBuffers
(
0
),
maxBonds
(
0
),
allGroups
(
0
),
hasInitializedKernels
(
false
)
{
}
}
CudaBondedUtilities
::~
CudaBondedUtilities
()
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
(
int
)
atomIndices
[
i
].
size
();
j
++
)
delete
atomIndices
[
i
][
j
];
}
void
CudaBondedUtilities
::
addInteraction
(
const
vector
<
vector
<
int
>
>&
atoms
,
const
string
&
source
,
int
group
)
{
void
CudaBondedUtilities
::
addInteraction
(
const
vector
<
vector
<
int
>
>&
atoms
,
const
string
&
source
,
int
group
)
{
if
(
atoms
.
size
()
>
0
)
{
if
(
atoms
.
size
()
>
0
)
{
forceAtoms
.
push_back
(
atoms
);
forceAtoms
.
push_back
(
atoms
);
...
@@ -99,9 +93,9 @@ void CudaBondedUtilities::initialize(const System& system) {
...
@@ -99,9 +93,9 @@ void CudaBondedUtilities::initialize(const System& system) {
for
(
int
atom
=
0
;
atom
<
width
;
atom
++
)
for
(
int
atom
=
0
;
atom
<
width
;
atom
++
)
indexVec
[
bond
*
paddedWidth
+
atom
]
=
forceAtoms
[
i
][
bond
][
startAtom
+
atom
];
indexVec
[
bond
*
paddedWidth
+
atom
]
=
forceAtoms
[
i
][
bond
][
startAtom
+
atom
];
}
}
CudaArray
*
indices
=
new
CudaArray
(
context
,
numBonds
,
4
*
paddedWidth
,
"bondedIndices"
);
atomIndices
[
i
].
push_back
(
CudaArray
()
);
i
ndices
->
upload
(
&
indexVec
[
0
]
);
atomI
ndices
[
i
].
back
().
initialize
(
context
,
numBonds
,
4
*
paddedWidth
,
"bondedIndices"
);
atomIndices
[
i
].
push_back
(
indices
);
atomIndices
[
i
].
back
().
upload
(
&
indexVec
[
0
]
);
startAtom
+=
width
;
startAtom
+=
width
;
}
}
}
}
...
@@ -115,7 +109,7 @@ void CudaBondedUtilities::initialize(const System& system) {
...
@@ -115,7 +109,7 @@ void CudaBondedUtilities::initialize(const System& system) {
s
<<
"extern
\"
C
\"
__global__ void computeBondedForces(unsigned long long* __restrict__ forceBuffer, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
s
<<
"extern
\"
C
\"
__global__ void computeBondedForces(unsigned long long* __restrict__ forceBuffer, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
{
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
force
].
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
force
].
size
();
i
++
)
{
int
indexWidth
=
atomIndices
[
force
][
i
]
->
getElementSize
()
/
4
;
int
indexWidth
=
atomIndices
[
force
][
i
]
.
getElementSize
()
/
4
;
string
indexType
=
"uint"
+
context
.
intToString
(
indexWidth
);
string
indexType
=
"uint"
+
context
.
intToString
(
indexWidth
);
s
<<
", const "
<<
indexType
<<
"* __restrict__ atomIndices"
<<
force
<<
"_"
<<
i
;
s
<<
", const "
<<
indexType
<<
"* __restrict__ atomIndices"
<<
force
<<
"_"
<<
i
;
}
}
...
@@ -154,7 +148,7 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
...
@@ -154,7 +148,7 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
s
<<
"for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < "
<<
numBonds
<<
"; index += blockDim.x*gridDim.x) {
\n
"
;
s
<<
"for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < "
<<
numBonds
<<
"; index += blockDim.x*gridDim.x) {
\n
"
;
int
startAtom
=
0
;
int
startAtom
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
forceIndex
].
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
forceIndex
].
size
();
i
++
)
{
int
indexWidth
=
atomIndices
[
forceIndex
][
i
]
->
getElementSize
()
/
4
;
int
indexWidth
=
atomIndices
[
forceIndex
][
i
]
.
getElementSize
()
/
4
;
string
indexType
=
"uint"
+
context
.
intToString
(
indexWidth
);
string
indexType
=
"uint"
+
context
.
intToString
(
indexWidth
);
s
<<
" "
<<
indexType
<<
" atoms"
<<
i
<<
" = atomIndices"
<<
forceIndex
<<
"_"
<<
i
<<
"[index];
\n
"
;
s
<<
" "
<<
indexType
<<
" atoms"
<<
i
<<
" = atomIndices"
<<
forceIndex
<<
"_"
<<
i
<<
"[index];
\n
"
;
int
atomsToLoad
=
min
(
indexWidth
,
numAtoms
-
startAtom
);
int
atomsToLoad
=
min
(
indexWidth
,
numAtoms
-
startAtom
);
...
@@ -191,7 +185,7 @@ void CudaBondedUtilities::computeInteractions(int groups) {
...
@@ -191,7 +185,7 @@ void CudaBondedUtilities::computeInteractions(int groups) {
kernelArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
kernelArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
(
int
)
atomIndices
[
i
].
size
();
j
++
)
for
(
int
j
=
0
;
j
<
(
int
)
atomIndices
[
i
].
size
();
j
++
)
kernelArgs
.
push_back
(
&
atomIndices
[
i
][
j
]
->
getDevicePointer
());
kernelArgs
.
push_back
(
&
atomIndices
[
i
][
j
]
.
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
kernelArgs
.
push_back
(
&
arguments
[
i
]);
kernelArgs
.
push_back
(
&
arguments
[
i
]);
if
(
energyParameterDerivatives
.
size
()
>
0
)
if
(
energyParameterDerivatives
.
size
()
>
0
)
...
...
platforms/cuda/src/CudaContext.cpp
View file @
b8c86406
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -108,8 +108,7 @@ static int executeInWindows(const string &command) {
...
@@ -108,8 +108,7 @@ static int executeInWindows(const string &command) {
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
,
CudaContext
*
originalContext
)
:
system
(
system
),
currentStream
(
0
),
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
,
CudaContext
*
originalContext
)
:
system
(
system
),
currentStream
(
0
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
energySum
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
chargeBuffer
(
NULL
),
pinnedBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
// Determine what compiler to use.
// Determine what compiler to use.
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
...
@@ -268,8 +267,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -268,8 +267,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"BALLOT(var)"
]
=
"__ballot(var);"
;
compilationDefines
[
"BALLOT(var)"
]
=
"__ballot(var);"
;
}
}
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
posq
=
CudaArray
::
creat
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posq
.
initializ
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
creat
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
velm
.
initializ
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
compilationDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
compilationDefines
[
"make_real2"
]
=
"make_double2"
;
compilationDefines
[
"make_real2"
]
=
"make_double2"
;
compilationDefines
[
"make_real3"
]
=
"make_double3"
;
compilationDefines
[
"make_real3"
]
=
"make_double3"
;
...
@@ -279,9 +278,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -279,9 +278,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"make_mixed4"
]
=
"make_double4"
;
compilationDefines
[
"make_mixed4"
]
=
"make_double4"
;
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
posq
=
CudaArray
::
creat
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posq
.
initializ
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posqCorrection
=
CudaArray
::
creat
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posqCorrection"
);
posqCorrection
.
initializ
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posqCorrection"
);
velm
=
CudaArray
::
creat
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
velm
.
initializ
e
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"USE_MIXED_PRECISION"
]
=
"1"
;
compilationDefines
[
"USE_MIXED_PRECISION"
]
=
"1"
;
compilationDefines
[
"make_real2"
]
=
"make_float2"
;
compilationDefines
[
"make_real2"
]
=
"make_float2"
;
compilationDefines
[
"make_real3"
]
=
"make_float3"
;
compilationDefines
[
"make_real3"
]
=
"make_float3"
;
...
@@ -291,8 +290,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -291,8 +290,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"make_mixed4"
]
=
"make_double4"
;
compilationDefines
[
"make_mixed4"
]
=
"make_double4"
;
}
}
else
{
else
{
posq
=
CudaArray
::
creat
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posq
.
initializ
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
creat
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
velm
.
initializ
e
<
float4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"make_real2"
]
=
"make_float2"
;
compilationDefines
[
"make_real2"
]
=
"make_float2"
;
compilationDefines
[
"make_real3"
]
=
"make_float3"
;
compilationDefines
[
"make_real3"
]
=
"make_float3"
;
compilationDefines
[
"make_real4"
]
=
"make_float4"
;
compilationDefines
[
"make_real4"
]
=
"make_float4"
;
...
@@ -415,24 +414,6 @@ CudaContext::~CudaContext() {
...
@@ -415,24 +414,6 @@ CudaContext::~CudaContext() {
delete
computation
;
delete
computation
;
if
(
pinnedBuffer
!=
NULL
)
if
(
pinnedBuffer
!=
NULL
)
cuMemFreeHost
(
pinnedBuffer
);
cuMemFreeHost
(
pinnedBuffer
);
if
(
posq
!=
NULL
)
delete
posq
;
if
(
posqCorrection
!=
NULL
)
delete
posqCorrection
;
if
(
velm
!=
NULL
)
delete
velm
;
if
(
force
!=
NULL
)
delete
force
;
if
(
energyBuffer
!=
NULL
)
delete
energyBuffer
;
if
(
energySum
!=
NULL
)
delete
energySum
;
if
(
energyParamDerivBuffer
!=
NULL
)
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
delete
atomIndexDevice
;
if
(
chargeBuffer
!=
NULL
)
delete
chargeBuffer
;
if
(
integration
!=
NULL
)
if
(
integration
!=
NULL
)
delete
integration
;
delete
integration
;
if
(
expression
!=
NULL
)
if
(
expression
!=
NULL
)
...
@@ -456,20 +437,20 @@ void CudaContext::initialize() {
...
@@ -456,20 +437,20 @@ void CudaContext::initialize() {
string
errorMessage
=
"Error initializing Context"
;
string
errorMessage
=
"Error initializing Context"
;
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
energyBuffer
=
CudaArray
::
creat
e
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
.
initializ
e
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
creat
e
<
double
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initializ
e
<
double
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
energyBuffer
=
CudaArray
::
creat
e
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
.
initializ
e
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
creat
e
<
double
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initializ
e
<
double
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
}
else
{
else
{
energyBuffer
=
CudaArray
::
creat
e
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
.
initializ
e
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
creat
e
<
float
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initializ
e
<
float
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
}
}
...
@@ -480,24 +461,24 @@ void CudaContext::initialize() {
...
@@ -480,24 +461,24 @@ void CudaContext::initialize() {
else
else
((
float4
*
)
pinnedBuffer
)[
i
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
mass
==
0.0
?
0.0
f
:
(
float
)
(
1.0
/
mass
));
((
float4
*
)
pinnedBuffer
)[
i
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
mass
==
0.0
?
0.0
f
:
(
float
)
(
1.0
/
mass
));
}
}
velm
->
upload
(
pinnedBuffer
);
velm
.
upload
(
pinnedBuffer
);
bonded
->
initialize
(
system
);
bonded
->
initialize
(
system
);
force
=
CudaArray
::
creat
e
<
long
long
>
(
*
this
,
paddedNumAtoms
*
3
,
"force"
);
force
.
initializ
e
<
long
long
>
(
*
this
,
paddedNumAtoms
*
3
,
"force"
);
addAutoclearBuffer
(
force
->
getDevicePointer
(),
force
->
getSize
()
*
force
->
getElementSize
());
addAutoclearBuffer
(
force
.
getDevicePointer
(),
force
.
getSize
()
*
force
.
getElementSize
());
addAutoclearBuffer
(
energyBuffer
->
getDevicePointer
(),
energyBuffer
->
getSize
()
*
energyBuffer
->
getElementSize
());
addAutoclearBuffer
(
energyBuffer
.
getDevicePointer
(),
energyBuffer
.
getSize
()
*
energyBuffer
.
getElementSize
());
int
numEnergyParamDerivs
=
energyParamDerivNames
.
size
();
int
numEnergyParamDerivs
=
energyParamDerivNames
.
size
();
if
(
numEnergyParamDerivs
>
0
)
{
if
(
numEnergyParamDerivs
>
0
)
{
if
(
useDoublePrecision
||
useMixedPrecision
)
if
(
useDoublePrecision
||
useMixedPrecision
)
energyParamDerivBuffer
=
CudaArray
::
creat
e
<
double
>
(
*
this
,
numEnergyParamDerivs
*
numEnergyBuffers
,
"energyParamDerivBuffer"
);
energyParamDerivBuffer
.
initializ
e
<
double
>
(
*
this
,
numEnergyParamDerivs
*
numEnergyBuffers
,
"energyParamDerivBuffer"
);
else
else
energyParamDerivBuffer
=
CudaArray
::
creat
e
<
float
>
(
*
this
,
numEnergyParamDerivs
*
numEnergyBuffers
,
"energyParamDerivBuffer"
);
energyParamDerivBuffer
.
initializ
e
<
float
>
(
*
this
,
numEnergyParamDerivs
*
numEnergyBuffers
,
"energyParamDerivBuffer"
);
addAutoclearBuffer
(
*
energyParamDerivBuffer
);
addAutoclearBuffer
(
energyParamDerivBuffer
);
}
}
atomIndexDevice
=
CudaArray
::
creat
e
<
int
>
(
*
this
,
paddedNumAtoms
,
"atomIndex"
);
atomIndexDevice
.
initializ
e
<
int
>
(
*
this
,
paddedNumAtoms
,
"atomIndex"
);
atomIndex
.
resize
(
paddedNumAtoms
);
atomIndex
.
resize
(
paddedNumAtoms
);
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
atomIndex
[
i
]
=
i
;
atomIndex
[
i
]
=
i
;
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
.
upload
(
atomIndex
);
findMoleculeGroups
();
findMoleculeGroups
();
nonbonded
->
initialize
(
system
);
nonbonded
->
initialize
(
system
);
}
}
...
@@ -890,11 +871,11 @@ void CudaContext::clearAutoclearBuffers() {
...
@@ -890,11 +871,11 @@ void CudaContext::clearAutoclearBuffers() {
}
}
double
CudaContext
::
reduceEnergy
()
{
double
CudaContext
::
reduceEnergy
()
{
int
bufferSize
=
energyBuffer
->
getSize
();
int
bufferSize
=
energyBuffer
.
getSize
();
int
workGroupSize
=
512
;
int
workGroupSize
=
512
;
void
*
args
[]
=
{
&
energyBuffer
->
getDevicePointer
(),
&
energySum
->
getDevicePointer
(),
&
bufferSize
,
&
workGroupSize
};
void
*
args
[]
=
{
&
energyBuffer
.
getDevicePointer
(),
&
energySum
.
getDevicePointer
(),
&
bufferSize
,
&
workGroupSize
};
executeKernel
(
reduceEnergyKernel
,
args
,
workGroupSize
,
workGroupSize
,
workGroupSize
*
energyBuffer
->
getElementSize
());
executeKernel
(
reduceEnergyKernel
,
args
,
workGroupSize
,
workGroupSize
,
workGroupSize
*
energyBuffer
.
getElementSize
());
energySum
->
download
(
pinnedBuffer
);
energySum
.
download
(
pinnedBuffer
);
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
return
*
((
double
*
)
pinnedBuffer
);
return
*
((
double
*
)
pinnedBuffer
);
else
else
...
@@ -902,21 +883,21 @@ double CudaContext::reduceEnergy() {
...
@@ -902,21 +883,21 @@ double CudaContext::reduceEnergy() {
}
}
void
CudaContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
void
CudaContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
if
(
chargeBuffer
==
NULL
)
if
(
!
chargeBuffer
.
isInitialized
()
)
chargeBuffer
=
new
CudaArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
chargeBuffer
.
initialize
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
if
(
getUseDoublePrecision
())
{
if
(
getUseDoublePrecision
())
{
double
*
c
=
(
double
*
)
getPinnedBuffer
();
double
*
c
=
(
double
*
)
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
c
[
i
]
=
charges
[
i
];
c
[
i
]
=
charges
[
i
];
chargeBuffer
->
upload
(
c
);
chargeBuffer
.
upload
(
c
);
}
}
else
{
else
{
float
*
c
=
(
float
*
)
getPinnedBuffer
();
float
*
c
=
(
float
*
)
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
c
[
i
]
=
(
float
)
charges
[
i
];
c
[
i
]
=
(
float
)
charges
[
i
];
chargeBuffer
->
upload
(
c
);
chargeBuffer
.
upload
(
c
);
}
}
void
*
args
[]
=
{
&
chargeBuffer
->
getDevicePointer
(),
&
posq
->
getDevicePointer
(),
&
atomIndexDevice
->
getDevicePointer
(),
&
numAtoms
};
void
*
args
[]
=
{
&
chargeBuffer
.
getDevicePointer
(),
&
posq
.
getDevicePointer
(),
&
atomIndexDevice
.
getDevicePointer
(),
&
numAtoms
};
executeKernel
(
setChargesKernel
,
args
,
numAtoms
);
executeKernel
(
setChargesKernel
,
args
,
numAtoms
);
}
}
...
@@ -1178,16 +1159,16 @@ bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
...
@@ -1178,16 +1159,16 @@ bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
vector
<
double4
>
newPosq
(
paddedNumAtoms
,
make_double4
(
0
,
0
,
0
,
0
));
vector
<
double4
>
newPosq
(
paddedNumAtoms
,
make_double4
(
0
,
0
,
0
,
0
));
vector
<
double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
double4
>
newVelm
(
paddedNumAtoms
,
make_double4
(
0
,
0
,
0
,
0
));
vector
<
double4
>
newVelm
(
paddedNumAtoms
,
make_double4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
posq
.
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
posq
.
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
velm
.
upload
(
newVelm
);
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
vector
<
float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
float4
>
oldPosq
(
paddedNumAtoms
);
...
@@ -1196,8 +1177,8 @@ bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
...
@@ -1196,8 +1177,8 @@ bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
vector
<
float4
>
newPosqCorrection
(
paddedNumAtoms
,
make_float4
(
0
,
0
,
0
,
0
));
vector
<
float4
>
newPosqCorrection
(
paddedNumAtoms
,
make_float4
(
0
,
0
,
0
,
0
));
vector
<
double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
double4
>
newVelm
(
paddedNumAtoms
,
make_double4
(
0
,
0
,
0
,
0
));
vector
<
double4
>
newVelm
(
paddedNumAtoms
,
make_double4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
posq
.
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
...
@@ -1205,31 +1186,31 @@ bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
...
@@ -1205,31 +1186,31 @@ bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
newVelm
[
index
]
=
oldVelm
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
posq
.
upload
(
newPosq
);
posqCorrection
->
upload
(
newPosqCorrection
);
posqCorrection
.
upload
(
newPosqCorrection
);
velm
->
upload
(
newVelm
);
velm
.
upload
(
newVelm
);
}
}
else
{
else
{
vector
<
float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
float4
>
newPosq
(
paddedNumAtoms
,
make_float4
(
0
,
0
,
0
,
0
));
vector
<
float4
>
newPosq
(
paddedNumAtoms
,
make_float4
(
0
,
0
,
0
,
0
));
vector
<
float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
float4
>
newVelm
(
paddedNumAtoms
,
make_float4
(
0
,
0
,
0
,
0
));
vector
<
float4
>
newVelm
(
paddedNumAtoms
,
make_float4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
posq
.
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
posq
.
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
velm
.
upload
(
newVelm
);
}
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
atomIndex
[
i
]
=
i
;
atomIndex
[
i
]
=
i
;
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
}
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
.
upload
(
atomIndex
);
findMoleculeGroups
();
findMoleculeGroups
();
for
(
auto
listener
:
reorderListeners
)
for
(
auto
listener
:
reorderListeners
)
listener
->
execute
();
listener
->
execute
();
...
@@ -1262,10 +1243,10 @@ void CudaContext::reorderAtomsImpl() {
...
@@ -1262,10 +1243,10 @@ void CudaContext::reorderAtomsImpl() {
vector
<
Real4
>
oldPosqCorrection
(
paddedNumAtoms
,
padding
);
vector
<
Real4
>
oldPosqCorrection
(
paddedNumAtoms
,
padding
);
Mixed4
paddingMixed
=
{
0
,
0
,
0
,
0
};
Mixed4
paddingMixed
=
{
0
,
0
,
0
,
0
};
vector
<
Mixed4
>
oldVelm
(
paddedNumAtoms
,
paddingMixed
);
vector
<
Mixed4
>
oldVelm
(
paddedNumAtoms
,
paddingMixed
);
posq
->
download
(
oldPosq
);
posq
.
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
.
download
(
oldVelm
);
if
(
useMixedPrecision
)
if
(
useMixedPrecision
)
posqCorrection
->
download
(
oldPosqCorrection
);
posqCorrection
.
download
(
oldPosqCorrection
);
Real
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
Real
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
Real
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
Real
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
Real
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
Real
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
...
@@ -1409,11 +1390,11 @@ void CudaContext::reorderAtomsImpl() {
...
@@ -1409,11 +1390,11 @@ void CudaContext::reorderAtomsImpl() {
atomIndex
[
i
]
=
originalIndex
[
i
];
atomIndex
[
i
]
=
originalIndex
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
posq
.
upload
(
newPosq
);
if
(
useMixedPrecision
)
if
(
useMixedPrecision
)
posqCorrection
->
upload
(
newPosqCorrection
);
posqCorrection
.
upload
(
newPosqCorrection
);
velm
->
upload
(
newVelm
);
velm
.
upload
(
newVelm
);
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
.
upload
(
atomIndex
);
for
(
auto
listener
:
reorderListeners
)
for
(
auto
listener
:
reorderListeners
)
listener
->
execute
();
listener
->
execute
();
}
}
...
...
platforms/cuda/src/CudaIntegrationUtilities.cpp
View file @
b8c86406
This diff is collapsed.
Click to expand it.
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
b8c86406
...
@@ -63,10 +63,7 @@ private:
...
@@ -63,10 +63,7 @@ private:
};
};
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
),
canUsePairList
(
true
)
{
interactionCount
(
NULL
),
singlePairs
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
),
canUsePairList
(
true
)
{
// Decide how many thread blocks to use.
// Decide how many thread blocks to use.
string
errorMessage
=
"Error initializing nonbonded utilities"
;
string
errorMessage
=
"Error initializing nonbonded utilities"
;
...
@@ -79,36 +76,6 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c
...
@@ -79,36 +76,6 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c
}
}
CudaNonbondedUtilities
::~
CudaNonbondedUtilities
()
{
CudaNonbondedUtilities
::~
CudaNonbondedUtilities
()
{
if
(
exclusionIndices
!=
NULL
)
delete
exclusionIndices
;
if
(
exclusionRowIndices
!=
NULL
)
delete
exclusionRowIndices
;
if
(
exclusionTiles
!=
NULL
)
delete
exclusionTiles
;
if
(
exclusions
!=
NULL
)
delete
exclusions
;
if
(
interactingTiles
!=
NULL
)
delete
interactingTiles
;
if
(
interactingAtoms
!=
NULL
)
delete
interactingAtoms
;
if
(
interactionCount
!=
NULL
)
delete
interactionCount
;
if
(
singlePairs
!=
NULL
)
delete
singlePairs
;
if
(
blockCenter
!=
NULL
)
delete
blockCenter
;
if
(
blockBoundingBox
!=
NULL
)
delete
blockBoundingBox
;
if
(
sortedBlocks
!=
NULL
)
delete
sortedBlocks
;
if
(
sortedBlockCenter
!=
NULL
)
delete
sortedBlockCenter
;
if
(
sortedBlockBoundingBox
!=
NULL
)
delete
sortedBlockBoundingBox
;
if
(
oldPositions
!=
NULL
)
delete
oldPositions
;
if
(
rebuildNeighborList
!=
NULL
)
delete
rebuildNeighborList
;
if
(
blockSorter
!=
NULL
)
if
(
blockSorter
!=
NULL
)
delete
blockSorter
;
delete
blockSorter
;
if
(
pinnedCountBuffer
!=
NULL
)
if
(
pinnedCountBuffer
!=
NULL
)
...
@@ -220,8 +187,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -220,8 +187,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
exclusionTilesVec
.
push_back
(
make_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
exclusionTilesVec
.
push_back
(
make_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
compareUshort2
);
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
compareUshort2
);
exclusionTiles
=
CudaArray
::
creat
e
<
ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
.
initializ
e
<
ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
->
upload
(
exclusionTilesVec
);
exclusionTiles
.
upload
(
exclusionTilesVec
);
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
for
(
int
i
=
0
;
i
<
(
int
)
exclusionTilesVec
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
exclusionTilesVec
.
size
();
i
++
)
{
ushort2
tile
=
exclusionTilesVec
[
i
];
ushort2
tile
=
exclusionTilesVec
[
i
];
...
@@ -242,16 +209,16 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -242,16 +209,16 @@ void CudaNonbondedUtilities::initialize(const System& system) {
maxExclusions
=
0
;
maxExclusions
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
exclusionBlocksForBlock
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
exclusionBlocksForBlock
.
size
();
i
++
)
maxExclusions
=
(
maxExclusions
>
exclusionBlocksForBlock
[
i
].
size
()
?
maxExclusions
:
exclusionBlocksForBlock
[
i
].
size
());
maxExclusions
=
(
maxExclusions
>
exclusionBlocksForBlock
[
i
].
size
()
?
maxExclusions
:
exclusionBlocksForBlock
[
i
].
size
());
exclusionIndices
=
CudaArray
::
creat
e
<
unsigned
int
>
(
context
,
exclusionIndicesVec
.
size
(),
"exclusionIndices"
);
exclusionIndices
.
initializ
e
<
unsigned
int
>
(
context
,
exclusionIndicesVec
.
size
(),
"exclusionIndices"
);
exclusionRowIndices
=
CudaArray
::
creat
e
<
unsigned
int
>
(
context
,
exclusionRowIndicesVec
.
size
(),
"exclusionRowIndices"
);
exclusionRowIndices
.
initializ
e
<
unsigned
int
>
(
context
,
exclusionRowIndicesVec
.
size
(),
"exclusionRowIndices"
);
exclusionIndices
->
upload
(
exclusionIndicesVec
);
exclusionIndices
.
upload
(
exclusionIndicesVec
);
exclusionRowIndices
->
upload
(
exclusionRowIndicesVec
);
exclusionRowIndices
.
upload
(
exclusionRowIndicesVec
);
// Record the exclusion data.
// Record the exclusion data.
exclusions
=
CudaArray
::
creat
e
<
tileflags
>
(
context
,
tilesWithExclusions
.
size
()
*
CudaContext
::
TileSize
,
"exclusions"
);
exclusions
.
initializ
e
<
tileflags
>
(
context
,
tilesWithExclusions
.
size
()
*
CudaContext
::
TileSize
,
"exclusions"
);
tileflags
allFlags
=
(
tileflags
)
-
1
;
tileflags
allFlags
=
(
tileflags
)
-
1
;
vector
<
tileflags
>
exclusionVec
(
exclusions
->
getSize
(),
allFlags
);
vector
<
tileflags
>
exclusionVec
(
exclusions
.
getSize
(),
allFlags
);
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
CudaContext
::
TileSize
;
int
x
=
atom1
/
CudaContext
::
TileSize
;
int
offset1
=
atom1
-
x
*
CudaContext
::
TileSize
;
int
offset1
=
atom1
-
x
*
CudaContext
::
TileSize
;
...
@@ -270,7 +237,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -270,7 +237,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
}
}
}
}
atomExclusions
.
clear
();
// We won't use this again, so free the memory it used
atomExclusions
.
clear
();
// We won't use this again, so free the memory it used
exclusions
->
upload
(
exclusionVec
);
exclusions
.
upload
(
exclusionVec
);
// Create data structures for the neighbor list.
// Create data structures for the neighbor list.
...
@@ -284,21 +251,21 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -284,21 +251,21 @@ void CudaNonbondedUtilities::initialize(const System& system) {
if
(
maxTiles
<
1
)
if
(
maxTiles
<
1
)
maxTiles
=
1
;
maxTiles
=
1
;
maxSinglePairs
=
5
*
numAtoms
;
maxSinglePairs
=
5
*
numAtoms
;
interactingTiles
=
CudaArray
::
creat
e
<
int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingTiles
.
initializ
e
<
int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
CudaArray
::
creat
e
<
int
>
(
context
,
CudaContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactingAtoms
.
initializ
e
<
int
>
(
context
,
CudaContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactionCount
=
CudaArray
::
creat
e
<
unsigned
int
>
(
context
,
2
,
"interactionCount"
);
interactionCount
.
initializ
e
<
unsigned
int
>
(
context
,
2
,
"interactionCount"
);
singlePairs
=
CudaArray
::
creat
e
<
int2
>
(
context
,
maxSinglePairs
,
"singlePairs"
);
singlePairs
.
initializ
e
<
int2
>
(
context
,
maxSinglePairs
,
"singlePairs"
);
int
elementSize
=
(
context
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
int
elementSize
=
(
context
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
blockCenter
=
new
CudaArray
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockCenter"
);
blockCenter
.
initialize
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockCenter"
);
blockBoundingBox
=
new
CudaArray
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockBoundingBox"
);
blockBoundingBox
.
initialize
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockBoundingBox"
);
sortedBlocks
=
new
CudaArray
(
context
,
numAtomBlocks
,
2
*
elementSize
,
"sortedBlocks"
);
sortedBlocks
.
initialize
(
context
,
numAtomBlocks
,
2
*
elementSize
,
"sortedBlocks"
);
sortedBlockCenter
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockCenter
.
initialize
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockBoundingBox
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
sortedBlockBoundingBox
.
initialize
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
oldPositions
=
new
CudaArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
oldPositions
.
initialize
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
rebuildNeighborList
=
CudaArray
::
creat
e
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
rebuildNeighborList
.
initializ
e
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockSorter
=
new
CudaSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
blockSorter
=
new
CudaSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
unsigned
int
>
count
(
2
,
0
);
vector
<
unsigned
int
>
count
(
2
,
0
);
interactionCount
->
upload
(
count
);
interactionCount
.
upload
(
count
);
}
}
// Record arguments for kernels.
// Record arguments for kernels.
...
@@ -306,24 +273,24 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -306,24 +273,24 @@ void CudaNonbondedUtilities::initialize(const System& system) {
forceArgs
.
push_back
(
&
context
.
getForce
().
getDevicePointer
());
forceArgs
.
push_back
(
&
context
.
getForce
().
getDevicePointer
());
forceArgs
.
push_back
(
&
context
.
getEnergyBuffer
().
getDevicePointer
());
forceArgs
.
push_back
(
&
context
.
getEnergyBuffer
().
getDevicePointer
());
forceArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
forceArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusions
->
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusions
.
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusionTiles
->
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusionTiles
.
getDevicePointer
());
forceArgs
.
push_back
(
&
startTileIndex
);
forceArgs
.
push_back
(
&
startTileIndex
);
forceArgs
.
push_back
(
&
numTiles
);
forceArgs
.
push_back
(
&
numTiles
);
if
(
useCutoff
)
{
if
(
useCutoff
)
{
forceArgs
.
push_back
(
&
interactingTiles
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactingTiles
.
getDevicePointer
());
forceArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactionCount
.
getDevicePointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecXPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecXPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
forceArgs
.
push_back
(
&
maxTiles
);
forceArgs
.
push_back
(
&
maxTiles
);
forceArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
forceArgs
.
push_back
(
&
blockCenter
.
getDevicePointer
());
forceArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
forceArgs
.
push_back
(
&
blockBoundingBox
.
getDevicePointer
());
forceArgs
.
push_back
(
&
interactingAtoms
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactingAtoms
.
getDevicePointer
());
forceArgs
.
push_back
(
&
maxSinglePairs
);
forceArgs
.
push_back
(
&
maxSinglePairs
);
forceArgs
.
push_back
(
&
singlePairs
->
getDevicePointer
());
forceArgs
.
push_back
(
&
singlePairs
.
getDevicePointer
());
}
}
for
(
int
i
=
0
;
i
<
(
int
)
parameters
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
parameters
.
size
();
i
++
)
forceArgs
.
push_back
(
&
parameters
[
i
].
getMemory
());
forceArgs
.
push_back
(
&
parameters
[
i
].
getMemory
());
...
@@ -339,41 +306,41 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -339,41 +306,41 @@ void CudaNonbondedUtilities::initialize(const System& system) {
findBlockBoundsArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
findBlockBoundsArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
findBlockBoundsArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
findBlockBoundsArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
findBlockBoundsArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
blockCenter
.
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
blockBoundingBox
.
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
rebuildNeighborList
.
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
sortedBlocks
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlocks
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockCenter
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockBoundingBox
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlockCenter
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlockCenter
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlockBoundingBox
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlockBoundingBox
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
oldPositions
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
oldPositions
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
interactionCount
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
rebuildNeighborList
.
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
forceRebuildNeighborList
);
sortBoxDataArgs
.
push_back
(
&
forceRebuildNeighborList
);
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecXPointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecXPointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactionCount
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactingTiles
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactingTiles
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactingAtoms
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
interactingAtoms
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
singlePairs
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
singlePairs
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
maxTiles
);
findInteractingBlocksArgs
.
push_back
(
&
maxTiles
);
findInteractingBlocksArgs
.
push_back
(
&
maxSinglePairs
);
findInteractingBlocksArgs
.
push_back
(
&
maxSinglePairs
);
findInteractingBlocksArgs
.
push_back
(
&
startBlockIndex
);
findInteractingBlocksArgs
.
push_back
(
&
startBlockIndex
);
findInteractingBlocksArgs
.
push_back
(
&
numBlocks
);
findInteractingBlocksArgs
.
push_back
(
&
numBlocks
);
findInteractingBlocksArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
sortedBlocks
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
sortedBlockCenter
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
sortedBlockCenter
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
sortedBlockBoundingBox
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
sortedBlockBoundingBox
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
exclusionIndices
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
exclusionIndices
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
exclusionRowIndices
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
exclusionRowIndices
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
oldPositions
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
oldPositions
.
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
findInteractingBlocksArgs
.
push_back
(
&
rebuildNeighborList
.
getDevicePointer
());
}
}
}
}
...
@@ -406,12 +373,12 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
...
@@ -406,12 +373,12 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
blockSorter
->
sort
(
sortedBlocks
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
forceRebuildNeighborList
=
false
;
forceRebuildNeighborList
=
false
;
lastCutoff
=
kernels
.
cutoffDistance
;
lastCutoff
=
kernels
.
cutoffDistance
;
interactionCount
->
download
(
pinnedCountBuffer
,
false
);
interactionCount
.
download
(
pinnedCountBuffer
,
false
);
cuEventRecord
(
downloadCountEvent
,
context
.
getCurrentStream
());
cuEventRecord
(
downloadCountEvent
,
context
.
getCurrentStream
());
}
}
...
@@ -445,27 +412,21 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
...
@@ -445,27 +412,21 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
if
(
maxTiles
>
totalTiles
)
maxTiles
=
totalTiles
;
maxTiles
=
totalTiles
;
delete
interactingTiles
;
interactingTiles
.
resize
(
maxTiles
);
delete
interactingAtoms
;
interactingAtoms
.
resize
(
CudaContext
::
TileSize
*
maxTiles
);
interactingTiles
=
NULL
;
// Avoid an error in the destructor if the following allocation fails
interactingAtoms
=
NULL
;
interactingTiles
=
CudaArray
::
create
<
int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
CudaArray
::
create
<
int
>
(
context
,
CudaContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
if
(
forceArgs
.
size
()
>
0
)
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
7
]
=
&
interactingTiles
->
getDevicePointer
();
forceArgs
[
7
]
=
&
interactingTiles
.
getDevicePointer
();
findInteractingBlocksArgs
[
6
]
=
&
interactingTiles
->
getDevicePointer
();
findInteractingBlocksArgs
[
6
]
=
&
interactingTiles
.
getDevicePointer
();
if
(
forceArgs
.
size
()
>
0
)
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
forceArgs
[
17
]
=
&
interactingAtoms
.
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
.
getDevicePointer
();
}
}
if
(
pinnedCountBuffer
[
1
]
>
maxSinglePairs
)
{
if
(
pinnedCountBuffer
[
1
]
>
maxSinglePairs
)
{
maxSinglePairs
=
(
int
)
(
1.2
*
pinnedCountBuffer
[
1
]);
maxSinglePairs
=
(
int
)
(
1.2
*
pinnedCountBuffer
[
1
]);
delete
singlePairs
;
singlePairs
.
resize
(
maxSinglePairs
);
singlePairs
=
NULL
;
// Avoid an error in the destructor if the following allocation fails
singlePairs
=
CudaArray
::
create
<
int2
>
(
context
,
maxSinglePairs
,
"singlePairs"
);
if
(
forceArgs
.
size
()
>
0
)
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
19
]
=
&
singlePairs
->
getDevicePointer
();
forceArgs
[
19
]
=
&
singlePairs
.
getDevicePointer
();
findInteractingBlocksArgs
[
8
]
=
&
singlePairs
->
getDevicePointer
();
findInteractingBlocksArgs
[
8
]
=
&
singlePairs
.
getDevicePointer
();
}
}
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
context
.
setForcesValid
(
false
);
context
.
setForcesValid
(
false
);
...
@@ -510,7 +471,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
...
@@ -510,7 +471,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padding
);
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padding
);
defines
[
"PADDED_CUTOFF"
]
=
context
.
doubleToString
(
paddedCutoff
);
defines
[
"PADDED_CUTOFF"
]
=
context
.
doubleToString
(
paddedCutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
->
getSize
());
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
.
getSize
());
if
(
usePeriodic
)
if
(
usePeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"USE_PERIODIC"
]
=
"1"
;
if
(
context
.
getBoxIsTriclinic
())
if
(
context
.
getBoxIsTriclinic
())
...
@@ -735,7 +696,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -735,7 +696,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"TILE_SIZE"
]
=
context
.
intToString
(
CudaContext
::
TileSize
);
defines
[
"TILE_SIZE"
]
=
context
.
intToString
(
CudaContext
::
TileSize
);
int
numExclusionTiles
=
exclusionTiles
->
getSize
();
int
numExclusionTiles
=
exclusionTiles
.
getSize
();
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
numExclusionTiles
);
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
numExclusionTiles
);
int
numContexts
=
context
.
getPlatformData
().
contexts
.
size
();
int
numContexts
=
context
.
getPlatformData
().
contexts
.
size
();
int
startExclusionIndex
=
context
.
getContextIndex
()
*
numExclusionTiles
/
numContexts
;
int
startExclusionIndex
=
context
.
getContextIndex
()
*
numExclusionTiles
/
numContexts
;
...
...
platforms/cuda/src/CudaSort.cpp
View file @
b8c86406
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2010-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2010-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -31,8 +31,7 @@
...
@@ -31,8 +31,7 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
CudaSort
::
CudaSort
(
CudaContext
&
context
,
SortTrait
*
trait
,
unsigned
int
length
)
:
context
(
context
),
trait
(
trait
),
CudaSort
::
CudaSort
(
CudaContext
&
context
,
SortTrait
*
trait
,
unsigned
int
length
)
:
context
(
context
),
trait
(
trait
),
dataLength
(
length
)
{
dataRange
(
NULL
),
bucketOfElement
(
NULL
),
offsetInBucket
(
NULL
),
bucketOffset
(
NULL
),
buckets
(
NULL
),
dataLength
(
length
)
{
// Create kernels.
// Create kernels.
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
...
@@ -76,26 +75,16 @@ CudaSort::CudaSort(CudaContext& context, SortTrait* trait, unsigned int length)
...
@@ -76,26 +75,16 @@ CudaSort::CudaSort(CudaContext& context, SortTrait* trait, unsigned int length)
// Create workspace arrays.
// Create workspace arrays.
if
(
!
isShortList
)
{
if
(
!
isShortList
)
{
dataRange
=
new
CudaArray
(
context
,
2
,
trait
->
getKeySize
(),
"sortDataRange"
);
dataRange
.
initialize
(
context
,
2
,
trait
->
getKeySize
(),
"sortDataRange"
);
bucketOffset
=
CudaArray
::
creat
e
<
uint1
>
(
context
,
numBuckets
,
"bucketOffset"
);
bucketOffset
.
initializ
e
<
uint1
>
(
context
,
numBuckets
,
"bucketOffset"
);
bucketOfElement
=
CudaArray
::
creat
e
<
uint1
>
(
context
,
length
,
"bucketOfElement"
);
bucketOfElement
.
initializ
e
<
uint1
>
(
context
,
length
,
"bucketOfElement"
);
offsetInBucket
=
CudaArray
::
creat
e
<
uint1
>
(
context
,
length
,
"offsetInBucket"
);
offsetInBucket
.
initializ
e
<
uint1
>
(
context
,
length
,
"offsetInBucket"
);
buckets
=
new
CudaArray
(
context
,
length
,
trait
->
getDataSize
(),
"buckets"
);
buckets
.
initialize
(
context
,
length
,
trait
->
getDataSize
(),
"buckets"
);
}
}
}
}
CudaSort
::~
CudaSort
()
{
CudaSort
::~
CudaSort
()
{
delete
trait
;
delete
trait
;
if
(
dataRange
!=
NULL
)
delete
dataRange
;
if
(
bucketOfElement
!=
NULL
)
delete
bucketOfElement
;
if
(
offsetInBucket
!=
NULL
)
delete
offsetInBucket
;
if
(
bucketOffset
!=
NULL
)
delete
bucketOffset
;
if
(
buckets
!=
NULL
)
delete
buckets
;
}
}
void
CudaSort
::
sort
(
CudaArray
&
data
)
{
void
CudaSort
::
sort
(
CudaArray
&
data
)
{
...
@@ -112,30 +101,30 @@ void CudaSort::sort(CudaArray& data) {
...
@@ -112,30 +101,30 @@ void CudaSort::sort(CudaArray& data) {
else
{
else
{
// Compute the range of data values.
// Compute the range of data values.
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
unsigned
int
numBuckets
=
bucketOffset
.
getSize
();
void
*
rangeArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
dataRange
->
getDevicePointer
(),
&
numBuckets
,
&
bucketOffset
->
getDevicePointer
()};
void
*
rangeArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
dataRange
.
getDevicePointer
(),
&
numBuckets
,
&
bucketOffset
.
getDevicePointer
()};
context
.
executeKernel
(
computeRangeKernel
,
rangeArgs
,
rangeKernelSize
,
rangeKernelSize
,
2
*
rangeKernelSize
*
trait
->
getKeySize
());
context
.
executeKernel
(
computeRangeKernel
,
rangeArgs
,
rangeKernelSize
,
rangeKernelSize
,
2
*
rangeKernelSize
*
trait
->
getKeySize
());
// Assign array elements to buckets.
// Assign array elements to buckets.
void
*
elementsArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
numBuckets
,
&
dataRange
->
getDevicePointer
(),
void
*
elementsArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
numBuckets
,
&
dataRange
.
getDevicePointer
(),
&
bucketOffset
->
getDevicePointer
(),
&
bucketOfElement
->
getDevicePointer
(),
&
offsetInBucket
->
getDevicePointer
()};
&
bucketOffset
.
getDevicePointer
(),
&
bucketOfElement
.
getDevicePointer
(),
&
offsetInBucket
.
getDevicePointer
()};
context
.
executeKernel
(
assignElementsKernel
,
elementsArgs
,
data
.
getSize
(),
128
);
context
.
executeKernel
(
assignElementsKernel
,
elementsArgs
,
data
.
getSize
(),
128
);
// Compute the position of each bucket.
// Compute the position of each bucket.
void
*
computeArgs
[]
=
{
&
numBuckets
,
&
bucketOffset
->
getDevicePointer
()};
void
*
computeArgs
[]
=
{
&
numBuckets
,
&
bucketOffset
.
getDevicePointer
()};
context
.
executeKernel
(
computeBucketPositionsKernel
,
computeArgs
,
positionsKernelSize
,
positionsKernelSize
,
positionsKernelSize
*
sizeof
(
int
));
context
.
executeKernel
(
computeBucketPositionsKernel
,
computeArgs
,
positionsKernelSize
,
positionsKernelSize
,
positionsKernelSize
*
sizeof
(
int
));
// Copy the data into the buckets.
// Copy the data into the buckets.
void
*
copyArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
buckets
->
getDevicePointer
(),
&
dataLength
,
&
bucketOffset
->
getDevicePointer
(),
void
*
copyArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
buckets
.
getDevicePointer
(),
&
dataLength
,
&
bucketOffset
.
getDevicePointer
(),
&
bucketOfElement
->
getDevicePointer
(),
&
offsetInBucket
->
getDevicePointer
()};
&
bucketOfElement
.
getDevicePointer
(),
&
offsetInBucket
.
getDevicePointer
()};
context
.
executeKernel
(
copyToBucketsKernel
,
copyArgs
,
data
.
getSize
());
context
.
executeKernel
(
copyToBucketsKernel
,
copyArgs
,
data
.
getSize
());
// Sort each bucket.
// Sort each bucket.
void
*
sortArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
buckets
->
getDevicePointer
(),
&
numBuckets
,
&
bucketOffset
->
getDevicePointer
()};
void
*
sortArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
buckets
.
getDevicePointer
(),
&
numBuckets
,
&
bucketOffset
.
getDevicePointer
()};
context
.
executeKernel
(
sortBucketsKernel
,
sortArgs
,
((
data
.
getSize
()
+
sortKernelSize
-
1
)
/
sortKernelSize
)
*
sortKernelSize
,
sortKernelSize
,
sortKernelSize
*
trait
->
getDataSize
());
context
.
executeKernel
(
sortBucketsKernel
,
sortArgs
,
((
data
.
getSize
()
+
sortKernelSize
-
1
)
/
sortKernelSize
)
*
sortKernelSize
,
sortKernelSize
,
sortKernelSize
*
trait
->
getDataSize
());
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment