Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b33ee3b0
Commit
b33ee3b0
authored
Feb 08, 2018
by
peastman
Browse files
More conversion of OpenCLArrays
parent
d59b0373
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
675 additions
and
964 deletions
+675
-964
platforms/opencl/include/OpenCLBondedUtilities.h
platforms/opencl/include/OpenCLBondedUtilities.h
+3
-4
platforms/opencl/include/OpenCLCompact.h
platforms/opencl/include/OpenCLCompact.h
+1
-2
platforms/opencl/include/OpenCLIntegrationUtilities.h
platforms/opencl/include/OpenCLIntegrationUtilities.h
+34
-35
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+50
-63
platforms/opencl/include/OpenCLNonbondedUtilities.h
platforms/opencl/include/OpenCLNonbondedUtilities.h
+24
-24
platforms/opencl/include/OpenCLParallelKernels.h
platforms/opencl/include/OpenCLParallelKernels.h
+1
-1
platforms/opencl/include/OpenCLSort.h
platforms/opencl/include/OpenCLSort.h
+6
-6
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+8
-16
platforms/opencl/src/OpenCLCompact.cpp
platforms/opencl/src/OpenCLCompact.cpp
+4
-9
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+185
-257
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+188
-299
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+68
-102
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+5
-7
platforms/opencl/src/OpenCLSort.cpp
platforms/opencl/src/OpenCLSort.cpp
+20
-31
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
+20
-34
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
+7
-9
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+44
-57
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
+7
-8
No files found.
platforms/opencl/include/OpenCLBondedUtilities.h
View file @
b33ee3b0
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -81,7 +81,6 @@ namespace OpenMM {
class
OPENMM_EXPORT_OPENCL
OpenCLBondedUtilities
{
public:
OpenCLBondedUtilities
(
OpenCLContext
&
context
);
~
OpenCLBondedUtilities
();
/**
* Add a bonded interaction.
*
...
...
@@ -143,8 +142,8 @@ private:
std
::
vector
<
std
::
vector
<
int
>
>
forceSets
;
std
::
vector
<
cl
::
Memory
*>
arguments
;
std
::
vector
<
std
::
string
>
argTypes
;
std
::
vector
<
OpenCLArray
*
>
atomIndices
;
std
::
vector
<
OpenCLArray
*
>
bufferIndices
;
std
::
vector
<
OpenCLArray
>
atomIndices
;
std
::
vector
<
OpenCLArray
>
bufferIndices
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
int
numForceBuffers
,
maxBonds
,
allGroups
;
...
...
platforms/opencl/include/OpenCLCompact.h
View file @
b33ee3b0
...
...
@@ -33,11 +33,10 @@ namespace OpenMM {
class
OPENMM_EXPORT_OPENCL
OpenCLCompact
{
public:
OpenCLCompact
(
OpenCLContext
&
context
);
~
OpenCLCompact
();
void
compactStream
(
OpenCLArray
&
dOut
,
OpenCLArray
&
dIn
,
OpenCLArray
&
dValid
,
OpenCLArray
&
numValid
);
private:
OpenCLContext
&
context
;
OpenCLArray
*
dgBlockCounts
;
OpenCLArray
dgBlockCounts
;
cl
::
Kernel
countKernel
;
cl
::
Kernel
moveValidKernel
;
};
...
...
platforms/opencl/include/OpenCLIntegrationUtilities.h
View file @
b33ee3b0
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -42,25 +42,24 @@ namespace OpenMM {
class
OPENMM_EXPORT_OPENCL
OpenCLIntegrationUtilities
{
public:
OpenCLIntegrationUtilities
(
OpenCLContext
&
context
,
const
System
&
system
);
~
OpenCLIntegrationUtilities
();
/**
* Get the array which contains position deltas.
*/
OpenCLArray
&
getPosDelta
()
{
return
*
posDelta
;
return
posDelta
;
}
/**
* Get the array which contains random values. Each element is a float4, whose components
* are independent, normally distributed random numbers with mean 0 and variance 1.
*/
OpenCLArray
&
getRandom
()
{
return
*
random
;
return
random
;
}
/**
* Get the array which contains the current step size.
*/
OpenCLArray
&
getStepSize
()
{
return
*
stepSize
;
return
stepSize
;
}
/**
* Set the size to use for the next step.
...
...
@@ -131,36 +130,36 @@ private:
cl
::
Kernel
ccmaPosUpdateKernel
,
ccmaVelUpdateKernel
;
cl
::
Kernel
vsitePositionKernel
,
vsiteForceKernel
,
vsiteAddForcesKernel
;
cl
::
Kernel
randomKernel
,
timeShiftKernel
;
OpenCLArray
*
posDelta
;
OpenCLArray
*
settleAtoms
;
OpenCLArray
*
settleParams
;
OpenCLArray
*
shakeAtoms
;
OpenCLArray
*
shakeParams
;
OpenCLArray
*
random
;
OpenCLArray
*
randomSeed
;
OpenCLArray
*
stepSize
;
OpenCLArray
*
ccmaAtoms
;
OpenCLArray
*
ccmaDistance
;
OpenCLArray
*
ccmaReducedMass
;
OpenCLArray
*
ccmaAtomConstraints
;
OpenCLArray
*
ccmaNumAtomConstraints
;
OpenCLArray
*
ccmaConstraintMatrixColumn
;
OpenCLArray
*
ccmaConstraintMatrixValue
;
OpenCLArray
*
ccmaDelta1
;
OpenCLArray
*
ccmaDelta2
;
OpenCLArray
*
ccmaConverged
;
OpenCLArray
*
ccmaConvergedHostBuffer
;
OpenCLArray
*
vsite2AvgAtoms
;
OpenCLArray
*
vsite2AvgWeights
;
OpenCLArray
*
vsite3AvgAtoms
;
OpenCLArray
*
vsite3AvgWeights
;
OpenCLArray
*
vsiteOutOfPlaneAtoms
;
OpenCLArray
*
vsiteOutOfPlaneWeights
;
OpenCLArray
*
vsiteLocalCoordsIndex
;
OpenCLArray
*
vsiteLocalCoordsAtoms
;
OpenCLArray
*
vsiteLocalCoordsWeights
;
OpenCLArray
*
vsiteLocalCoordsPos
;
OpenCLArray
*
vsiteLocalCoordsStartIndex
;
OpenCLArray
posDelta
;
OpenCLArray
settleAtoms
;
OpenCLArray
settleParams
;
OpenCLArray
shakeAtoms
;
OpenCLArray
shakeParams
;
OpenCLArray
random
;
OpenCLArray
randomSeed
;
OpenCLArray
stepSize
;
OpenCLArray
ccmaAtoms
;
OpenCLArray
ccmaDistance
;
OpenCLArray
ccmaReducedMass
;
OpenCLArray
ccmaAtomConstraints
;
OpenCLArray
ccmaNumAtomConstraints
;
OpenCLArray
ccmaConstraintMatrixColumn
;
OpenCLArray
ccmaConstraintMatrixValue
;
OpenCLArray
ccmaDelta1
;
OpenCLArray
ccmaDelta2
;
OpenCLArray
ccmaConverged
;
OpenCLArray
ccmaConvergedHostBuffer
;
OpenCLArray
vsite2AvgAtoms
;
OpenCLArray
vsite2AvgWeights
;
OpenCLArray
vsite3AvgAtoms
;
OpenCLArray
vsite3AvgWeights
;
OpenCLArray
vsiteOutOfPlaneAtoms
;
OpenCLArray
vsiteOutOfPlaneWeights
;
OpenCLArray
vsiteLocalCoordsIndex
;
OpenCLArray
vsiteLocalCoordsAtoms
;
OpenCLArray
vsiteLocalCoordsWeights
;
OpenCLArray
vsiteLocalCoordsPos
;
OpenCLArray
vsiteLocalCoordsStartIndex
;
int
randomPos
;
int
lastSeed
,
numVsites
;
bool
hasInitializedPosConstraintKernels
,
hasInitializedVelConstraintKernels
,
ccmaUseDirectBuffer
,
hasOverlappingVsites
;
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
b33ee3b0
...
...
@@ -1132,12 +1132,8 @@ private:
class
OpenCLCalcGayBerneForceKernel
:
public
CalcGayBerneForceKernel
{
public:
OpenCLCalcGayBerneForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcGayBerneForceKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
),
sortedParticles
(
NULL
),
axisParticleIndices
(
NULL
),
sigParams
(
NULL
),
epsParams
(
NULL
),
scale
(
NULL
),
exceptionParticles
(
NULL
),
exceptionParams
(
NULL
),
aMatrix
(
NULL
),
bMatrix
(
NULL
),
gMatrix
(
NULL
),
exclusions
(
NULL
),
exclusionStartIndex
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
neighbors
(
NULL
),
neighborIndex
(
NULL
),
neighborBlockCount
(
NULL
),
sortedPos
(
NULL
),
torque
(
NULL
)
{
hasInitializedKernels
(
false
)
{
}
~
OpenCLCalcGayBerneForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -1169,25 +1165,25 @@ private:
bool
hasInitializedKernels
;
int
numRealParticles
,
maxNeighborBlocks
;
GayBerneForce
::
NonbondedMethod
nonbondedMethod
;
OpenCLArray
*
sortedParticles
;
OpenCLArray
*
axisParticleIndices
;
OpenCLArray
*
sigParams
;
OpenCLArray
*
epsParams
;
OpenCLArray
*
scale
;
OpenCLArray
*
exceptionParticles
;
OpenCLArray
*
exceptionParams
;
OpenCLArray
*
aMatrix
;
OpenCLArray
*
bMatrix
;
OpenCLArray
*
gMatrix
;
OpenCLArray
*
exclusions
;
OpenCLArray
*
exclusionStartIndex
;
OpenCLArray
*
blockCenter
;
OpenCLArray
*
blockBoundingBox
;
OpenCLArray
*
neighbors
;
OpenCLArray
*
neighborIndex
;
OpenCLArray
*
neighborBlockCount
;
OpenCLArray
*
sortedPos
;
OpenCLArray
*
torque
;
OpenCLArray
sortedParticles
;
OpenCLArray
axisParticleIndices
;
OpenCLArray
sigParams
;
OpenCLArray
epsParams
;
OpenCLArray
scale
;
OpenCLArray
exceptionParticles
;
OpenCLArray
exceptionParams
;
OpenCLArray
aMatrix
;
OpenCLArray
bMatrix
;
OpenCLArray
gMatrix
;
OpenCLArray
exclusions
;
OpenCLArray
exclusionStartIndex
;
OpenCLArray
blockCenter
;
OpenCLArray
blockBoundingBox
;
OpenCLArray
neighbors
;
OpenCLArray
neighborIndex
;
OpenCLArray
neighborBlockCount
;
OpenCLArray
sortedPos
;
OpenCLArray
torque
;
std
::
vector
<
bool
>
isRealParticle
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
excludedPairs
;
...
...
@@ -1200,9 +1196,8 @@ private:
class
OpenCLCalcCustomCVForceKernel
:
public
CalcCustomCVForceKernel
{
public:
OpenCLCalcCustomCVForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcCustomCVForceKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
,
invAtomOrder
(
NULL
),
innerInvAtomOrder
(
NULL
)
{
cl
(
cl
),
hasInitializedKernels
(
false
)
{
}
~
OpenCLCalcCustomCVForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -1236,9 +1231,9 @@ private:
std
::
vector
<
std
::
string
>
variableNames
,
paramDerivNames
,
globalParameterNames
;
std
::
vector
<
Lepton
::
ExpressionProgram
>
variableDerivExpressions
;
std
::
vector
<
Lepton
::
ExpressionProgram
>
paramDerivExpressions
;
std
::
vector
<
OpenCLArray
*
>
cvForces
;
OpenCLArray
*
invAtomOrder
;
OpenCLArray
*
innerInvAtomOrder
;
std
::
vector
<
OpenCLArray
>
cvForces
;
OpenCLArray
invAtomOrder
;
OpenCLArray
innerInvAtomOrder
;
cl
::
Kernel
copyStateKernel
,
copyForcesKernel
,
addForcesKernel
;
};
...
...
@@ -1247,10 +1242,8 @@ private:
*/
class
OpenCLCalcRMSDForceKernel
:
public
CalcRMSDForceKernel
{
public:
OpenCLCalcRMSDForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcRMSDForceKernel
(
name
,
platform
),
cl
(
cl
),
referencePos
(
NULL
),
particles
(
NULL
),
buffer
(
NULL
)
{
OpenCLCalcRMSDForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcRMSDForceKernel
(
name
,
platform
),
cl
(
cl
)
{
}
~
OpenCLCalcRMSDForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -1289,9 +1282,9 @@ private:
OpenCLContext
&
cl
;
ForceInfo
*
info
;
double
sumNormRef
;
OpenCLArray
*
referencePos
;
OpenCLArray
*
particles
;
OpenCLArray
*
buffer
;
OpenCLArray
referencePos
;
OpenCLArray
particles
;
OpenCLArray
buffer
;
cl
::
Kernel
kernel1
,
kernel2
;
};
...
...
@@ -1337,9 +1330,8 @@ private:
class
OpenCLIntegrateLangevinStepKernel
:
public
IntegrateLangevinStepKernel
{
public:
OpenCLIntegrateLangevinStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateLangevinStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
,
params
(
NULL
)
{
hasInitializedKernels
(
false
)
{
}
~
OpenCLIntegrateLangevinStepKernel
();
/**
* Initialize the kernel, setting up the particle masses.
*
...
...
@@ -1365,7 +1357,7 @@ private:
OpenCLContext
&
cl
;
double
prevTemp
,
prevFriction
,
prevStepSize
;
bool
hasInitializedKernels
;
OpenCLArray
*
params
;
OpenCLArray
params
;
cl
::
Kernel
kernel1
,
kernel2
;
};
...
...
@@ -1451,9 +1443,8 @@ private:
class
OpenCLIntegrateVariableLangevinStepKernel
:
public
IntegrateVariableLangevinStepKernel
{
public:
OpenCLIntegrateVariableLangevinStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateVariableLangevinStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
,
params
(
NULL
)
{
hasInitializedKernels
(
false
)
{
}
~
OpenCLIntegrateVariableLangevinStepKernel
();
/**
* Initialize the kernel, setting up the particle masses.
*
...
...
@@ -1481,7 +1472,7 @@ private:
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
int
blockSize
;
OpenCLArray
*
params
;
OpenCLArray
params
;
cl
::
Kernel
kernel1
,
kernel2
,
selectSizeKernel
;
double
prevTemp
,
prevFriction
,
prevErrorTol
;
};
...
...
@@ -1493,8 +1484,7 @@ class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public:
enum
GlobalTargetType
{
DT
,
VARIABLE
,
PARAMETER
};
OpenCLIntegrateCustomStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateCustomStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
),
localValuesAreCurrent
(
false
),
globalValues
(
NULL
),
sumBuffer
(
NULL
),
summedValue
(
NULL
),
uniformRandoms
(
NULL
),
randomSeed
(
NULL
),
perDofEnergyParamDerivs
(
NULL
),
perDofValues
(
NULL
),
needsEnergyParamDerivs
(
false
)
{
hasInitializedKernels
(
false
),
localValuesAreCurrent
(
false
),
perDofValues
(
NULL
),
needsEnergyParamDerivs
(
false
)
{
}
~
OpenCLIntegrateCustomStepKernel
();
/**
...
...
@@ -1575,15 +1565,15 @@ private:
int
numGlobalVariables
,
sumWorkGroupSize
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
mutable
bool
localValuesAreCurrent
;
OpenCLArray
*
globalValues
;
OpenCLArray
*
sumBuffer
;
OpenCLArray
*
summedValue
;
OpenCLArray
*
uniformRandoms
;
OpenCLArray
*
randomSeed
;
OpenCLArray
*
perDofEnergyParamDerivs
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
OpenCLArray
globalValues
;
OpenCLArray
sumBuffer
;
OpenCLArray
summedValue
;
OpenCLArray
uniformRandoms
;
OpenCLArray
randomSeed
;
OpenCLArray
perDofEnergyParamDerivs
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
std
::
map
<
int
,
double
>
savedEnergy
;
std
::
map
<
int
,
OpenCLArray
*
>
savedForces
;
std
::
map
<
int
,
OpenCLArray
>
savedForces
;
std
::
set
<
int
>
validSavedForces
;
OpenCLParameterSet
*
perDofValues
;
mutable
std
::
vector
<
std
::
vector
<
cl_float
>
>
localPerDofValuesFloat
;
...
...
@@ -1635,9 +1625,8 @@ public:
class
OpenCLApplyAndersenThermostatKernel
:
public
ApplyAndersenThermostatKernel
{
public:
OpenCLApplyAndersenThermostatKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
ApplyAndersenThermostatKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
,
atomGroups
(
NULL
)
{
hasInitializedKernels
(
false
)
{
}
~
OpenCLApplyAndersenThermostatKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -1655,7 +1644,7 @@ private:
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
int
randomSeed
;
OpenCLArray
*
atomGroups
;
OpenCLArray
atomGroups
;
cl
::
Kernel
kernel
;
};
...
...
@@ -1665,9 +1654,8 @@ private:
class
OpenCLApplyMonteCarloBarostatKernel
:
public
ApplyMonteCarloBarostatKernel
{
public:
OpenCLApplyMonteCarloBarostatKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
ApplyMonteCarloBarostatKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
,
savedPositions
(
NULL
),
savedForces
(
NULL
),
moleculeAtoms
(
NULL
),
moleculeStartIndex
(
NULL
)
{
hasInitializedKernels
(
false
)
{
}
~
OpenCLApplyMonteCarloBarostatKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -1699,10 +1687,10 @@ private:
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
int
numMolecules
;
OpenCLArray
*
savedPositions
;
OpenCLArray
*
savedForces
;
OpenCLArray
*
moleculeAtoms
;
OpenCLArray
*
moleculeStartIndex
;
OpenCLArray
savedPositions
;
OpenCLArray
savedForces
;
OpenCLArray
moleculeAtoms
;
OpenCLArray
moleculeStartIndex
;
cl
::
Kernel
kernel
;
std
::
vector
<
int
>
lastAtomOrder
;
};
...
...
@@ -1712,9 +1700,8 @@ private:
*/
class
OpenCLRemoveCMMotionKernel
:
public
RemoveCMMotionKernel
{
public:
OpenCLRemoveCMMotionKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
RemoveCMMotionKernel
(
name
,
platform
),
cl
(
cl
)
,
cmMomentum
(
NULL
)
{
OpenCLRemoveCMMotionKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
RemoveCMMotionKernel
(
name
,
platform
),
cl
(
cl
)
{
}
~
OpenCLRemoveCMMotionKernel
();
/**
* Initialize the kernel, setting up the particle masses.
*
...
...
@@ -1731,7 +1718,7 @@ public:
private:
OpenCLContext
&
cl
;
int
frequency
;
OpenCLArray
*
cmMomentum
;
OpenCLArray
cmMomentum
;
cl
::
Kernel
kernel1
,
kernel2
;
};
...
...
platforms/opencl/include/OpenCLNonbondedUtilities.h
View file @
b33ee3b0
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -175,55 +175,55 @@ public:
* Get the array containing the center of each atom block.
*/
OpenCLArray
&
getBlockCenters
()
{
return
*
blockCenter
;
return
blockCenter
;
}
/**
* Get the array containing the dimensions of each atom block.
*/
OpenCLArray
&
getBlockBoundingBoxes
()
{
return
*
blockBoundingBox
;
return
blockBoundingBox
;
}
/**
* Get the array whose first element contains the number of tiles with interactions.
*/
OpenCLArray
&
getInteractionCount
()
{
return
*
interactionCount
;
return
interactionCount
;
}
/**
* Get the array containing tiles with interactions.
*/
OpenCLArray
&
getInteractingTiles
()
{
return
*
interactingTiles
;
return
interactingTiles
;
}
/**
* Get the array containing the atoms in each tile with interactions.
*/
OpenCLArray
&
getInteractingAtoms
()
{
return
*
interactingAtoms
;
return
interactingAtoms
;
}
/**
* Get the array containing exclusion flags.
*/
OpenCLArray
&
getExclusions
()
{
return
*
exclusions
;
return
exclusions
;
}
/**
* Get the array containing tiles with exclusions.
*/
OpenCLArray
&
getExclusionTiles
()
{
return
*
exclusionTiles
;
return
exclusionTiles
;
}
/**
* Get the array containing the index into the exclusion array for each tile.
*/
OpenCLArray
&
getExclusionIndices
()
{
return
*
exclusionIndices
;
return
exclusionIndices
;
}
/**
* Get the array listing where the exclusion data starts for each row.
*/
OpenCLArray
&
getExclusionRowIndices
()
{
return
*
exclusionRowIndices
;
return
exclusionRowIndices
;
}
/**
* Get the index of the first tile this context is responsible for processing.
...
...
@@ -275,20 +275,20 @@ private:
class
BlockSortTrait
;
OpenCLContext
&
context
;
std
::
map
<
int
,
KernelSet
>
groupKernels
;
OpenCLArray
*
exclusionTiles
;
OpenCLArray
*
exclusions
;
OpenCLArray
*
exclusionIndices
;
OpenCLArray
*
exclusionRowIndices
;
OpenCLArray
*
interactingTiles
;
OpenCLArray
*
interactingAtoms
;
OpenCLArray
*
interactionCount
;
OpenCLArray
*
blockCenter
;
OpenCLArray
*
blockBoundingBox
;
OpenCLArray
*
sortedBlocks
;
OpenCLArray
*
sortedBlockCenter
;
OpenCLArray
*
sortedBlockBoundingBox
;
OpenCLArray
*
oldPositions
;
OpenCLArray
*
rebuildNeighborList
;
OpenCLArray
exclusionTiles
;
OpenCLArray
exclusions
;
OpenCLArray
exclusionIndices
;
OpenCLArray
exclusionRowIndices
;
OpenCLArray
interactingTiles
;
OpenCLArray
interactingAtoms
;
OpenCLArray
interactionCount
;
OpenCLArray
blockCenter
;
OpenCLArray
blockBoundingBox
;
OpenCLArray
sortedBlocks
;
OpenCLArray
sortedBlockCenter
;
OpenCLArray
sortedBlockBoundingBox
;
OpenCLArray
oldPositions
;
OpenCLArray
rebuildNeighborList
;
OpenCLSort
*
blockSorter
;
cl
::
Event
downloadCountEvent
;
cl
::
Buffer
*
pinnedCountBuffer
;
...
...
platforms/opencl/include/OpenCLParallelKernels.h
View file @
b33ee3b0
...
...
@@ -84,7 +84,7 @@ private:
std
::
vector
<
long
long
>
completionTimes
;
std
::
vector
<
double
>
contextNonbondedFractions
;
std
::
vector
<
int
>
tileCounts
;
OpenCLArray
*
contextForces
;
OpenCLArray
contextForces
;
cl
::
Buffer
*
pinnedPositionBuffer
;
cl
::
Buffer
*
pinnedForceBuffer
;
void
*
pinnedPositionMemory
;
...
...
platforms/opencl/include/OpenCLSort.h
View file @
b33ee3b0
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2010-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -87,11 +87,11 @@ public:
private:
OpenCLContext
&
context
;
SortTrait
*
trait
;
OpenCLArray
*
dataRange
;
OpenCLArray
*
bucketOfElement
;
OpenCLArray
*
offsetInBucket
;
OpenCLArray
*
bucketOffset
;
OpenCLArray
*
buckets
;
OpenCLArray
dataRange
;
OpenCLArray
bucketOfElement
;
OpenCLArray
offsetInBucket
;
OpenCLArray
bucketOffset
;
OpenCLArray
buckets
;
cl
::
Kernel
shortListKernel
,
computeRangeKernel
,
assignElementsKernel
,
computeBucketPositionsKernel
,
copyToBucketsKernel
,
sortBucketsKernel
;
unsigned
int
dataLength
,
rangeKernelSize
,
positionsKernelSize
,
sortKernelSize
;
bool
isShortList
;
...
...
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
b33ee3b0
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -36,13 +36,6 @@ using namespace std;
OpenCLBondedUtilities
::
OpenCLBondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
numForceBuffers
(
0
),
maxBonds
(
0
),
allGroups
(
0
),
hasInitializedKernels
(
false
)
{
}
OpenCLBondedUtilities
::~
OpenCLBondedUtilities
()
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
.
size
();
i
++
)
delete
atomIndices
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
bufferIndices
.
size
();
i
++
)
delete
bufferIndices
[
i
];
}
void
OpenCLBondedUtilities
::
addInteraction
(
const
vector
<
vector
<
int
>
>&
atoms
,
const
string
&
source
,
int
group
)
{
if
(
atoms
.
size
()
>
0
)
{
forceAtoms
.
push_back
(
atoms
);
...
...
@@ -92,6 +85,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
vector
<
vector
<
cl_uint
>
>
bufferVec
(
numForces
);
vector
<
vector
<
int
>
>
bufferCounter
(
numForces
,
vector
<
int
>
(
system
.
getNumParticles
(),
0
));
vector
<
int
>
numBuffers
(
numForces
,
0
);
atomIndices
.
resize
(
numForces
);
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
{
int
numBonds
=
forceAtoms
[
i
].
size
();
int
numAtoms
=
forceAtoms
[
i
][
0
].
size
();
...
...
@@ -101,9 +95,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for
(
int
atom
=
0
;
atom
<
numAtoms
;
atom
++
)
indexVec
[
bond
*
width
+
atom
]
=
forceAtoms
[
i
][
bond
][
atom
];
}
OpenCLArray
*
indices
=
OpenCLArray
::
create
<
cl_uint
>
(
context
,
indexVec
.
size
(),
"bondedIndices"
);
indices
->
upload
(
indexVec
);
atomIndices
.
push_back
(
indices
);
atomIndices
[
i
].
initialize
<
cl_uint
>
(
context
,
indexVec
.
size
(),
"bondedIndices"
);
atomIndices
[
i
].
upload
(
indexVec
);
bufferVec
[
i
].
resize
(
width
*
numBonds
,
0
);
for
(
int
bond
=
0
;
bond
<
numBonds
;
bond
++
)
{
for
(
int
atom
=
0
;
atom
<
numAtoms
;
atom
++
)
...
...
@@ -177,9 +170,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for
(
int
bond
=
0
;
bond
<
numBonds
;
bond
++
)
for
(
int
atom
=
0
;
atom
<
numAtoms
;
atom
++
)
bufferVec
[
force
][
bond
*
width
+
atom
]
+=
bufferCounter
[
forceSets
[
i
][
k
]][
forceAtoms
[
force
][
bond
][
atom
]];
OpenCLArray
*
buffers
=
OpenCLArray
::
create
<
cl_uint
>
(
context
,
bufferVec
[
force
].
size
(),
"bondedBufferIndices"
);
buffers
->
upload
(
bufferVec
[
force
]);
bufferIndices
[
force
]
=
buffers
;
bufferIndices
[
force
].
initialize
<
cl_uint
>
(
context
,
bufferVec
[
force
].
size
(),
"bondedBufferIndices"
);
bufferIndices
[
force
].
upload
(
bufferVec
[
force
]);
}
// Create the kernels.
...
...
@@ -291,8 +283,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
index
+=
6
;
for
(
int
j
=
0
;
j
<
(
int
)
forceSets
[
i
].
size
();
j
++
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
atomIndices
[
forceSets
[
i
][
j
]]
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bufferIndices
[
forceSets
[
i
][
j
]]
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
atomIndices
[
forceSets
[
i
][
j
]]
.
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bufferIndices
[
forceSets
[
i
][
j
]]
.
getDeviceBuffer
());
}
for
(
int
j
=
0
;
j
<
(
int
)
arguments
.
size
();
j
++
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
*
arguments
[
j
]);
...
...
platforms/opencl/src/OpenCLCompact.cpp
View file @
b33ee3b0
...
...
@@ -29,18 +29,13 @@
using
namespace
OpenMM
;
OpenCLCompact
::
OpenCLCompact
(
OpenCLContext
&
context
)
:
context
(
context
)
,
dgBlockCounts
(
NULL
)
{
dgBlockCounts
=
OpenCLArray
::
creat
e
<
cl_uint
>
(
context
,
context
.
getNumThreadBlocks
(),
"dgBlockCounts"
);
OpenCLCompact
::
OpenCLCompact
(
OpenCLContext
&
context
)
:
context
(
context
)
{
dgBlockCounts
.
initializ
e
<
cl_uint
>
(
context
,
context
.
getNumThreadBlocks
(),
"dgBlockCounts"
);
cl
::
Program
program
=
context
.
createProgram
(
OpenCLKernelSources
::
compact
);
countKernel
=
cl
::
Kernel
(
program
,
"countElts"
);
moveValidKernel
=
cl
::
Kernel
(
program
,
"moveValidElementsStaged"
);
}
OpenCLCompact
::~
OpenCLCompact
()
{
if
(
dgBlockCounts
!=
NULL
)
delete
dgBlockCounts
;
}
void
OpenCLCompact
::
compactStream
(
OpenCLArray
&
dOut
,
OpenCLArray
&
dIn
,
OpenCLArray
&
dValid
,
OpenCLArray
&
numValid
)
{
// Figure out # elements per block
unsigned
int
len
=
dIn
.
getSize
();
...
...
@@ -51,7 +46,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr
// TODO: implement loop over blocks of 10M
// Phase 1: Calculate number of valid elements per thread block
countKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
dgBlockCounts
->
getDeviceBuffer
());
countKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
dgBlockCounts
.
getDeviceBuffer
());
countKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
dValid
.
getDeviceBuffer
());
countKernel
.
setArg
<
cl_uint
>
(
2
,
len
);
countKernel
.
setArg
(
3
,
128
*
sizeof
(
cl_uint
),
NULL
);
...
...
@@ -61,7 +56,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr
moveValidKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
dIn
.
getDeviceBuffer
());
moveValidKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
dOut
.
getDeviceBuffer
());
moveValidKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
dValid
.
getDeviceBuffer
());
moveValidKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
dgBlockCounts
->
getDeviceBuffer
());
moveValidKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
dgBlockCounts
.
getDeviceBuffer
());
moveValidKernel
.
setArg
<
cl_uint
>
(
4
,
len
);
moveValidKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
numValid
.
getDeviceBuffer
());
moveValidKernel
.
setArg
(
6
,
128
*
sizeof
(
cl_uint
),
NULL
);
...
...
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
b33ee3b0
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -25,7 +25,6 @@
* -------------------------------------------------------------------------- */
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLArray.h"
#include "OpenCLKernelSources.h"
#include "openmm/internal/OSRngSeed.h"
#include "openmm/HarmonicAngleForce.h"
...
...
@@ -97,31 +96,24 @@ static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int inde
}
OpenCLIntegrationUtilities
::
OpenCLIntegrationUtilities
(
OpenCLContext
&
context
,
const
System
&
system
)
:
context
(
context
),
posDelta
(
NULL
),
settleAtoms
(
NULL
),
settleParams
(
NULL
),
shakeAtoms
(
NULL
),
shakeParams
(
NULL
),
random
(
NULL
),
randomSeed
(
NULL
),
randomPos
(
0
),
stepSize
(
NULL
),
ccmaAtoms
(
NULL
),
ccmaDistance
(
NULL
),
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedHostBuffer
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
vsiteLocalCoordsIndex
(
NULL
),
vsiteLocalCoordsAtoms
(
NULL
),
vsiteLocalCoordsWeights
(
NULL
),
vsiteLocalCoordsPos
(
NULL
),
vsiteLocalCoordsStartIndex
(
NULL
),
hasInitializedPosConstraintKernels
(
false
),
hasInitializedVelConstraintKernels
(
false
),
hasOverlappingVsites
(
false
)
{
randomPos
(
0
),
hasInitializedPosConstraintKernels
(
false
),
hasInitializedVelConstraintKernels
(
false
),
hasOverlappingVsites
(
false
)
{
// Create workspace arrays.
lastStepSize
=
mm_double2
(
0.0
,
0.0
);
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
{
posDelta
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
mm_double4
>
deltas
(
posDelta
->
getSize
(),
mm_double4
(
0.0
,
0.0
,
0.0
,
0.0
));
posDelta
->
upload
(
deltas
);
stepSize
=
OpenCLArray
::
creat
e
<
mm_double2
>
(
context
,
1
,
"stepSize"
);
stepSize
->
upload
(
&
lastStepSize
);
posDelta
.
initializ
e
<
mm_double4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
mm_double4
>
deltas
(
posDelta
.
getSize
(),
mm_double4
(
0.0
,
0.0
,
0.0
,
0.0
));
posDelta
.
upload
(
deltas
);
stepSize
.
initializ
e
<
mm_double2
>
(
context
,
1
,
"stepSize"
);
stepSize
.
upload
(
&
lastStepSize
);
}
else
{
posDelta
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
mm_float4
>
deltas
(
posDelta
->
getSize
(),
mm_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
));
posDelta
->
upload
(
deltas
);
stepSize
=
OpenCLArray
::
creat
e
<
mm_float2
>
(
context
,
1
,
"stepSize"
);
posDelta
.
initializ
e
<
mm_float4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
mm_float4
>
deltas
(
posDelta
.
getSize
(),
mm_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
));
posDelta
.
upload
(
deltas
);
stepSize
.
initializ
e
<
mm_float2
>
(
context
,
1
,
"stepSize"
);
mm_float2
lastStepSizeFloat
=
mm_float2
(
0.0
f
,
0.0
f
);
stepSize
->
upload
(
&
lastStepSizeFloat
);
stepSize
.
upload
(
&
lastStepSizeFloat
);
}
// Create the time shift kernel for calculating kinetic energy.
...
...
@@ -227,10 +219,10 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
isShakeAtom
[
atom3
]
=
true
;
}
if
(
atoms
.
size
()
>
0
)
{
settleAtoms
=
OpenCLArray
::
creat
e
<
mm_int4
>
(
context
,
atoms
.
size
(),
"settleAtoms"
);
settleParams
=
OpenCLArray
::
creat
e
<
mm_float2
>
(
context
,
params
.
size
(),
"settleParams"
);
settleAtoms
->
upload
(
atoms
);
settleParams
->
upload
(
params
);
settleAtoms
.
initializ
e
<
mm_int4
>
(
context
,
atoms
.
size
(),
"settleAtoms"
);
settleParams
.
initializ
e
<
mm_float2
>
(
context
,
params
.
size
(),
"settleParams"
);
settleAtoms
.
upload
(
atoms
);
settleParams
.
upload
(
params
);
}
}
...
...
@@ -310,10 +302,10 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
isShakeAtom
[
cluster
.
peripheralID
[
2
]]
=
true
;
++
index
;
}
shakeAtoms
=
OpenCLArray
::
creat
e
<
mm_int4
>
(
context
,
atoms
.
size
(),
"shakeAtoms"
);
shakeParams
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
context
,
params
.
size
(),
"shakeParams"
);
shakeAtoms
->
upload
(
atoms
);
shakeParams
->
upload
(
params
);
shakeAtoms
.
initializ
e
<
mm_int4
>
(
context
,
atoms
.
size
(),
"shakeAtoms"
);
shakeParams
.
initializ
e
<
mm_float4
>
(
context
,
params
.
size
(),
"shakeParams"
);
shakeAtoms
.
upload
(
atoms
);
shakeParams
.
upload
(
params
);
}
// Find connected constraints for CCMA.
...
...
@@ -390,28 +382,28 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Record the CCMA data structures.
ccmaAtoms
=
OpenCLArray
::
creat
e
<
mm_int2
>
(
context
,
numCCMA
,
"CcmaAtoms"
);
ccmaAtomConstraints
=
OpenCLArray
::
creat
e
<
cl_int
>
(
context
,
numAtoms
*
maxAtomConstraints
,
"CcmaAtomConstraints"
);
ccmaNumAtomConstraints
=
OpenCLArray
::
creat
e
<
cl_int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaConstraintMatrixColumn
=
OpenCLArray
::
creat
e
<
cl_int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConverged
=
OpenCLArray
::
creat
e
<
cl_int
>
(
context
,
2
,
"CcmaConverged"
);
ccmaConvergedHostBuffer
=
OpenCLArray
::
creat
e
<
cl_int
>
(
context
,
1
,
"CcmaConvergedHostBuffer"
,
CL_MEM_WRITE_ONLY
|
CL_MEM_ALLOC_HOST_PTR
);
ccmaAtoms
.
initializ
e
<
mm_int2
>
(
context
,
numCCMA
,
"CcmaAtoms"
);
ccmaAtomConstraints
.
initializ
e
<
cl_int
>
(
context
,
numAtoms
*
maxAtomConstraints
,
"CcmaAtomConstraints"
);
ccmaNumAtomConstraints
.
initializ
e
<
cl_int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaConstraintMatrixColumn
.
initializ
e
<
cl_int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConverged
.
initializ
e
<
cl_int
>
(
context
,
2
,
"CcmaConverged"
);
ccmaConvergedHostBuffer
.
initializ
e
<
cl_int
>
(
context
,
1
,
"CcmaConvergedHostBuffer"
,
CL_MEM_WRITE_ONLY
|
CL_MEM_ALLOC_HOST_PTR
);
// Different communication mechanisms give optimal performance on AMD and on NVIDIA.
string
vendor
=
context
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
ccmaUseDirectBuffer
=
(
vendor
.
size
()
>=
28
&&
vendor
.
substr
(
0
,
28
)
==
"Advanced Micro Devices, Inc."
);
vector
<
mm_int2
>
atomsVec
(
ccmaAtoms
->
getSize
());
vector
<
cl_int
>
atomConstraintsVec
(
ccmaAtomConstraints
->
getSize
());
vector
<
cl_int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
->
getSize
());
vector
<
cl_int
>
constraintMatrixColumnVec
(
ccmaConstraintMatrixColumn
->
getSize
());
vector
<
mm_int2
>
atomsVec
(
ccmaAtoms
.
getSize
());
vector
<
cl_int
>
atomConstraintsVec
(
ccmaAtomConstraints
.
getSize
());
vector
<
cl_int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
.
getSize
());
vector
<
cl_int
>
constraintMatrixColumnVec
(
ccmaConstraintMatrixColumn
.
getSize
());
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
{
ccmaDistance
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaDelta1
=
OpenCLArray
::
creat
e
<
cl_double
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
=
OpenCLArray
::
creat
e
<
cl_double
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaReducedMass
=
OpenCLArray
::
creat
e
<
cl_double
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
=
OpenCLArray
::
creat
e
<
cl_double
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
mm_double4
>
distanceVec
(
ccmaDistance
->
getSize
());
vector
<
cl_double
>
reducedMassVec
(
ccmaReducedMass
->
getSize
());
vector
<
cl_double
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
->
getSize
());
ccmaDistance
.
initializ
e
<
mm_double4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaDelta1
.
initializ
e
<
cl_double
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
.
initializ
e
<
cl_double
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaReducedMass
.
initializ
e
<
cl_double
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
.
initializ
e
<
cl_double
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
mm_double4
>
distanceVec
(
ccmaDistance
.
getSize
());
vector
<
cl_double
>
reducedMassVec
(
ccmaReducedMass
.
getSize
());
vector
<
cl_double
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
.
getSize
());
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
int
index
=
constraintOrder
[
i
];
int
c
=
ccmaConstraints
[
index
];
...
...
@@ -432,19 +424,19 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
atomConstraintsVec
[
i
+
j
*
numAtoms
]
=
(
forward
?
inverseOrder
[
atomConstraints
[
i
][
j
]]
+
1
:
-
inverseOrder
[
atomConstraints
[
i
][
j
]]
-
1
);
}
}
ccmaDistance
->
upload
(
distanceVec
);
ccmaReducedMass
->
upload
(
reducedMassVec
);
ccmaConstraintMatrixValue
->
upload
(
constraintMatrixValueVec
);
ccmaDistance
.
upload
(
distanceVec
);
ccmaReducedMass
.
upload
(
reducedMassVec
);
ccmaConstraintMatrixValue
.
upload
(
constraintMatrixValueVec
);
}
else
{
ccmaDistance
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaDelta1
=
OpenCLArray
::
creat
e
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
=
OpenCLArray
::
creat
e
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaReducedMass
=
OpenCLArray
::
creat
e
<
cl_float
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
=
OpenCLArray
::
creat
e
<
cl_float
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
mm_float4
>
distanceVec
(
ccmaDistance
->
getSize
());
vector
<
cl_float
>
reducedMassVec
(
ccmaReducedMass
->
getSize
());
vector
<
cl_float
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
->
getSize
());
ccmaDistance
.
initializ
e
<
mm_float4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaDelta1
.
initializ
e
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
.
initializ
e
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaReducedMass
.
initializ
e
<
cl_float
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
.
initializ
e
<
cl_float
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
mm_float4
>
distanceVec
(
ccmaDistance
.
getSize
());
vector
<
cl_float
>
reducedMassVec
(
ccmaReducedMass
.
getSize
());
vector
<
cl_float
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
.
getSize
());
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
int
index
=
constraintOrder
[
i
];
int
c
=
ccmaConstraints
[
index
];
...
...
@@ -465,14 +457,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
atomConstraintsVec
[
i
+
j
*
numAtoms
]
=
(
forward
?
inverseOrder
[
atomConstraints
[
i
][
j
]]
+
1
:
-
inverseOrder
[
atomConstraints
[
i
][
j
]]
-
1
);
}
}
ccmaDistance
->
upload
(
distanceVec
);
ccmaReducedMass
->
upload
(
reducedMassVec
);
ccmaConstraintMatrixValue
->
upload
(
constraintMatrixValueVec
);
ccmaDistance
.
upload
(
distanceVec
);
ccmaReducedMass
.
upload
(
reducedMassVec
);
ccmaConstraintMatrixValue
.
upload
(
constraintMatrixValueVec
);
}
ccmaAtoms
->
upload
(
atomsVec
);
ccmaAtomConstraints
->
upload
(
atomConstraintsVec
);
ccmaNumAtomConstraints
->
upload
(
numAtomConstraintsVec
);
ccmaConstraintMatrixColumn
->
upload
(
constraintMatrixColumnVec
);
ccmaAtoms
.
upload
(
atomsVec
);
ccmaAtomConstraints
.
upload
(
atomConstraintsVec
);
ccmaNumAtomConstraints
.
upload
(
numAtomConstraintsVec
);
ccmaConstraintMatrixColumn
.
upload
(
constraintMatrixColumnVec
);
// Create the CCMA kernels.
...
...
@@ -554,73 +546,73 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
int
numOutOfPlane
=
vsiteOutOfPlaneAtomVec
.
size
();
int
numLocalCoords
=
vsiteLocalCoordsPosVec
.
size
();
numVsites
=
num2Avg
+
num3Avg
+
numOutOfPlane
+
numLocalCoords
;
vsite2AvgAtoms
=
OpenCLArray
::
creat
e
<
mm_int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite3AvgAtoms
=
OpenCLArray
::
creat
e
<
mm_int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsiteOutOfPlaneAtoms
=
OpenCLArray
::
creat
e
<
mm_int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteLocalCoordsIndex
=
OpenCLArray
::
creat
e
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsIndexVec
.
size
()),
"vsiteLocalCoordsIndex"
);
vsiteLocalCoordsAtoms
=
OpenCLArray
::
creat
e
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsAtomVec
.
size
()),
"vsiteLocalCoordsAtoms"
);
vsiteLocalCoordsStartIndex
=
OpenCLArray
::
creat
e
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsStartVec
.
size
()),
"vsiteLocalCoordsStartIndex"
);
vsite2AvgAtoms
.
initializ
e
<
mm_int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite3AvgAtoms
.
initializ
e
<
mm_int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsiteOutOfPlaneAtoms
.
initializ
e
<
mm_int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteLocalCoordsIndex
.
initializ
e
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsIndexVec
.
size
()),
"vsiteLocalCoordsIndex"
);
vsiteLocalCoordsAtoms
.
initializ
e
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsAtomVec
.
size
()),
"vsiteLocalCoordsAtoms"
);
vsiteLocalCoordsStartIndex
.
initializ
e
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsStartVec
.
size
()),
"vsiteLocalCoordsStartIndex"
);
if
(
num2Avg
>
0
)
vsite2AvgAtoms
->
upload
(
vsite2AvgAtomVec
);
vsite2AvgAtoms
.
upload
(
vsite2AvgAtomVec
);
if
(
num3Avg
>
0
)
vsite3AvgAtoms
->
upload
(
vsite3AvgAtomVec
);
vsite3AvgAtoms
.
upload
(
vsite3AvgAtomVec
);
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneAtoms
->
upload
(
vsiteOutOfPlaneAtomVec
);
vsiteOutOfPlaneAtoms
.
upload
(
vsiteOutOfPlaneAtomVec
);
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsIndex
->
upload
(
vsiteLocalCoordsIndexVec
);
vsiteLocalCoordsAtoms
->
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsStartIndex
->
upload
(
vsiteLocalCoordsStartVec
);
vsiteLocalCoordsIndex
.
upload
(
vsiteLocalCoordsIndexVec
);
vsiteLocalCoordsAtoms
.
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsStartIndex
.
upload
(
vsiteLocalCoordsStartVec
);
}
if
(
context
.
getUseDoublePrecision
())
{
vsite2AvgWeights
=
OpenCLArray
::
creat
e
<
mm_double2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsWeights
=
OpenCLArray
::
creat
e
<
cl_double
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
vsite2AvgWeights
.
initializ
e
<
mm_double2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
.
initializ
e
<
mm_double4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
.
initializ
e
<
mm_double4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsWeights
.
initializ
e
<
cl_double
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
.
initializ
e
<
mm_double4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
vsite2AvgWeights
->
upload
(
vsite2AvgWeightVec
);
vsite2AvgWeights
.
upload
(
vsite2AvgWeightVec
);
if
(
num3Avg
>
0
)
vsite3AvgWeights
->
upload
(
vsite3AvgWeightVec
);
vsite3AvgWeights
.
upload
(
vsite3AvgWeightVec
);
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneWeights
->
upload
(
vsiteOutOfPlaneWeightVec
);
vsiteOutOfPlaneWeights
.
upload
(
vsiteOutOfPlaneWeightVec
);
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsWeights
->
upload
(
vsiteLocalCoordsWeightVec
);
vsiteLocalCoordsPos
->
upload
(
vsiteLocalCoordsPosVec
);
vsiteLocalCoordsWeights
.
upload
(
vsiteLocalCoordsWeightVec
);
vsiteLocalCoordsPos
.
upload
(
vsiteLocalCoordsPosVec
);
}
}
else
{
vsite2AvgWeights
=
OpenCLArray
::
creat
e
<
mm_float2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsWeights
=
OpenCLArray
::
creat
e
<
cl_float
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
vsite2AvgWeights
.
initializ
e
<
mm_float2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
.
initializ
e
<
mm_float4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
.
initializ
e
<
mm_float4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsWeights
.
initializ
e
<
cl_float
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
.
initializ
e
<
mm_float4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
{
vector
<
mm_float2
>
floatWeights
(
num2Avg
);
for
(
int
i
=
0
;
i
<
num2Avg
;
i
++
)
floatWeights
[
i
]
=
mm_float2
((
float
)
vsite2AvgWeightVec
[
i
].
x
,
(
float
)
vsite2AvgWeightVec
[
i
].
y
);
vsite2AvgWeights
->
upload
(
floatWeights
);
vsite2AvgWeights
.
upload
(
floatWeights
);
}
if
(
num3Avg
>
0
)
{
vector
<
mm_float4
>
floatWeights
(
num3Avg
);
for
(
int
i
=
0
;
i
<
num3Avg
;
i
++
)
floatWeights
[
i
]
=
mm_float4
((
float
)
vsite3AvgWeightVec
[
i
].
x
,
(
float
)
vsite3AvgWeightVec
[
i
].
y
,
(
float
)
vsite3AvgWeightVec
[
i
].
z
,
0.0
f
);
vsite3AvgWeights
->
upload
(
floatWeights
);
vsite3AvgWeights
.
upload
(
floatWeights
);
}
if
(
numOutOfPlane
>
0
)
{
vector
<
mm_float4
>
floatWeights
(
numOutOfPlane
);
for
(
int
i
=
0
;
i
<
numOutOfPlane
;
i
++
)
floatWeights
[
i
]
=
mm_float4
((
float
)
vsiteOutOfPlaneWeightVec
[
i
].
x
,
(
float
)
vsiteOutOfPlaneWeightVec
[
i
].
y
,
(
float
)
vsiteOutOfPlaneWeightVec
[
i
].
z
,
0.0
f
);
vsiteOutOfPlaneWeights
->
upload
(
floatWeights
);
vsiteOutOfPlaneWeights
.
upload
(
floatWeights
);
}
if
(
numLocalCoords
>
0
)
{
vector
<
cl_float
>
floatWeights
(
vsiteLocalCoordsWeightVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsWeightVec
.
size
();
i
++
)
floatWeights
[
i
]
=
(
cl_float
)
vsiteLocalCoordsWeightVec
[
i
];
vsiteLocalCoordsWeights
->
upload
(
floatWeights
);
vsiteLocalCoordsWeights
.
upload
(
floatWeights
);
vector
<
mm_float4
>
floatPos
(
vsiteLocalCoordsPosVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsPosVec
.
size
();
i
++
)
floatPos
[
i
]
=
mm_float4
((
float
)
vsiteLocalCoordsPosVec
[
i
].
x
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
y
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
z
,
0.0
f
);
vsiteLocalCoordsPos
->
upload
(
floatPos
);
vsiteLocalCoordsPos
.
upload
(
floatPos
);
}
}
...
...
@@ -655,17 +647,17 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
if
(
context
.
getUseMixedPrecision
())
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosqCorrection
().
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite2AvgAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite2AvgWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsIndex
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsPos
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsStartIndex
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite2AvgAtoms
.
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite2AvgWeights
.
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgAtoms
.
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgWeights
.
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneAtoms
.
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneWeights
.
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsIndex
.
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsAtoms
.
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsWeights
.
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsPos
.
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsStartIndex
.
getDeviceBuffer
());
vsiteForceKernel
=
cl
::
Kernel
(
vsiteProgram
,
"distributeForces"
);
index
=
0
;
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
...
...
@@ -674,102 +666,39 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
index
++
;
// Skip argument 2: the force array hasn't been created yet.
if
(
context
.
getUseMixedPrecision
())
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosqCorrection
().
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite2AvgAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite2AvgWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsIndex
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsPos
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsStartIndex
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite2AvgAtoms
.
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite2AvgWeights
.
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgAtoms
.
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgWeights
.
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneAtoms
.
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneWeights
.
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsIndex
.
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsAtoms
.
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsWeights
.
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsPos
.
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsStartIndex
.
getDeviceBuffer
());
if
(
hasOverlappingVsites
&&
context
.
getSupports64BitGlobalAtomics
())
vsiteAddForcesKernel
=
cl
::
Kernel
(
vsiteProgram
,
"addDistributedForces"
);
}
OpenCLIntegrationUtilities
::~
OpenCLIntegrationUtilities
()
{
if
(
posDelta
!=
NULL
)
delete
posDelta
;
if
(
settleAtoms
!=
NULL
)
delete
settleAtoms
;
if
(
settleParams
!=
NULL
)
delete
settleParams
;
if
(
shakeAtoms
!=
NULL
)
delete
shakeAtoms
;
if
(
shakeParams
!=
NULL
)
delete
shakeParams
;
if
(
random
!=
NULL
)
delete
random
;
if
(
randomSeed
!=
NULL
)
delete
randomSeed
;
if
(
stepSize
!=
NULL
)
delete
stepSize
;
if
(
ccmaAtoms
!=
NULL
)
delete
ccmaAtoms
;
if
(
ccmaDistance
!=
NULL
)
delete
ccmaDistance
;
if
(
ccmaReducedMass
!=
NULL
)
delete
ccmaReducedMass
;
if
(
ccmaAtomConstraints
!=
NULL
)
delete
ccmaAtomConstraints
;
if
(
ccmaNumAtomConstraints
!=
NULL
)
delete
ccmaNumAtomConstraints
;
if
(
ccmaConstraintMatrixColumn
!=
NULL
)
delete
ccmaConstraintMatrixColumn
;
if
(
ccmaConstraintMatrixValue
!=
NULL
)
delete
ccmaConstraintMatrixValue
;
if
(
ccmaDelta1
!=
NULL
)
delete
ccmaDelta1
;
if
(
ccmaDelta2
!=
NULL
)
delete
ccmaDelta2
;
if
(
ccmaConverged
!=
NULL
)
delete
ccmaConverged
;
if
(
ccmaConvergedHostBuffer
!=
NULL
)
delete
ccmaConvergedHostBuffer
;
if
(
vsite2AvgAtoms
!=
NULL
)
delete
vsite2AvgAtoms
;
if
(
vsite2AvgWeights
!=
NULL
)
delete
vsite2AvgWeights
;
if
(
vsite3AvgAtoms
!=
NULL
)
delete
vsite3AvgAtoms
;
if
(
vsite3AvgWeights
!=
NULL
)
delete
vsite3AvgWeights
;
if
(
vsiteOutOfPlaneAtoms
!=
NULL
)
delete
vsiteOutOfPlaneAtoms
;
if
(
vsiteOutOfPlaneWeights
!=
NULL
)
delete
vsiteOutOfPlaneWeights
;
if
(
vsiteLocalCoordsIndex
!=
NULL
)
delete
vsiteLocalCoordsIndex
;
if
(
vsiteLocalCoordsAtoms
!=
NULL
)
delete
vsiteLocalCoordsAtoms
;
if
(
vsiteLocalCoordsWeights
!=
NULL
)
delete
vsiteLocalCoordsWeights
;
if
(
vsiteLocalCoordsPos
!=
NULL
)
delete
vsiteLocalCoordsPos
;
if
(
vsiteLocalCoordsStartIndex
!=
NULL
)
delete
vsiteLocalCoordsStartIndex
;
}
void
OpenCLIntegrationUtilities
::
setNextStepSize
(
double
size
)
{
if
(
size
!=
lastStepSize
.
x
||
size
!=
lastStepSize
.
y
)
{
lastStepSize
=
mm_double2
(
size
,
size
);
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
stepSize
->
upload
(
&
lastStepSize
);
stepSize
.
upload
(
&
lastStepSize
);
else
{
mm_float2
lastStepSizeFloat
=
mm_float2
((
float
)
size
,
(
float
)
size
);
stepSize
->
upload
(
&
lastStepSizeFloat
);
stepSize
.
upload
(
&
lastStepSizeFloat
);
}
}
}
double
OpenCLIntegrationUtilities
::
getLastStepSize
()
{
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
stepSize
->
download
(
&
lastStepSize
);
stepSize
.
download
(
&
lastStepSize
);
else
{
mm_float2
lastStepSizeFloat
;
stepSize
->
download
(
&
lastStepSizeFloat
);
stepSize
.
download
(
&
lastStepSizeFloat
);
lastStepSize
=
mm_double2
(
lastStepSizeFloat
.
x
,
lastStepSizeFloat
.
y
);
}
return
lastStepSize
.
y
;
...
...
@@ -802,102 +731,102 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaUpdateKernel
=
ccmaPosUpdateKernel
;
hasInitializedPosConstraintKernels
=
true
;
}
if
(
settleAtoms
!=
NULL
)
{
if
(
settleAtoms
.
isInitialized
()
)
{
if
(
!
hasInitialized
)
{
settleKernel
.
setArg
<
cl_int
>
(
0
,
settleAtoms
->
getSize
());
settleKernel
.
setArg
<
cl_int
>
(
0
,
settleAtoms
.
getSize
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
if
(
context
.
getUseMixedPrecision
())
settleKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosqCorrection
().
getDeviceBuffer
());
else
settleKernel
.
setArg
<
void
*>
(
3
,
NULL
);
settleKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
posDelta
->
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
posDelta
.
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
context
.
getVelm
().
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
settleAtoms
->
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
settleParams
->
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
settleAtoms
.
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
settleParams
.
getDeviceBuffer
());
}
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
settleKernel
.
setArg
<
cl_double
>
(
1
,
(
cl_double
)
tol
);
else
settleKernel
.
setArg
<
cl_float
>
(
1
,
(
cl_float
)
tol
);
context
.
executeKernel
(
settleKernel
,
settleAtoms
->
getSize
());
context
.
executeKernel
(
settleKernel
,
settleAtoms
.
getSize
());
}
if
(
shakeAtoms
!=
NULL
)
{
if
(
shakeAtoms
.
isInitialized
()
)
{
if
(
!
hasInitialized
)
{
shakeKernel
.
setArg
<
cl_int
>
(
0
,
shakeAtoms
->
getSize
());
shakeKernel
.
setArg
<
cl_int
>
(
0
,
shakeAtoms
.
getSize
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
if
(
context
.
getUseMixedPrecision
())
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosqCorrection
().
getDeviceBuffer
());
else
shakeKernel
.
setArg
<
void
*>
(
3
,
NULL
);
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
constrainVelocities
?
context
.
getVelm
().
getDeviceBuffer
()
:
posDelta
->
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
shakeAtoms
->
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
shakeParams
->
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
constrainVelocities
?
context
.
getVelm
().
getDeviceBuffer
()
:
posDelta
.
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
shakeAtoms
.
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
shakeParams
.
getDeviceBuffer
());
}
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
shakeKernel
.
setArg
<
cl_double
>
(
1
,
(
cl_double
)
tol
);
else
shakeKernel
.
setArg
<
cl_float
>
(
1
,
(
cl_float
)
tol
);
context
.
executeKernel
(
shakeKernel
,
shakeAtoms
->
getSize
());
context
.
executeKernel
(
shakeKernel
,
shakeAtoms
.
getSize
());
}
if
(
ccmaAtoms
!=
NULL
)
{
if
(
ccmaAtoms
.
isInitialized
()
)
{
if
(
!
hasInitialized
)
{
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
.
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
.
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
if
(
context
.
getUseMixedPrecision
())
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosqCorrection
().
getDeviceBuffer
());
else
ccmaDirectionsKernel
.
setArg
<
void
*>
(
3
,
NULL
);
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaConverged
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
constrainVelocities
?
context
.
getVelm
().
getDeviceBuffer
()
:
posDelta
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaReducedMass
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
ccmaConverged
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaConvergedHostBuffer
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaConstraintMatrixColumn
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaConstraintMatrixValue
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaConverged
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaNumAtomConstraints
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaAtomConstraints
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaDistance
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
constrainVelocities
?
context
.
getVelm
().
getDeviceBuffer
()
:
posDelta
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaConverged
.
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
.
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
.
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
constrainVelocities
?
context
.
getVelm
().
getDeviceBuffer
()
:
posDelta
.
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaReducedMass
.
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaDelta1
.
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
ccmaConverged
.
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaConvergedHostBuffer
.
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaDelta1
.
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDelta2
.
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaConstraintMatrixColumn
.
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaConstraintMatrixValue
.
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaConverged
.
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaNumAtomConstraints
.
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaAtomConstraints
.
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaDistance
.
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
constrainVelocities
?
context
.
getVelm
().
getDeviceBuffer
()
:
posDelta
.
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
context
.
getVelm
().
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
ccmaConverged
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
ccmaDelta1
.
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaDelta2
.
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
ccmaConverged
.
getDeviceBuffer
());
}
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
ccmaForceKernel
.
setArg
<
cl_double
>
(
7
,
(
cl_double
)
tol
);
else
ccmaForceKernel
.
setArg
<
cl_float
>
(
7
,
(
cl_float
)
tol
);
context
.
executeKernel
(
ccmaDirectionsKernel
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaDirectionsKernel
,
ccmaAtoms
.
getSize
());
const
int
checkInterval
=
4
;
int
*
converged
=
(
int
*
)
context
.
getPinnedBuffer
();
int
*
ccmaConvergedHostMemory
=
(
int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
ccmaConvergedHostBuffer
->
getDeviceBuffer
(),
CL_TRUE
,
CL_MAP_WRITE
,
0
,
sizeof
(
cl_int
));
int
*
ccmaConvergedHostMemory
=
(
int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
ccmaConvergedHostBuffer
.
getDeviceBuffer
(),
CL_TRUE
,
CL_MAP_WRITE
,
0
,
sizeof
(
cl_int
));
ccmaConvergedHostMemory
[
0
]
=
0
;
context
.
getQueue
().
enqueueUnmapMemObject
(
ccmaConvergedHostBuffer
->
getDeviceBuffer
(),
ccmaConvergedHostMemory
);
context
.
getQueue
().
enqueueUnmapMemObject
(
ccmaConvergedHostBuffer
.
getDeviceBuffer
(),
ccmaConvergedHostMemory
);
for
(
int
i
=
0
;
i
<
150
;
i
++
)
{
ccmaForceKernel
.
setArg
<
cl_int
>
(
8
,
i
);
context
.
executeKernel
(
ccmaForceKernel
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaForceKernel
,
ccmaAtoms
.
getSize
());
cl
::
Event
event
;
if
((
i
+
1
)
%
checkInterval
==
0
&&
!
ccmaUseDirectBuffer
)
context
.
getQueue
().
enqueueReadBuffer
(
ccmaConverged
->
getDeviceBuffer
(),
CL_FALSE
,
0
,
2
*
sizeof
(
cl_int
),
converged
,
NULL
,
&
event
);
context
.
getQueue
().
enqueueReadBuffer
(
ccmaConverged
.
getDeviceBuffer
(),
CL_FALSE
,
0
,
2
*
sizeof
(
cl_int
),
converged
,
NULL
,
&
event
);
ccmaMultiplyKernel
.
setArg
<
cl_int
>
(
5
,
i
);
context
.
executeKernel
(
ccmaMultiplyKernel
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaMultiplyKernel
,
ccmaAtoms
.
getSize
());
ccmaUpdateKernel
.
setArg
<
cl_int
>
(
8
,
i
);
context
.
executeKernel
(
ccmaUpdateKernel
,
context
.
getNumAtoms
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
if
(
ccmaUseDirectBuffer
)
{
ccmaConvergedHostMemory
=
(
int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
ccmaConvergedHostBuffer
->
getDeviceBuffer
(),
CL_FALSE
,
CL_MAP_READ
,
0
,
sizeof
(
cl_int
),
NULL
,
&
event
);
ccmaConvergedHostMemory
=
(
int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
ccmaConvergedHostBuffer
.
getDeviceBuffer
(),
CL_FALSE
,
CL_MAP_READ
,
0
,
sizeof
(
cl_int
),
NULL
,
&
event
);
context
.
getQueue
().
flush
();
while
(
event
.
getInfo
<
CL_EVENT_COMMAND_EXECUTION_STATUS
>
()
!=
CL_COMPLETE
)
;
converged
[
i
%
2
]
=
ccmaConvergedHostMemory
[
0
];
context
.
getQueue
().
enqueueUnmapMemObject
(
ccmaConvergedHostBuffer
->
getDeviceBuffer
(),
ccmaConvergedHostMemory
);
context
.
getQueue
().
enqueueUnmapMemObject
(
ccmaConvergedHostBuffer
.
getDeviceBuffer
(),
ccmaConvergedHostMemory
);
}
else
event
.
wait
();
...
...
@@ -938,7 +867,7 @@ void OpenCLIntegrationUtilities::distributeForcesFromVirtualSites() {
}
void
OpenCLIntegrationUtilities
::
initRandomNumberGenerator
(
unsigned
int
randomNumberSeed
)
{
if
(
random
!=
NULL
)
{
if
(
random
.
isInitialized
()
)
{
if
(
randomNumberSeed
!=
lastSeed
)
throw
OpenMMException
(
"OpenCLIntegrationUtilities::initRandomNumberGenerator(): Requested two different values for the random number seed"
);
return
;
...
...
@@ -947,23 +876,23 @@ void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNu
// Create the random number arrays.
lastSeed
=
randomNumberSeed
;
random
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
context
,
4
*
context
.
getPaddedNumAtoms
(),
"random"
);
randomSeed
=
OpenCLArray
::
creat
e
<
mm_int4
>
(
context
,
context
.
getNumThreadBlocks
()
*
OpenCLContext
::
ThreadBlockSize
,
"randomSeed"
);
randomPos
=
random
->
getSize
();
random
.
initializ
e
<
mm_float4
>
(
context
,
4
*
context
.
getPaddedNumAtoms
(),
"random"
);
randomSeed
.
initializ
e
<
mm_int4
>
(
context
,
context
.
getNumThreadBlocks
()
*
OpenCLContext
::
ThreadBlockSize
,
"randomSeed"
);
randomPos
=
random
.
getSize
();
// Use a quick and dirty RNG to pick seeds for the real random number generator.
vector
<
mm_int4
>
seed
(
randomSeed
->
getSize
());
vector
<
mm_int4
>
seed
(
randomSeed
.
getSize
());
unsigned
int
r
=
randomNumberSeed
;
// A seed of 0 means use a unique one
if
(
r
==
0
)
r
=
(
unsigned
int
)
osrngseed
();
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
for
(
int
i
=
0
;
i
<
randomSeed
.
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
randomSeed
.
upload
(
seed
);
// Create the kernel.
...
...
@@ -972,45 +901,44 @@ void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNu
}
int
OpenCLIntegrationUtilities
::
prepareRandomNumbers
(
int
numValues
)
{
if
(
randomPos
+
numValues
<=
random
->
getSize
())
{
if
(
randomPos
+
numValues
<=
random
.
getSize
())
{
int
oldPos
=
randomPos
;
randomPos
+=
numValues
;
return
oldPos
;
}
if
(
numValues
>
random
->
getSize
())
{
delete
random
;
random
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
numValues
,
"random"
);
if
(
numValues
>
random
.
getSize
())
{
random
.
resize
(
numValues
);
}
randomKernel
.
setArg
<
cl_int
>
(
0
,
random
->
getSize
());
randomKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
random
->
getDeviceBuffer
());
randomKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
randomSeed
->
getDeviceBuffer
());
context
.
executeKernel
(
randomKernel
,
random
->
getSize
());
randomKernel
.
setArg
<
cl_int
>
(
0
,
random
.
getSize
());
randomKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
random
.
getDeviceBuffer
());
randomKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
randomSeed
.
getDeviceBuffer
());
context
.
executeKernel
(
randomKernel
,
random
.
getSize
());
randomPos
=
numValues
;
return
0
;
}
void
OpenCLIntegrationUtilities
::
createCheckpoint
(
ostream
&
stream
)
{
if
(
random
==
NULL
)
if
(
!
random
.
isInitialized
()
)
return
;
stream
.
write
((
char
*
)
&
randomPos
,
sizeof
(
int
));
vector
<
mm_float4
>
randomVec
;
random
->
download
(
randomVec
);
stream
.
write
((
char
*
)
&
randomVec
[
0
],
sizeof
(
mm_float4
)
*
random
->
getSize
());
random
.
download
(
randomVec
);
stream
.
write
((
char
*
)
&
randomVec
[
0
],
sizeof
(
mm_float4
)
*
random
.
getSize
());
vector
<
mm_int4
>
randomSeedVec
;
randomSeed
->
download
(
randomSeedVec
);
stream
.
write
((
char
*
)
&
randomSeedVec
[
0
],
sizeof
(
mm_int4
)
*
randomSeed
->
getSize
());
randomSeed
.
download
(
randomSeedVec
);
stream
.
write
((
char
*
)
&
randomSeedVec
[
0
],
sizeof
(
mm_int4
)
*
randomSeed
.
getSize
());
}
void
OpenCLIntegrationUtilities
::
loadCheckpoint
(
istream
&
stream
)
{
if
(
random
==
NULL
)
if
(
!
random
.
isInitialized
()
)
return
;
stream
.
read
((
char
*
)
&
randomPos
,
sizeof
(
int
));
vector
<
mm_float4
>
randomVec
(
random
->
getSize
());
stream
.
read
((
char
*
)
&
randomVec
[
0
],
sizeof
(
mm_float4
)
*
random
->
getSize
());
random
->
upload
(
randomVec
);
vector
<
mm_int4
>
randomSeedVec
(
randomSeed
->
getSize
());
stream
.
read
((
char
*
)
&
randomSeedVec
[
0
],
sizeof
(
mm_int4
)
*
randomSeed
->
getSize
());
randomSeed
->
upload
(
randomSeedVec
);
vector
<
mm_float4
>
randomVec
(
random
.
getSize
());
stream
.
read
((
char
*
)
&
randomVec
[
0
],
sizeof
(
mm_float4
)
*
random
.
getSize
());
random
.
upload
(
randomVec
);
vector
<
mm_int4
>
randomSeedVec
(
randomSeed
.
getSize
());
stream
.
read
((
char
*
)
&
randomSeedVec
[
0
],
sizeof
(
mm_int4
)
*
randomSeed
.
getSize
());
randomSeed
.
upload
(
randomSeedVec
);
}
double
OpenCLIntegrationUtilities
::
computeKineticEnergy
(
double
timeShift
)
{
...
...
@@ -1018,7 +946,7 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) {
if
(
timeShift
!=
0
)
{
// Copy the velocities into the posDelta array while we temporarily modify them.
context
.
getVelm
().
copyTo
(
*
posDelta
);
context
.
getVelm
().
copyTo
(
posDelta
);
// Apply the time shift.
...
...
@@ -1057,6 +985,6 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) {
// Restore the velocities.
if
(
timeShift
!=
0
)
posDelta
->
copyTo
(
context
.
getVelm
());
posDelta
.
copyTo
(
context
.
getVelm
());
return
0.5
*
energy
;
}
platforms/opencl/src/OpenCLKernels.cpp
View file @
b33ee3b0
...
...
@@ -6357,47 +6357,6 @@ private:
OpenCLCalcGayBerneForceKernel& owner;
};
OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
if (sortedParticles != NULL)
delete sortedParticles;
if (axisParticleIndices != NULL)
delete axisParticleIndices;
if (sigParams != NULL)
delete sigParams;
if (epsParams != NULL)
delete epsParams;
if (scale != NULL)
delete scale;
if (exceptionParticles != NULL)
delete exceptionParticles;
if (exceptionParams != NULL)
delete exceptionParams;
if (aMatrix != NULL)
delete aMatrix;
if (bMatrix != NULL)
delete bMatrix;
if (gMatrix != NULL)
delete gMatrix;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighbors != NULL)
delete neighbors;
if (neighborIndex != NULL)
delete neighborIndex;
if (neighborBlockCount != NULL)
delete neighborBlockCount;
if (sortedPos != NULL)
delete sortedPos;
if (torque != NULL)
delete torque;
}
void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBerneForce& force) {
if (!cl.getSupports64BitGlobalAtomics())
throw OpenMMException("GayBerneForce requires a device that supports 64 bit atomic operations");
...
...
@@ -6405,14 +6364,14 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
// Initialize interactions.
int numParticles = force.getNumParticles();
sigParams
= OpenCLArray::creat
e<mm_float4>(cl, cl.getPaddedNumAtoms(), "sigParams");
epsParams
= OpenCLArray::creat
e<mm_float2>(cl, cl.getPaddedNumAtoms(), "epsParams");
scale
= OpenCLArray::creat
e<mm_float4>(cl, cl.getPaddedNumAtoms(), "scale");
axisParticleIndices
= OpenCLArray::creat
e<mm_int2>(cl, cl.getPaddedNumAtoms(), "axisParticleIndices");
sortedParticles
= OpenCLArray::creat
e<cl_int>(cl, cl.getPaddedNumAtoms(), "sortedParticles");
aMatrix
= OpenCLArray::creat
e<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "aMatrix");
bMatrix
= OpenCLArray::creat
e<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "bMatrix");
gMatrix
= OpenCLArray::creat
e<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "gMatrix");
sigParams
.initializ
e<mm_float4>(cl, cl.getPaddedNumAtoms(), "sigParams");
epsParams
.initializ
e<mm_float2>(cl, cl.getPaddedNumAtoms(), "epsParams");
scale
.initializ
e<mm_float4>(cl, cl.getPaddedNumAtoms(), "scale");
axisParticleIndices
.initializ
e<mm_int2>(cl, cl.getPaddedNumAtoms(), "axisParticleIndices");
sortedParticles
.initializ
e<cl_int>(cl, cl.getPaddedNumAtoms(), "sortedParticles");
aMatrix
.initializ
e<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "aMatrix");
bMatrix
.initializ
e<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "bMatrix");
gMatrix
.initializ
e<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "gMatrix");
vector<mm_float4> sigParamsVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_float2> epsParamsVector(cl.getPaddedNumAtoms(), mm_float2(0, 0));
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
...
...
@@ -6428,10 +6387,10 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
isRealParticle[i] = (epsilon != 0.0);
}
sigParams
->
upload(sigParamsVector);
epsParams
->
upload(epsParamsVector);
scale
->
upload(scaleVector);
axisParticleIndices
->
upload(axisParticleVector);
sigParams
.
upload(sigParamsVector);
epsParams
.
upload(epsParamsVector);
scale
.
upload(scaleVector);
axisParticleIndices
.
upload(axisParticleVector);
// Record exceptions and exclusions.
...
...
@@ -6454,29 +6413,29 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
if (isRealParticle[i])
numRealParticles++;
int numExceptions = exceptionParamsVec.size();
exclusions
= OpenCLArray::creat
e<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex
= OpenCLArray::creat
e<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
exceptionParticles
= OpenCLArray::creat
e<mm_int4>(cl, max(1, numExceptions), "exceptionParticles");
exceptionParams
= OpenCLArray::creat
e<mm_float2>(cl, max(1, numExceptions), "exceptionParams");
exclusions
.initializ
e<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex
.initializ
e<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
exceptionParticles
.initializ
e<mm_int4>(cl, max(1, numExceptions), "exceptionParticles");
exceptionParams
.initializ
e<mm_float2>(cl, max(1, numExceptions), "exceptionParams");
if (numExceptions > 0)
exceptionParams
->
upload(exceptionParamsVec);
exceptionParams
.
upload(exceptionParamsVec);
// Create data structures used for the neighbor list.
int numAtomBlocks = (numRealParticles+31)/32;
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
blockCenter
= new OpenCLArray
(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox
= new OpenCLArray
(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
sortedPos
= new OpenCLArray
(cl, numRealParticles, 4*elementSize, "sortedPos");
blockCenter
.initialize
(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox
.initialize
(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
sortedPos
.initialize
(cl, numRealParticles, 4*elementSize, "sortedPos");
maxNeighborBlocks = numRealParticles*2;
neighbors
= OpenCLArray::creat
e<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex
= OpenCLArray::creat
e<cl_int>(cl, maxNeighborBlocks, "neighborIndex");
neighborBlockCount
= OpenCLArray::creat
e<cl_int>(cl, 1, "neighborBlockCount");
neighbors
.initializ
e<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex
.initializ
e<cl_int>(cl, maxNeighborBlocks, "neighborIndex");
neighborBlockCount
.initializ
e<cl_int>(cl, 1, "neighborBlockCount");
// Create array for accumulating torques.
torque
= OpenCLArray::creat
e<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque");
cl.addAutoclearBuffer(
*
torque);
torque
.initializ
e<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque");
cl.addAutoclearBuffer(torque);
// Create the kernels.
...
...
@@ -6519,60 +6478,60 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
sortAtoms();
framesKernel.setArg<cl_int>(0, numRealParticles);
framesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(2, axisParticleIndices
->
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(3, sigParams
->
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(4, scale
->
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(5, aMatrix
->
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(6, bMatrix
->
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(7, gMatrix
->
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(8, sortedParticles
->
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(2, axisParticleIndices
.
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(3, sigParams
.
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(4, scale
.
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(5, aMatrix
.
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(6, bMatrix
.
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(7, gMatrix
.
getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(8, sortedParticles
.
getDeviceBuffer());
blockBoundsKernel.setArg<cl_int>(0, numRealParticles);
blockBoundsKernel.setArg<cl::Buffer>(6, sortedParticles
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(6, sortedParticles
.
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(7, cl.getPosq().getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(11, neighborBlockCount
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos
.
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter
.
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox
.
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(11, neighborBlockCount
.
getDeviceBuffer());
neighborsKernel.setArg<cl_int>(0, numRealParticles);
neighborsKernel.setArg<cl_int>(1, maxNeighborBlocks);
neighborsKernel.setArg<cl::Buffer>(7, sortedPos
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, blockCenter
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(9, blockBoundingBox
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(10, neighbors
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(13, exclusions
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(14, exclusionStartIndex
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(7, sortedPos
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, blockCenter
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(9, blockBoundingBox
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(10, neighbors
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(13, exclusions
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(14, exclusionStartIndex
.
getDeviceBuffer());
int index = 0;
forceKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, torque
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, torque
.
getDeviceBuffer());
forceKernel.setArg<cl_int>(index++, numRealParticles);
forceKernel.setArg<cl_int>(index++, exceptionAtoms.size());
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedPos
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sigParams
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, epsParams
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedParticles
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, aMatrix
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, bMatrix
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, gMatrix
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParticles
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParams
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedPos
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sigParams
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, epsParams
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedParticles
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, aMatrix
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, bMatrix
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, gMatrix
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParticles
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParams
.
getDeviceBuffer());
if (nonbondedMethod != GayBerneForce::NoCutoff) {
forceKernel.setArg<cl_int>(index++, maxNeighborBlocks);
forceKernel.setArg<cl::Buffer>(index++, neighbors
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborIndex
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborBlockCount
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighbors
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborIndex
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborBlockCount
.
getDeviceBuffer());
}
index = 0;
torqueKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, torque
->
getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, torque
.
getDeviceBuffer());
torqueKernel.setArg<cl_int>(index++, numRealParticles);
torqueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, axisParticleIndices
->
getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, sortedParticles
->
getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, axisParticleIndices
.
getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, sortedParticles
.
getDeviceBuffer());
}
cl.executeKernel(framesKernel, numRealParticles);
setPeriodicBoxArgs(cl, blockBoundsKernel, 1);
...
...
@@ -6586,7 +6545,7 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
cl.executeKernel(neighborsKernel, numRealParticles);
cl_int* count = (cl_int*) cl.getPinnedBuffer();
cl::Event event;
cl.getQueue().enqueueReadBuffer(neighborBlockCount
->
getDeviceBuffer(), CL_FALSE, 0, neighborBlockCount
->
getSize()*neighborBlockCount
->
getElementSize(), count, NULL, &event);
cl.getQueue().enqueueReadBuffer(neighborBlockCount
.
getDeviceBuffer(), CL_FALSE, 0, neighborBlockCount
.
getSize()*neighborBlockCount
.
getElementSize(), count, NULL, &event);
setPeriodicBoxArgs(cl, forceKernel, 20);
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
event.wait();
...
...
@@ -6595,17 +6554,13 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
// There wasn't enough room for the neighbor list, so we need to recreate it.
delete neighbors;
neighbors = NULL;
delete neighborIndex;
neighborIndex = NULL;
maxNeighborBlocks = (int) ceil((*count)*1.1);
neighbors
= OpenCLArray::create<cl_int>(cl,
maxNeighborBlocks*32
, "neighbors"
);
neighborIndex
= OpenCLArray::create<cl_int>(cl, maxNeighborBlocks, "neighborIndex"
);
neighborsKernel.setArg<cl::Buffer>(10, neighbors
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(17, neighbors
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(18, neighborIndex
->
getDeviceBuffer());
neighbors
.resize(
maxNeighborBlocks*32);
neighborIndex
.resize(maxNeighborBlocks
);
neighborsKernel.setArg<cl::Buffer>(10, neighbors
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(17, neighbors
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(18, neighborIndex
.
getDeviceBuffer());
}
}
cl.executeKernel(torqueKernel, numRealParticles);
...
...
@@ -6644,9 +6599,9 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
if (epsilon != 0.0 && !isRealParticle[i])
throw OpenMMException("updateParametersInContext: The set of ignored particles (ones with epsilon=0) has changed");
}
sigParams
->
upload(sigParamsVector);
epsParams
->
upload(epsParamsVector);
scale
->
upload(scaleVector);
sigParams
.
upload(sigParamsVector);
epsParams
.
upload(epsParamsVector);
scale
.
upload(scaleVector);
// Record the exceptions.
...
...
@@ -6658,7 +6613,7 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
force.getExceptionParameters(exceptions[i], atom1, atom2, sigma, epsilon);
exceptionParamsVec[i] = mm_float2((float) sigma, (float) epsilon);
}
exceptionParams
->
upload(exceptionParamsVec);
exceptionParams
.
upload(exceptionParamsVec);
}
cl.invalidateMolecules(info);
sortAtoms();
...
...
@@ -6679,7 +6634,7 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
particles[nextIndex++] = atom;
}
}
sortedParticles
->
upload(particles);
sortedParticles
.
upload(particles);
// Update the list of exception particles.
...
...
@@ -6688,7 +6643,7 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
vector<mm_int4> exceptionParticlesVec(numExceptions);
for (int i = 0; i < numExceptions; i++)
exceptionParticlesVec[i] = mm_int4(exceptionAtoms[i].first, exceptionAtoms[i].second, inverseOrder[exceptionAtoms[i].first], inverseOrder[exceptionAtoms[i].second]);
exceptionParticles
->
upload(exceptionParticlesVec);
exceptionParticles
.
upload(exceptionParticlesVec);
}
// Rebuild the list of exclusions.
...
...
@@ -6700,16 +6655,16 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
excludedAtoms[first].push_back(second);
}
int index = 0;
vector<int> exclusionVec(exclusions
->
getSize());
vector<int> startIndexVec(exclusionStartIndex
->
getSize());
vector<int> exclusionVec(exclusions
.
getSize());
vector<int> startIndexVec(exclusionStartIndex
.
getSize());
for (int i = 0; i < numRealParticles; i++) {
startIndexVec[i] = index;
for (int j = 0; j < excludedAtoms[i].size(); j++)
exclusionVec[index++] = excludedAtoms[i][j];
}
startIndexVec[numRealParticles] = index;
exclusions
->
upload(exclusionVec);
exclusionStartIndex
->
upload(startIndexVec);
exclusions
.
upload(exclusionVec);
exclusionStartIndex
.
upload(startIndexVec);
}
class OpenCLCalcCustomCVForceKernel::ReorderListener : public OpenCLContext::ReorderListener {
...
...
@@ -6728,15 +6683,6 @@ private:
OpenCLArray& invAtomOrder;
};
OpenCLCalcCustomCVForceKernel::~OpenCLCalcCustomCVForceKernel() {
for (auto force : cvForces)
delete force;
if (invAtomOrder != NULL)
delete invAtomOrder;
if (innerInvAtomOrder != NULL)
delete innerInvAtomOrder;
}
void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
int numCVs = force.getNumCollectiveVariables();
cl.addForce(new OpenCLForceInfo(1));
...
...
@@ -6779,10 +6725,11 @@ void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const Custo
// Create arrays for storing information.
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
cvForces.resize(numCVs);
for (int i = 0; i < numCVs; i++)
cvForces
.push_back(new OpenCLArray
(cl, cl.getNumAtoms(), 4*elementSize, "cvForce")
)
;
invAtomOrder
= OpenCLArray::creat
e<cl_int>(cl, cl.getPaddedNumAtoms(), "invAtomOrder");
innerInvAtomOrder
= OpenCLArray::creat
e<cl_int>(cl, cl.getPaddedNumAtoms(), "innerInvAtomOrder");
cvForces
[i].initialize
(cl, cl.getNumAtoms(), 4*elementSize, "cvForce");
invAtomOrder
.initializ
e<cl_int>(cl, cl.getPaddedNumAtoms(), "invAtomOrder");
innerInvAtomOrder
.initializ
e<cl_int>(cl, cl.getPaddedNumAtoms(), "innerInvAtomOrder");
// Create the kernels.
...
...
@@ -6809,7 +6756,7 @@ double OpenCLCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl&
vector<map<string, double> > cvDerivs(numCVs);
for (int i = 0; i < numCVs; i++) {
cvValues.push_back(innerContext.calcForcesAndEnergy(true, true, 1<<i));
copyForcesKernel.setArg<cl::Buffer>(0, cvForces[i]
->
getDeviceBuffer());
copyForcesKernel.setArg<cl::Buffer>(0, cvForces[i]
.
getDeviceBuffer());
cl.executeKernel(copyForcesKernel, numAtoms);
innerContext.getEnergyParameterDerivatives(cvDerivs[i]);
}
...
...
@@ -6852,8 +6799,8 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl&
// Initialize the listeners.
ReorderListener* listener1 = new ReorderListener(cl,
*
invAtomOrder);
ReorderListener* listener2 = new ReorderListener(cl2,
*
innerInvAtomOrder);
ReorderListener* listener1 = new ReorderListener(cl, invAtomOrder);
ReorderListener* listener2 = new ReorderListener(cl2, innerInvAtomOrder);
cl.addReorderListener(listener1);
cl2.addReorderListener(listener2);
listener1->execute();
...
...
@@ -6866,7 +6813,7 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl&
copyStateKernel.setArg<cl::Buffer>(3, cl.getAtomIndexArray().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(4, cl2.getPosq().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(6, cl2.getVelm().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(7, innerInvAtomOrder
->
getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(7, innerInvAtomOrder
.
getDeviceBuffer());
copyStateKernel.setArg<cl_int>(8, numAtoms);
if (cl.getUseMixedPrecision()) {
copyStateKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
...
...
@@ -6877,7 +6824,7 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl&
copyStateKernel.setArg<void*>(5, NULL);
}
copyForcesKernel.setArg<cl::Buffer>(1, invAtomOrder
->
getDeviceBuffer());
copyForcesKernel.setArg<cl::Buffer>(1, invAtomOrder
.
getDeviceBuffer());
copyForcesKernel.setArg<cl::Buffer>(2, cl2.getForce().getDeviceBuffer());
copyForcesKernel.setArg<cl::Buffer>(3, cl2.getAtomIndexArray().getDeviceBuffer());
copyForcesKernel.setArg<cl_int>(4, numAtoms);
...
...
@@ -6885,7 +6832,7 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl&
addForcesKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
addForcesKernel.setArg<cl_int>(1, numAtoms);
for (int i = 0; i < cvForces.size(); i++)
addForcesKernel.setArg<cl::Buffer>(2*i+2, cvForces[i]
->
getDeviceBuffer());
addForcesKernel.setArg<cl::Buffer>(2*i+2, cvForces[i]
.
getDeviceBuffer());
}
cl.executeKernel(copyStateKernel, numAtoms);
Vec3 a, b, c;
...
...
@@ -6917,15 +6864,6 @@ private:
set<int> particles;
};
OpenCLCalcRMSDForceKernel::~OpenCLCalcRMSDForceKernel() {
if (referencePos != NULL)
delete referencePos;
if (particles != NULL)
delete particles;
if (buffer != NULL)
delete buffer;
}
void OpenCLCalcRMSDForceKernel::initialize(const System& system, const RMSDForce& force) {
// Create data structures.
...
...
@@ -6934,9 +6872,9 @@ void OpenCLCalcRMSDForceKernel::initialize(const System& system, const RMSDForce
int numParticles = force.getParticles().size();
if (numParticles == 0)
numParticles = system.getNumParticles();
referencePos
= new OpenCLArray
(cl, system.getNumParticles(), 4*elementSize, "referencePos");
particles
= OpenCLArray::creat
e<cl_int>(cl, numParticles, "particles");
buffer
= new OpenCLArray
(cl, 13, elementSize, "buffer");
referencePos
.initialize
(cl, system.getNumParticles(), 4*elementSize, "referencePos");
particles
.initializ
e<cl_int>(cl, numParticles, "particles");
buffer
.initialize
(cl, 13, elementSize, "buffer");
recordParameters(force);
info = new ForceInfo(force);
cl.addForce(info);
...
...
@@ -6965,18 +6903,18 @@ void OpenCLCalcRMSDForceKernel::recordParameters(const RMSDForce& force) {
// Upload them to the device.
particles
->
upload(particleVec);
particles
.
upload(particleVec);
if (cl.getUseDoublePrecision()) {
vector<mm_double4> pos;
for (Vec3 p : centeredPositions)
pos.push_back(mm_double4(p[0], p[1], p[2], 0));
referencePos
->
upload(pos);
referencePos
.
upload(pos);
}
else {
vector<mm_float4> pos;
for (Vec3 p : centeredPositions)
pos.push_back(mm_float4(p[0], p[1], p[2], 0));
referencePos
->
upload(pos);
referencePos
.
upload(pos);
}
// Record the sum of the norms of the reference positions.
...
...
@@ -6998,13 +6936,13 @@ template <class REAL>
double OpenCLCalcRMSDForceKernel::executeImpl(ContextImpl& context) {
// Execute the first kernel.
int numParticles = particles
->
getSize();
int numParticles = particles
.
getSize();
int blockSize = min(256, (int) kernel1.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(cl.getDevice()));
kernel1.setArg<cl_int>(0, numParticles);
kernel1.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, referencePos
->
getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, particles
->
getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, buffer
->
getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, referencePos
.
getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, particles
.
getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, buffer
.
getDeviceBuffer());
kernel1.setArg(5, blockSize*sizeof(REAL), NULL);
cl.executeKernel(kernel1, blockSize, blockSize);
...
...
@@ -7012,7 +6950,7 @@ double OpenCLCalcRMSDForceKernel::executeImpl(ContextImpl& context) {
// and eigenvector.
vector<REAL> b;
buffer
->
download(b);
buffer
.
download(b);
Array2D<double> F(4, 4);
F[0][0] = b[0*3+0] + b[1*3+1] + b[2*3+2];
F[1][0] = b[1*3+2] - b[2*3+1];
...
...
@@ -7066,30 +7004,25 @@ double OpenCLCalcRMSDForceKernel::executeImpl(ContextImpl& context) {
// Upload it to the device and invoke the kernel to apply forces.
buffer
->
upload(b);
buffer
.
upload(b);
kernel2.setArg<cl_int>(0, numParticles);
kernel2.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, referencePos
->
getDeviceBuffer());
kernel2.setArg<cl::Buffer>(3, particles
->
getDeviceBuffer());
kernel2.setArg<cl::Buffer>(4, buffer
->
getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, referencePos
.
getDeviceBuffer());
kernel2.setArg<cl::Buffer>(3, particles
.
getDeviceBuffer());
kernel2.setArg<cl::Buffer>(4, buffer
.
getDeviceBuffer());
kernel2.setArg<cl::Buffer>(5, cl.getForceBuffers().getDeviceBuffer());
cl.executeKernel(kernel2, numParticles);
return rmsd;
}
void OpenCLCalcRMSDForceKernel::copyParametersToContext(ContextImpl& context, const RMSDForce& force) {
if (referencePos
->
getSize() != force.getReferencePositions().size())
if (referencePos
.
getSize() != force.getReferencePositions().size())
throw OpenMMException("updateParametersInContext: The number of reference positions has changed");
int numParticles = force.getParticles().size();
if (numParticles == 0)
numParticles = context.getSystem().getNumParticles();
if (numParticles != particles->getSize()) {
// Recreate the particles array.
delete particles;
particles = NULL;
particles = OpenCLArray::create<cl_int>(cl, numParticles, "particles");
}
if (numParticles != particles.getSize())
particles.resize(numParticles);
recordParameters(force);
// Mark that the current reordering may be invalid.
...
...
@@ -7160,11 +7093,6 @@ double OpenCLIntegrateVerletStepKernel::computeKineticEnergy(ContextImpl& contex
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
OpenCLIntegrateLangevinStepKernel::~OpenCLIntegrateLangevinStepKernel() {
if (params != NULL)
delete params;
}
void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
...
...
@@ -7174,7 +7102,7 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L
cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines, "");
kernel1 = cl::Kernel(program, "integrateLangevinPart1");
kernel2 = cl::Kernel(program, "integrateLangevinPart2");
params
= new OpenCLArray
(cl, 3, cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float), "langevinParams");
params
.initialize
(cl, 3, cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float), "langevinParams");
prevStepSize = -1.0;
}
...
...
@@ -7186,7 +7114,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, params
->
getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, params
.
getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
...
...
@@ -7207,18 +7135,18 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
double fscale = (friction == 0 ? stepSize : (1-vscale)/friction);
double noisescale = sqrt(kT*(1-vscale*vscale));
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
vector<cl_double> p(params
->
getSize());
vector<cl_double> p(params
.
getSize());
p[0] = vscale;
p[1] = fscale;
p[2] = noisescale;
params
->
upload(p);
params
.
upload(p);
}
else {
vector<cl_float> p(params
->
getSize());
vector<cl_float> p(params
.
getSize());
p[0] = (cl_float) vscale;
p[1] = (cl_float) fscale;
p[2] = (cl_float) noisescale;
params
->
upload(p);
params
.
upload(p);
}
prevTemp = temperature;
prevFriction = friction;
...
...
@@ -7429,11 +7357,6 @@ double OpenCLIntegrateVariableVerletStepKernel::computeKineticEnergy(ContextImpl
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
OpenCLIntegrateVariableLangevinStepKernel::~OpenCLIntegrateVariableLangevinStepKernel() {
if (params != NULL)
delete params;
}
void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
...
...
@@ -7444,9 +7367,9 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
kernel1 = cl::Kernel(program, "integrateLangevinPart1");
kernel2 = cl::Kernel(program, "integrateLangevinPart2");
selectSizeKernel = cl::Kernel(program, "selectLangevinStepSize");
params
= new OpenCLArray
(cl, 3, cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float), "langevinParams");
params
.initialize
(cl, 3, cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float), "langevinParams");
blockSize = min(256, system.getNumParticles());
blockSize = max(blockSize, params
->
getSize());
blockSize = max(blockSize, params
.
getSize());
blockSize = min(blockSize, (int) selectSizeKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(cl.getDevice()));
}
...
...
@@ -7459,7 +7382,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context,
kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, params
->
getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, params
.
getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
...
...
@@ -7470,9 +7393,9 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context,
selectSizeKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(5, cl.getVelm().getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(6, cl.getForce().getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(7, params
->
getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(7, params
.
getDeviceBuffer());
int elementSize = (useDouble ? sizeof(cl_double) : sizeof(cl_float));
selectSizeKernel.setArg(8, params
->
getSize()*elementSize, NULL);
selectSizeKernel.setArg(8, params
.
getSize()*elementSize, NULL);
selectSizeKernel.setArg(9, blockSize*elementSize, NULL);
}
...
...
@@ -7619,24 +7542,8 @@ private:
};
OpenCLIntegrateCustomStepKernel::~OpenCLIntegrateCustomStepKernel() {
if (globalValues != NULL)
delete globalValues;
if (sumBuffer != NULL)
delete sumBuffer;
if (summedValue != NULL)
delete summedValue;
if (uniformRandoms != NULL)
delete uniformRandoms;
if (randomSeed != NULL)
delete randomSeed;
if (perDofEnergyParamDerivs != NULL)
delete perDofEnergyParamDerivs;
if (perDofValues != NULL)
delete perDofValues;
for (auto function : tabulatedFunctions)
delete function;
for (auto& f : savedForces)
delete f.second;
}
void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) {
...
...
@@ -7644,8 +7551,8 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
numGlobalVariables = integrator.getNumGlobalVariables();
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
sumBuffer
= new OpenCLArray
(cl, ((3*system.getNumParticles()+3)/4)*4, elementSize, "sumBuffer");
summedValue
= new OpenCLArray
(cl, 1, elementSize, "summedValue");
sumBuffer
.initialize
(cl, ((3*system.getNumParticles()+3)/4)*4, elementSize, "sumBuffer");
summedValue
.initialize
(cl, 1, elementSize, "summedValue");
perDofValues = new OpenCLParameterSet(cl, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cl.getUseDoublePrecision() || cl.getUseMixedPrecision());
cl.addReorderListener(new ReorderListener(cl, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent));
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
...
...
@@ -7728,6 +7635,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
vector<pair<string, string> > functionNames;
vector<const TabulatedFunction*> functionList;
vector<string> tableTypes;
tabulatedFunctions.resize(integrator.getNumTabulatedFunctions());
for (int i = 0; i < integrator.getNumTabulatedFunctions(); i++) {
functionList.push_back(&integrator.getTabulatedFunction(i));
string name = integrator.getTabulatedFunctionName(i);
...
...
@@ -7736,8 +7644,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
functions[name] = createReferenceTabulatedFunction(integrator.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(integrator.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[
i].
upload(f);
if (width == 1)
tableTypes.push_back("float");
else
...
...
@@ -7801,8 +7709,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
forceGroupFlags[step] = 1<<forceGroup[step];
if (forceGroupFlags[step] == -2 && step > 0)
forceGroupFlags[step] = forceGroupFlags[step-1];
if (forceGroupFlags[step] != -2 && savedForces.find(forceGroupFlags[step]) == savedForces.end())
savedForces[forceGroupFlags[step]] = new OpenCLArray(cl, cl.getForce().getSize(), cl.getForce().getElementSize(), "savedForces");
if (forceGroupFlags[step] != -2 && savedForces.find(forceGroupFlags[step]) == savedForces.end()) {
savedForces[forceGroupFlags[step]] = OpenCLArray();
savedForces[forceGroupFlags[step]].initialize(cl, cl.getForce().getSize(), cl.getForce().getElementSize(), "savedForces");
}
}
// Allocate space for storing global values, both on the host and the device.
...
...
@@ -7810,7 +7720,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
globalValuesFloat.resize(expressionSet.getNumVariables());
globalValuesDouble.resize(expressionSet.getNumVariables());
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
globalValues
= new OpenCLArray
(cl, expressionSet.getNumVariables(), elementSize, "globalValues");
globalValues
.initialize
(cl, expressionSet.getNumVariables(), elementSize, "globalValues");
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) {
globalValuesDouble[globalVariableIndex[i]] = initialGlobalVariables[i];
expressionSet.setVariable(globalVariableIndex[i], initialGlobalVariables[i]);
...
...
@@ -7823,7 +7733,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
int numContextParams = context.getParameters().size();
localPerDofEnergyParamDerivsFloat.resize(numContextParams);
localPerDofEnergyParamDerivsDouble.resize(numContextParams);
perDofEnergyParamDerivs
= new OpenCLArray
(cl, max(1, numContextParams), elementSize, "perDofEnergyParamDerivs");
perDofEnergyParamDerivs
.initialize
(cl, max(1, numContextParams), elementSize, "perDofEnergyParamDerivs");
// Record information about the targets of steps that will be stored in global variables.
...
...
@@ -7974,14 +7884,14 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, cl.getVelm().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, cl.getForce().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, globalValues
->
getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, sumBuffer
->
getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, globalValues
.
getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, sumBuffer
.
getDeviceBuffer());
index += 4;
kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs
->
getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs
.
getDeviceBuffer());
for (auto& buffer : perDofValues->getBuffers())
kernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto array : tabulatedFunctions)
kernel.setArg<cl::Buffer>(index++, array
->
getDeviceBuffer());
for (auto
&
array : tabulatedFunctions)
kernel.setArg<cl::Buffer>(index++, array
.
getDeviceBuffer());
if (stepType[step] == CustomIntegrator::ComputeSum) {
// Create a second kernel for this step that sums the values.
...
...
@@ -7989,8 +7899,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel = cl::Kernel(program, useDouble ? "computeDoubleSum" : "computeFloatSum");
kernels[step].push_back(kernel);
index = 0;
kernel.setArg<cl::Buffer>(index++, sumBuffer
->
getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, summedValue
->
getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, sumBuffer
.
getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, summedValue
.
getDeviceBuffer());
kernel.setArg<cl_int>(index++, 3*numAtoms);
}
}
...
...
@@ -8012,9 +7922,9 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
int maxUniformRandoms = 1;
for (int required : requiredUniform)
maxUniformRandoms = max(maxUniformRandoms, required);
uniformRandoms
= OpenCLArray::creat
e<mm_float4>(cl, maxUniformRandoms, "uniformRandoms");
randomSeed
= OpenCLArray::creat
e<mm_int4>(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
vector<mm_int4> seed(randomSeed
->
getSize());
uniformRandoms
.initializ
e<mm_float4>(cl, maxUniformRandoms, "uniformRandoms");
randomSeed
.initializ
e<mm_int4>(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
vector<mm_int4> seed(randomSeed
.
getSize());
int rseed = integrator.getRandomNumberSeed();
// A random seed of 0 means use a unique one
if (rseed == 0)
...
...
@@ -8026,12 +7936,12 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
s.z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
s.w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
}
randomSeed
->
upload(seed);
randomSeed
.
upload(seed);
cl::Program randomProgram = cl.createProgram(OpenCLKernelSources::customIntegrator, defines);
randomKernel = cl::Kernel(randomProgram, "generateRandomNumbers");
randomKernel.setArg<cl_int>(0, maxUniformRandoms);
randomKernel.setArg<cl::Buffer>(1, uniformRandoms
->
getDeviceBuffer());
randomKernel.setArg<cl::Buffer>(2, randomSeed
->
getDeviceBuffer());
randomKernel.setArg<cl::Buffer>(1, uniformRandoms
.
getDeviceBuffer());
randomKernel.setArg<cl::Buffer>(2, randomSeed
.
getDeviceBuffer());
// Create the kernel for computing kinetic energy.
...
...
@@ -8067,19 +7977,19 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getVelm().getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getForce().getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, globalValues
->
getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer
->
getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, globalValues
.
getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer
.
getDeviceBuffer());
index += 2;
kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms
->
getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms
.
getDeviceBuffer());
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
kineticEnergyKernel.setArg<cl_double>(index++, 0.0);
else
kineticEnergyKernel.setArg<cl_float>(index++, 0.0f);
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs
->
getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs
.
getDeviceBuffer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
for (auto array : tabulatedFunctions)
kineticEnergyKernel.setArg<cl::Buffer>(index++, array
->
getDeviceBuffer());
for (auto
&
array : tabulatedFunctions)
kineticEnergyKernel.setArg<cl::Buffer>(index++, array
.
getDeviceBuffer());
keNeedsForce = usesVariable(keExpression, "f");
// Create a second kernel to sum the values.
...
...
@@ -8087,8 +7997,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
program = cl.createProgram(OpenCLKernelSources::customIntegrator, defines);
sumKineticEnergyKernel = cl::Kernel(program, useDouble ? "computeDoubleSum" : "computeFloatSum");
index = 0;
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer
->
getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, summedValue
->
getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer
.
getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, summedValue
.
getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl_int>(index++, 3*numAtoms);
// Delete the custom functions.
...
...
@@ -8181,7 +8091,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
// The forces are still valid. We just need a different force group right now. Save the old
// forces in case we need them again.
cl.getForce().copyTo(
*
savedForces[lastForceGroups]);
cl.getForce().copyTo(savedForces[lastForceGroups]);
validSavedForces.insert(lastForceGroups);
}
}
...
...
@@ -8196,7 +8106,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
if (!computeEnergy && validSavedForces.find(forceGroups) != validSavedForces.end()) {
// We can just restore the forces we saved earlier.
savedForces[forceGroups]
->
copyTo(cl.getForce());
savedForces[forceGroups]
.
copyTo(cl.getForce());
context.getLastForceGroups() = forceGroups;
}
else {
...
...
@@ -8209,12 +8119,12 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
for (int i = 0; i < perDofEnergyParamDerivNames.size(); i++)
localPerDofEnergyParamDerivsDouble[i] = energyParamDerivs[perDofEnergyParamDerivNames[i]];
perDofEnergyParamDerivs
->
upload(localPerDofEnergyParamDerivsDouble);
perDofEnergyParamDerivs
.
upload(localPerDofEnergyParamDerivsDouble);
}
else {
for (int i = 0; i < perDofEnergyParamDerivNames.size(); i++)
localPerDofEnergyParamDerivsFloat[i] = (float) energyParamDerivs[perDofEnergyParamDerivNames[i]];
perDofEnergyParamDerivs
->
upload(localPerDofEnergyParamDerivsFloat);
perDofEnergyParamDerivs
.
upload(localPerDofEnergyParamDerivsFloat);
}
}
}
...
...
@@ -8227,18 +8137,18 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
// Upload the global values to the device.
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
globalValues
->
upload(globalValuesDouble);
globalValues
.
upload(globalValuesDouble);
else {
for (int j = 0; j < (int) globalValuesDouble.size(); j++)
globalValuesFloat[j] = (float) globalValuesDouble[j];
globalValues
->
upload(globalValuesFloat);
globalValues
.
upload(globalValuesFloat);
}
}
bool stepInvalidatesForces = invalidatesForces[step];
if (stepType[step] == CustomIntegrator::ComputePerDof && !merged[step]) {
kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms
->
getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms
.
getDeviceBuffer());
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
kernels[step][0].setArg<cl_double>(11, energy);
else
...
...
@@ -8256,24 +8166,24 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
else if (stepType[step] == CustomIntegrator::ComputeSum) {
kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms
->
getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms
.
getDeviceBuffer());
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
kernels[step][0].setArg<cl_double>(11, energy);
else
kernels[step][0].setArg<cl_float>(11, (cl_float) energy);
if (requiredUniform[step] > 0)
cl.executeKernel(randomKernel, numAtoms);
cl.clearBuffer(
*
sumBuffer);
cl.clearBuffer(sumBuffer);
cl.executeKernel(kernels[step][0], numAtoms, 128);
cl.executeKernel(kernels[step][1], sumWorkGroupSize, sumWorkGroupSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double value;
summedValue
->
download(&value);
summedValue
.
download(&value);
recordGlobalValue(value, stepTarget[step], integrator);
}
else {
float value;
summedValue
->
download(&value);
summedValue
.
download(&value);
recordGlobalValue(value, stepTarget[step], integrator);
}
}
...
...
@@ -8363,19 +8273,19 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
energy = context.calcForcesAndEnergy(true, willNeedEnergy, -1);
forcesAreValid = true;
}
cl.clearBuffer(
*
sumBuffer);
cl.clearBuffer(sumBuffer);
kineticEnergyKernel.setArg<cl::Buffer>(8, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kineticEnergyKernel.setArg<cl_uint>(9, 0);
cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms());
cl.executeKernel(sumKineticEnergyKernel, sumWorkGroupSize, sumWorkGroupSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double ke;
summedValue
->
download(&ke);
summedValue
.
download(&ke);
return ke;
}
else {
float ke;
summedValue
->
download(&ke);
summedValue
.
download(&ke);
return ke;
}
}
...
...
@@ -8410,7 +8320,7 @@ void OpenCLIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& conte
}
void OpenCLIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const {
if (globalValues
== NULL
) {
if (
!
globalValues
.isInitialized()
) {
// The data structures haven't been created yet, so just return the list of values that was given earlier.
values = initialGlobalVariables;
...
...
@@ -8424,7 +8334,7 @@ void OpenCLIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, v
void OpenCLIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
if (numGlobalVariables == 0)
return;
if (globalValues
== NULL
) {
if (
!
globalValues
.isInitialized()
) {
// The data structures haven't been created yet, so just store the list of values.
initialGlobalVariables = values;
...
...
@@ -8483,11 +8393,6 @@ void OpenCLIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, in
deviceValuesAreCurrent = false;
}
OpenCLApplyAndersenThermostatKernel::~OpenCLApplyAndersenThermostatKernel() {
if (atomGroups != NULL)
delete atomGroups;
}
void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
randomSeed = thermostat.getRandomNumberSeed();
map<string, string> defines;
...
...
@@ -8499,13 +8404,13 @@ void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const
// Create the arrays with the group definitions.
vector<vector<int> > groups = AndersenThermostatImpl::calcParticleGroups(system);
atomGroups
= OpenCLArray::creat
e<int>(cl, cl.getNumAtoms(), "atomGroups");
vector<int> atoms(atomGroups
->
getSize());
atomGroups
.initializ
e<int>(cl, cl.getNumAtoms(), "atomGroups");
vector<int> atoms(atomGroups
.
getSize());
for (int i = 0; i < (int) groups.size(); i++) {
for (int j = 0; j < (int) groups[i].size(); j++)
atoms[groups[i][j]] = i;
}
atomGroups
->
upload(atoms);
atomGroups
.
upload(atoms);
}
void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
...
...
@@ -8514,7 +8419,7 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
kernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
kernel.setArg<cl::Buffer>(3, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernel.setArg<cl::Buffer>(4, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kernel.setArg<cl::Buffer>(6, atomGroups
->
getDeviceBuffer());
kernel.setArg<cl::Buffer>(6, atomGroups
.
getDeviceBuffer());
}
kernel.setArg<cl_float>(0, (cl_float) context.getParameter(AndersenThermostat::CollisionFrequency()));
kernel.setArg<cl_float>(1, (cl_float) (BOLTZ*context.getParameter(AndersenThermostat::Temperature())));
...
...
@@ -8522,20 +8427,9 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
cl.executeKernel(kernel, cl.getNumAtoms());
}
OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() {
if (savedPositions != NULL)
delete savedPositions;
if (savedForces != NULL)
delete savedForces;
if (moleculeAtoms != NULL)
delete moleculeAtoms;
if (moleculeStartIndex != NULL)
delete moleculeStartIndex;
}
void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
savedPositions
= new OpenCLArray
(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions");
savedForces
= new OpenCLArray
(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedForces");
savedPositions
.initialize
(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions");
savedForces
.initialize
(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedForces");
cl::Program program = cl.createProgram(OpenCLKernelSources::monteCarloBarostat);
kernel = cl::Kernel(program, "scalePositions");
}
...
...
@@ -8548,10 +8442,10 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
vector<vector<int> > molecules = context.getMolecules();
numMolecules = molecules.size();
moleculeAtoms
= OpenCLArray::creat
e<int>(cl, cl.getNumAtoms(), "moleculeAtoms");
moleculeStartIndex
= OpenCLArray::creat
e<int>(cl, numMolecules+1, "moleculeStartIndex");
vector<int> atoms(moleculeAtoms
->
getSize());
vector<int> startIndex(moleculeStartIndex
->
getSize());
moleculeAtoms
.initializ
e<int>(cl, cl.getNumAtoms(), "moleculeAtoms");
moleculeStartIndex
.initializ
e<int>(cl, numMolecules+1, "moleculeStartIndex");
vector<int> atoms(moleculeAtoms
.
getSize());
vector<int> startIndex(moleculeStartIndex
.
getSize());
int index = 0;
for (int i = 0; i < numMolecules; i++) {
startIndex[i] = index;
...
...
@@ -8559,19 +8453,19 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
atoms[index++] = molecule;
}
startIndex[numMolecules] = index;
moleculeAtoms
->
upload(atoms);
moleculeStartIndex
->
upload(startIndex);
moleculeAtoms
.
upload(atoms);
moleculeStartIndex
.
upload(startIndex);
// Initialize the kernel arguments.
kernel.setArg<cl_int>(3, numMolecules);
kernel.setArg<cl::Buffer>(9, cl.getPosq().getDeviceBuffer());
kernel.setArg<cl::Buffer>(10, moleculeAtoms
->
getDeviceBuffer());
kernel.setArg<cl::Buffer>(11, moleculeStartIndex
->
getDeviceBuffer());
kernel.setArg<cl::Buffer>(10, moleculeAtoms
.
getDeviceBuffer());
kernel.setArg<cl::Buffer>(11, moleculeStartIndex
.
getDeviceBuffer());
}
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions
->
getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), savedForces
->
getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions
.
getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), savedForces
.
getDeviceBuffer(), 0, 0, bytesToCopy);
kernel.setArg<cl_float>(0, (cl_float) scaleX);
kernel.setArg<cl_float>(1, (cl_float) scaleY);
kernel.setArg<cl_float>(2, (cl_float) scaleZ);
...
...
@@ -8584,19 +8478,14 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(savedForces->getDeviceBuffer(), cl.getForce().getDeviceBuffer(), 0, 0, bytesToCopy);
}
OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() {
if (cmMomentum != NULL)
delete cmMomentum;
cl.getQueue().enqueueCopyBuffer(savedPositions.getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(savedForces.getDeviceBuffer(), cl.getForce().getDeviceBuffer(), 0, 0, bytesToCopy);
}
void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
frequency = force.getFrequency();
int numAtoms = cl.getNumAtoms();
cmMomentum
= OpenCLArray::creat
e<mm_float4>(cl, (numAtoms+OpenCLContext::ThreadBlockSize-1)/OpenCLContext::ThreadBlockSize, "cmMomentum");
cmMomentum
.initializ
e<mm_float4>(cl, (numAtoms+OpenCLContext::ThreadBlockSize-1)/OpenCLContext::ThreadBlockSize, "cmMomentum");
double totalMass = 0.0;
for (int i = 0; i < numAtoms; i++)
totalMass += system.getParticleMass(i);
...
...
@@ -8606,12 +8495,12 @@ void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotion
kernel1 = cl::Kernel(program, "calcCenterOfMassMomentum");
kernel1.setArg<cl_int>(0, numAtoms);
kernel1.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, cmMomentum
->
getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, cmMomentum
.
getDeviceBuffer());
kernel1.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
kernel2 = cl::Kernel(program, "removeCenterOfMassMomentum");
kernel2.setArg<cl_int>(0, numAtoms);
kernel2.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, cmMomentum
->
getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, cmMomentum
.
getDeviceBuffer());
kernel2.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
}
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
b33ee3b0
...
...
@@ -55,9 +55,7 @@ private:
};
OpenCLNonbondedUtilities
::
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
numForceBuffers
(
0
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
pinnedCountMemory
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
numForceBuffers
(
0
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
pinnedCountMemory
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
// Decide how many thread blocks and force buffers to use.
deviceIsCpu
=
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
...
...
@@ -95,34 +93,6 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
}
OpenCLNonbondedUtilities
::~
OpenCLNonbondedUtilities
()
{
if
(
exclusionIndices
!=
NULL
)
delete
exclusionIndices
;
if
(
exclusionRowIndices
!=
NULL
)
delete
exclusionRowIndices
;
if
(
exclusionTiles
!=
NULL
)
delete
exclusionTiles
;
if
(
exclusions
!=
NULL
)
delete
exclusions
;
if
(
interactingTiles
!=
NULL
)
delete
interactingTiles
;
if
(
interactingAtoms
!=
NULL
)
delete
interactingAtoms
;
if
(
interactionCount
!=
NULL
)
delete
interactionCount
;
if
(
blockCenter
!=
NULL
)
delete
blockCenter
;
if
(
blockBoundingBox
!=
NULL
)
delete
blockBoundingBox
;
if
(
sortedBlocks
!=
NULL
)
delete
sortedBlocks
;
if
(
sortedBlockCenter
!=
NULL
)
delete
sortedBlockCenter
;
if
(
sortedBlockBoundingBox
!=
NULL
)
delete
sortedBlockBoundingBox
;
if
(
oldPositions
!=
NULL
)
delete
oldPositions
;
if
(
rebuildNeighborList
!=
NULL
)
delete
rebuildNeighborList
;
if
(
blockSorter
!=
NULL
)
delete
blockSorter
;
if
(
pinnedCountBuffer
!=
NULL
)
...
...
@@ -246,8 +216,8 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
exclusionTilesVec
.
push_back
(
mm_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
context
.
getSIMDWidth
()
<=
32
?
compareUshort2
:
compareUshort2LargeSIMD
);
exclusionTiles
=
OpenCLArray
::
creat
e
<
mm_ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
->
upload
(
exclusionTilesVec
);
exclusionTiles
.
initializ
e
<
mm_ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
.
upload
(
exclusionTilesVec
);
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
for
(
int
i
=
0
;
i
<
(
int
)
exclusionTilesVec
.
size
();
i
++
)
{
mm_ushort2
tile
=
exclusionTilesVec
[
i
];
...
...
@@ -268,17 +238,17 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
maxExclusions
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
exclusionBlocksForBlock
.
size
();
i
++
)
maxExclusions
=
(
maxExclusions
>
exclusionBlocksForBlock
[
i
].
size
()
?
maxExclusions
:
exclusionBlocksForBlock
[
i
].
size
());
exclusionIndices
=
OpenCLArray
::
creat
e
<
cl_uint
>
(
context
,
exclusionIndicesVec
.
size
(),
"exclusionIndices"
);
exclusionRowIndices
=
OpenCLArray
::
creat
e
<
cl_uint
>
(
context
,
exclusionRowIndicesVec
.
size
(),
"exclusionRowIndices"
);
exclusionIndices
->
upload
(
exclusionIndicesVec
);
exclusionRowIndices
->
upload
(
exclusionRowIndicesVec
);
exclusionIndices
.
initializ
e
<
cl_uint
>
(
context
,
exclusionIndicesVec
.
size
(),
"exclusionIndices"
);
exclusionRowIndices
.
initializ
e
<
cl_uint
>
(
context
,
exclusionRowIndicesVec
.
size
(),
"exclusionRowIndices"
);
exclusionIndices
.
upload
(
exclusionIndicesVec
);
exclusionRowIndices
.
upload
(
exclusionRowIndicesVec
);
// Record the exclusion data.
exclusions
=
OpenCLArray
::
creat
e
<
cl_uint
>
(
context
,
tilesWithExclusions
.
size
()
*
OpenCLContext
::
TileSize
,
"exclusions"
);
exclusions
.
initializ
e
<
cl_uint
>
(
context
,
tilesWithExclusions
.
size
()
*
OpenCLContext
::
TileSize
,
"exclusions"
);
cl_uint
allFlags
=
(
cl_uint
)
-
1
;
vector
<
cl_uint
>
exclusionVec
(
exclusions
->
getSize
(),
allFlags
);
for
(
int
i
=
0
;
i
<
exclusions
->
getSize
();
++
i
)
vector
<
cl_uint
>
exclusionVec
(
exclusions
.
getSize
(),
allFlags
);
for
(
int
i
=
0
;
i
<
exclusions
.
getSize
();
++
i
)
exclusionVec
[
i
]
=
0xFFFFFFFF
;
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
OpenCLContext
::
TileSize
;
...
...
@@ -298,7 +268,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
}
}
atomExclusions
.
clear
();
// We won't use this again, so free the memory it used
exclusions
->
upload
(
exclusionVec
);
exclusions
.
upload
(
exclusionVec
);
// Create data structures for the neighbor list.
...
...
@@ -312,20 +282,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if
(
maxTiles
<
1
)
maxTiles
=
1
;
int
numAtoms
=
context
.
getNumAtoms
();
interactingTiles
=
OpenCLArray
::
creat
e
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
OpenCLArray
::
creat
e
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactionCount
=
OpenCLArray
::
creat
e
<
cl_uint
>
(
context
,
1
,
"interactionCount"
);
interactingTiles
.
initializ
e
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
.
initializ
e
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactionCount
.
initializ
e
<
cl_uint
>
(
context
,
1
,
"interactionCount"
);
int
elementSize
=
(
context
.
getUseDoublePrecision
()
?
sizeof
(
cl_double
)
:
sizeof
(
cl_float
));
blockCenter
=
new
OpenCLArray
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockCenter"
);
blockBoundingBox
=
new
OpenCLArray
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockBoundingBox"
);
sortedBlocks
=
new
OpenCLArray
(
context
,
numAtomBlocks
,
2
*
elementSize
,
"sortedBlocks"
);
sortedBlockCenter
=
new
OpenCLArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockBoundingBox
=
new
OpenCLArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
oldPositions
=
new
OpenCLArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
rebuildNeighborList
=
OpenCLArray
::
creat
e
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockCenter
.
initialize
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockCenter"
);
blockBoundingBox
.
initialize
(
context
,
numAtomBlocks
,
4
*
elementSize
,
"blockBoundingBox"
);
sortedBlocks
.
initialize
(
context
,
numAtomBlocks
,
2
*
elementSize
,
"sortedBlocks"
);
sortedBlockCenter
.
initialize
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockBoundingBox
.
initialize
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
oldPositions
.
initialize
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
rebuildNeighborList
.
initializ
e
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockSorter
=
new
OpenCLSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
cl_uint
>
count
(
1
,
0
);
interactionCount
->
upload
(
count
);
interactionCount
.
upload
(
count
);
}
}
...
...
@@ -376,14 +346,14 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
forceRebuildNeighborList
=
true
;
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
blockSorter
->
sort
(
sortedBlocks
);
kernels
.
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
forceRebuildNeighborList
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
lastCutoff
=
kernels
.
cutoffDistance
;
context
.
getQueue
().
enqueueReadBuffer
(
interactionCount
->
getDeviceBuffer
(),
CL_FALSE
,
0
,
sizeof
(
int
),
pinnedCountMemory
,
NULL
,
&
downloadCountEvent
);
context
.
getQueue
().
enqueueReadBuffer
(
interactionCount
.
getDeviceBuffer
(),
CL_FALSE
,
0
,
sizeof
(
int
),
pinnedCountMemory
,
NULL
,
&
downloadCountEvent
);
}
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
@@ -407,7 +377,7 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
bool
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
return
false
;
if
(
pinnedCountMemory
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
if
(
pinnedCountMemory
[
0
]
<=
(
unsigned
int
)
interactingTiles
.
getSize
())
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
...
...
@@ -417,31 +387,27 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
maxTiles
=
totalTiles
;
delete
interactingTiles
;
delete
interactingAtoms
;
interactingTiles
=
NULL
;
// Avoid an error in the destructor if the following allocation fails
interactingAtoms
=
NULL
;
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactingTiles
.
resize
(
maxTiles
);
interactingAtoms
.
resize
(
OpenCLContext
::
TileSize
*
maxTiles
);
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
KernelSet
&
kernels
=
iter
->
second
;
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceKernel
)
!=
NULL
)
{
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
.
getDeviceBuffer
());
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
.
getDeviceBuffer
());
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
energyKernel
)
!=
NULL
)
{
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
.
getDeviceBuffer
());
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
.
getDeviceBuffer
());
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceEnergyKernel
)
!=
NULL
)
{
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
.
getDeviceBuffer
());
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
.
getDeviceBuffer
());
}
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
forceRebuildNeighborList
=
true
;
...
...
@@ -506,7 +472,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padding
);
defines
[
"PADDED_CUTOFF"
]
=
context
.
doubleToString
(
paddedCutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
->
getSize
());
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
.
getSize
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"SIMD_WIDTH"
]
=
context
.
intToString
(
context
.
getSIMDWidth
());
if
(
usePeriodic
)
...
...
@@ -523,36 +489,36 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
kernels
.
findBlockBoundsKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
kernels
.
findBlockBoundsKernel
.
setArg
<
cl_int
>
(
0
,
context
.
getNumAtoms
());
kernels
.
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
context
.
getPosq
().
getDeviceBuffer
());
kernels
.
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
blockCenter
->
getDeviceBuffer
());
kernels
.
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
blockBoundingBox
->
getDeviceBuffer
());
kernels
.
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
rebuildNeighborList
->
getDeviceBuffer
());
kernels
.
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
10
,
sortedBlocks
->
getDeviceBuffer
());
kernels
.
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
blockCenter
.
getDeviceBuffer
());
kernels
.
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
blockBoundingBox
.
getDeviceBuffer
());
kernels
.
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
rebuildNeighborList
.
getDeviceBuffer
());
kernels
.
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
10
,
sortedBlocks
.
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"sortBoxData"
);
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
sortedBlocks
->
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
blockCenter
->
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
blockBoundingBox
->
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
sortedBlockCenter
->
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
sortedBlockBoundingBox
->
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
sortedBlocks
.
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
blockCenter
.
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
blockBoundingBox
.
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
sortedBlockCenter
.
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
sortedBlockBoundingBox
.
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
context
.
getPosq
().
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
oldPositions
->
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionCount
->
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
rebuildNeighborList
->
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
oldPositions
.
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionCount
.
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
rebuildNeighborList
.
getDeviceBuffer
());
kernels
.
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
true
);
kernels
.
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
context
.
getPosq
().
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
interactingTiles
->
getSize
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
interactingTiles
.
getSize
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
12
,
sortedBlocks
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
13
,
sortedBlockCenter
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
14
,
sortedBlockBoundingBox
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
15
,
exclusionIndices
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
16
,
exclusionRowIndices
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
oldPositions
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
18
,
rebuildNeighborList
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
12
,
sortedBlocks
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
13
,
sortedBlockCenter
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
14
,
sortedBlockBoundingBox
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
15
,
exclusionIndices
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
16
,
exclusionRowIndices
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
oldPositions
.
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
18
,
rebuildNeighborList
.
getDeviceBuffer
());
if
(
kernels
.
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
// The device can't handle this block size, so reduce it.
...
...
@@ -700,7 +666,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"TILE_SIZE"
]
=
context
.
intToString
(
OpenCLContext
::
TileSize
);
int
numExclusionTiles
=
exclusionTiles
->
getSize
();
int
numExclusionTiles
=
exclusionTiles
.
getSize
();
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
numExclusionTiles
);
int
numContexts
=
context
.
getPlatformData
().
contexts
.
size
();
int
startExclusionIndex
=
context
.
getContextIndex
()
*
numExclusionTiles
/
numContexts
;
...
...
@@ -726,18 +692,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getForceBuffers
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusions
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusions
.
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionTiles
.
getDeviceBuffer
());
kernel
.
setArg
<
cl_uint
>
(
index
++
,
startTileIndex
);
kernel
.
setArg
<
cl_uint
>
(
index
++
,
numTiles
);
if
(
useCutoff
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
.
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
.
getDeviceBuffer
());
index
+=
5
;
// The periodic box size arguments are set when the kernel is executed.
kernel
.
setArg
<
cl_uint
>
(
index
++
,
interactingTiles
->
getSize
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockCenter
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockBoundingBox
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingAtoms
->
getDeviceBuffer
());
kernel
.
setArg
<
cl_uint
>
(
index
++
,
interactingTiles
.
getSize
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockCenter
.
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockBoundingBox
.
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingAtoms
.
getDeviceBuffer
());
}
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
params
[
i
].
getMemory
());
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
b33ee3b0
...
...
@@ -118,14 +118,12 @@ private:
OpenCLParallelCalcForcesAndEnergyKernel
::
OpenCLParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
OpenCLPlatform
::
PlatformData
&
data
)
:
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
),
completionTimes
(
data
.
contexts
.
size
()),
contextNonbondedFractions
(
data
.
contexts
.
size
()),
tileCounts
(
data
.
contexts
.
size
()),
contextForces
(
NULL
),
pinnedPositionBuffer
(
NULL
),
pinnedPositionMemory
(
NULL
),
pinnedForceBuffer
(
NULL
),
pinnedForceMemory
(
NULL
)
{
tileCounts
(
data
.
contexts
.
size
()),
pinnedPositionBuffer
(
NULL
),
pinnedPositionMemory
(
NULL
),
pinnedForceBuffer
(
NULL
),
pinnedForceMemory
(
NULL
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
OpenCLCalcForcesAndEnergyKernel
(
name
,
platform
,
*
data
.
contexts
[
i
])));
}
OpenCLParallelCalcForcesAndEnergyKernel
::~
OpenCLParallelCalcForcesAndEnergyKernel
()
{
if
(
contextForces
!=
NULL
)
delete
contextForces
;
if
(
pinnedPositionBuffer
!=
NULL
)
delete
pinnedPositionBuffer
;
if
(
pinnedForceBuffer
!=
NULL
)
...
...
@@ -142,8 +140,8 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void
OpenCLParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
OpenCLContext
&
cl0
=
*
data
.
contexts
[
0
];
int
elementSize
=
(
cl0
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
if
(
contextForces
==
NULL
)
{
contextForces
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
cl0
,
&
cl0
.
getForceBuffers
().
getDeviceBuffer
(),
if
(
!
contextForces
.
isInitialized
()
)
{
contextForces
.
initializ
e
<
mm_float4
>
(
cl0
,
&
cl0
.
getForceBuffers
().
getDeviceBuffer
(),
data
.
contexts
.
size
()
*
cl0
.
getPaddedNumAtoms
(),
"contextForces"
);
int
bufferBytes
=
(
data
.
contexts
.
size
()
-
1
)
*
cl0
.
getPaddedNumAtoms
()
*
elementSize
;
pinnedPositionBuffer
=
new
cl
::
Buffer
(
cl0
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
...
...
@@ -179,9 +177,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
OpenCLContext
&
cl
=
*
data
.
contexts
[
0
];
int
numAtoms
=
cl
.
getPaddedNumAtoms
();
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
cl
.
getQueue
().
enqueueWriteBuffer
(
contextForces
->
getDeviceBuffer
(),
CL_FALSE
,
numAtoms
*
elementSize
,
cl
.
getQueue
().
enqueueWriteBuffer
(
contextForces
.
getDeviceBuffer
(),
CL_FALSE
,
numAtoms
*
elementSize
,
numAtoms
*
(
data
.
contexts
.
size
()
-
1
)
*
elementSize
,
pinnedForceMemory
);
cl
.
reduceBuffer
(
*
contextForces
,
data
.
contexts
.
size
());
cl
.
reduceBuffer
(
contextForces
,
data
.
contexts
.
size
());
// Balance work between the contexts by transferring a little nonbonded work from the context that
// finished last to the one that finished first.
...
...
platforms/opencl/src/OpenCLSort.cpp
View file @
b33ee3b0
...
...
@@ -31,8 +31,7 @@
using
namespace
OpenMM
;
using
namespace
std
;
OpenCLSort
::
OpenCLSort
(
OpenCLContext
&
context
,
SortTrait
*
trait
,
unsigned
int
length
)
:
context
(
context
),
trait
(
trait
),
dataRange
(
NULL
),
bucketOfElement
(
NULL
),
offsetInBucket
(
NULL
),
bucketOffset
(
NULL
),
buckets
(
NULL
),
dataLength
(
length
)
{
OpenCLSort
::
OpenCLSort
(
OpenCLContext
&
context
,
SortTrait
*
trait
,
unsigned
int
length
)
:
context
(
context
),
trait
(
trait
),
dataLength
(
length
)
{
// Create kernels.
std
::
map
<
std
::
string
,
std
::
string
>
replacements
;
...
...
@@ -81,26 +80,16 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
// Create workspace arrays.
if
(
!
isShortList
)
{
dataRange
=
new
OpenCLArray
(
context
,
2
,
trait
->
getKeySize
(),
"sortDataRange"
);
bucketOffset
=
OpenCLArray
::
creat
e
<
cl_uint
>
(
context
,
numBuckets
,
"bucketOffset"
);
bucketOfElement
=
OpenCLArray
::
creat
e
<
cl_uint
>
(
context
,
length
,
"bucketOfElement"
);
offsetInBucket
=
OpenCLArray
::
creat
e
<
cl_uint
>
(
context
,
length
,
"offsetInBucket"
);
buckets
=
new
OpenCLArray
(
context
,
length
,
trait
->
getDataSize
(),
"buckets"
);
dataRange
.
initialize
(
context
,
2
,
trait
->
getKeySize
(),
"sortDataRange"
);
bucketOffset
.
initializ
e
<
cl_uint
>
(
context
,
numBuckets
,
"bucketOffset"
);
bucketOfElement
.
initializ
e
<
cl_uint
>
(
context
,
length
,
"bucketOfElement"
);
offsetInBucket
.
initializ
e
<
cl_uint
>
(
context
,
length
,
"offsetInBucket"
);
buckets
.
initialize
(
context
,
length
,
trait
->
getDataSize
(),
"buckets"
);
}
}
OpenCLSort
::~
OpenCLSort
()
{
delete
trait
;
if
(
dataRange
!=
NULL
)
delete
dataRange
;
if
(
bucketOfElement
!=
NULL
)
delete
bucketOfElement
;
if
(
offsetInBucket
!=
NULL
)
delete
offsetInBucket
;
if
(
bucketOffset
!=
NULL
)
delete
bucketOffset
;
if
(
buckets
!=
NULL
)
delete
buckets
;
}
void
OpenCLSort
::
sort
(
OpenCLArray
&
data
)
{
...
...
@@ -119,14 +108,14 @@ void OpenCLSort::sort(OpenCLArray& data) {
else
{
// Compute the range of data values.
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
unsigned
int
numBuckets
=
bucketOffset
.
getSize
();
computeRangeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
computeRangeKernel
.
setArg
<
cl_uint
>
(
1
,
data
.
getSize
());
computeRangeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
dataRange
->
getDeviceBuffer
());
computeRangeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
dataRange
.
getDeviceBuffer
());
computeRangeKernel
.
setArg
(
3
,
rangeKernelSize
*
trait
->
getKeySize
(),
NULL
);
computeRangeKernel
.
setArg
(
4
,
rangeKernelSize
*
trait
->
getKeySize
(),
NULL
);
computeRangeKernel
.
setArg
<
cl_int
>
(
5
,
numBuckets
);
computeRangeKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
bucketOffset
->
getDeviceBuffer
());
computeRangeKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
bucketOffset
.
getDeviceBuffer
());
context
.
executeKernel
(
computeRangeKernel
,
rangeKernelSize
,
rangeKernelSize
);
// Assign array elements to buckets.
...
...
@@ -134,35 +123,35 @@ void OpenCLSort::sort(OpenCLArray& data) {
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl_int
>
(
1
,
data
.
getSize
());
assignElementsKernel
.
setArg
<
cl_int
>
(
2
,
numBuckets
);
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
dataRange
->
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
bucketOffset
->
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
bucketOfElement
->
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
offsetInBucket
->
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
dataRange
.
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
bucketOffset
.
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
bucketOfElement
.
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
offsetInBucket
.
getDeviceBuffer
());
context
.
executeKernel
(
assignElementsKernel
,
data
.
getSize
());
// Compute the position of each bucket.
computeBucketPositionsKernel
.
setArg
<
cl_int
>
(
0
,
numBuckets
);
computeBucketPositionsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
bucketOffset
->
getDeviceBuffer
());
computeBucketPositionsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
bucketOffset
.
getDeviceBuffer
());
computeBucketPositionsKernel
.
setArg
(
2
,
positionsKernelSize
*
sizeof
(
cl_int
),
NULL
);
context
.
executeKernel
(
computeBucketPositionsKernel
,
positionsKernelSize
,
positionsKernelSize
);
// Copy the data into the buckets.
copyToBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
copyToBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
buckets
->
getDeviceBuffer
());
copyToBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
buckets
.
getDeviceBuffer
());
copyToBucketsKernel
.
setArg
<
cl_int
>
(
2
,
data
.
getSize
());
copyToBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
bucketOffset
->
getDeviceBuffer
());
copyToBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
bucketOfElement
->
getDeviceBuffer
());
copyToBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
offsetInBucket
->
getDeviceBuffer
());
copyToBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
bucketOffset
.
getDeviceBuffer
());
copyToBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
bucketOfElement
.
getDeviceBuffer
());
copyToBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
offsetInBucket
.
getDeviceBuffer
());
context
.
executeKernel
(
copyToBucketsKernel
,
data
.
getSize
());
// Sort each bucket.
sortBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
sortBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
buckets
->
getDeviceBuffer
());
sortBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
buckets
.
getDeviceBuffer
());
sortBucketsKernel
.
setArg
<
cl_int
>
(
2
,
numBuckets
);
sortBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
bucketOffset
->
getDeviceBuffer
());
sortBucketsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
bucketOffset
.
getDeviceBuffer
());
sortBucketsKernel
.
setArg
(
4
,
sortKernelSize
*
trait
->
getDataSize
(),
NULL
);
context
.
executeKernel
(
sortBucketsKernel
,
((
data
.
getSize
()
+
sortKernelSize
-
1
)
/
sortKernelSize
)
*
sortKernelSize
,
sortKernelSize
);
}
...
...
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
View file @
b33ee3b0
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2013-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -107,13 +107,6 @@ private:
const
DrudeForce
&
force
;
};
OpenCLCalcDrudeForceKernel
::~
OpenCLCalcDrudeForceKernel
()
{
if
(
particleParams
!=
NULL
)
delete
particleParams
;
if
(
pairParams
!=
NULL
)
delete
pairParams
;
}
void
OpenCLCalcDrudeForceKernel
::
initialize
(
const
System
&
system
,
const
DrudeForce
&
force
)
{
int
numContexts
=
cl
.
getPlatformData
().
contexts
.
size
();
int
startParticleIndex
=
cl
.
getContextIndex
()
*
force
.
getNumParticles
()
/
numContexts
;
...
...
@@ -123,7 +116,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
// Create the harmonic interaction .
vector
<
vector
<
int
>
>
atoms
(
numParticles
,
vector
<
int
>
(
5
));
particleParams
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
cl
,
numParticles
,
"drudeParticleParams"
);
particleParams
.
initializ
e
<
mm_float4
>
(
cl
,
numParticles
,
"drudeParticleParams"
);
vector
<
mm_float4
>
paramVector
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double
charge
,
polarizability
,
aniso12
,
aniso34
;
...
...
@@ -145,9 +138,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
}
paramVector
[
i
]
=
mm_float4
((
float
)
k1
,
(
float
)
k2
,
(
float
)
k3
,
0.0
f
);
}
particleParams
->
upload
(
paramVector
);
particleParams
.
upload
(
paramVector
);
map
<
string
,
string
>
replacements
;
replacements
[
"PARAMS"
]
=
cl
.
getBondedUtilities
().
addArgument
(
particleParams
->
getDeviceBuffer
(),
"float4"
);
replacements
[
"PARAMS"
]
=
cl
.
getBondedUtilities
().
addArgument
(
particleParams
.
getDeviceBuffer
(),
"float4"
);
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLDrudeKernelSources
::
drudeParticleForce
,
replacements
),
force
.
getForceGroup
());
}
int
startPairIndex
=
cl
.
getContextIndex
()
*
force
.
getNumScreenedPairs
()
/
numContexts
;
...
...
@@ -157,7 +150,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
// Create the screened interaction between dipole pairs.
vector
<
vector
<
int
>
>
atoms
(
numPairs
,
vector
<
int
>
(
4
));
pairParams
=
OpenCLArray
::
creat
e
<
mm_float2
>
(
cl
,
numPairs
,
"drudePairParams"
);
pairParams
.
initializ
e
<
mm_float2
>
(
cl
,
numPairs
,
"drudePairParams"
);
vector
<
mm_float2
>
paramVector
(
numPairs
);
for
(
int
i
=
0
;
i
<
numPairs
;
i
++
)
{
int
drude1
,
drude2
;
...
...
@@ -171,9 +164,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
double
energyScale
=
ONE_4PI_EPS0
*
charge1
*
charge2
;
paramVector
[
i
]
=
mm_float2
((
float
)
screeningScale
,
(
float
)
energyScale
);
}
pairParams
->
upload
(
paramVector
);
pairParams
.
upload
(
paramVector
);
map
<
string
,
string
>
replacements
;
replacements
[
"PARAMS"
]
=
cl
.
getBondedUtilities
().
addArgument
(
pairParams
->
getDeviceBuffer
(),
"float2"
);
replacements
[
"PARAMS"
]
=
cl
.
getBondedUtilities
().
addArgument
(
pairParams
.
getDeviceBuffer
(),
"float2"
);
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLDrudeKernelSources
::
drudePairForce
,
replacements
),
force
.
getForceGroup
());
}
cl
.
addForce
(
new
OpenCLDrudeForceInfo
(
force
));
...
...
@@ -192,7 +185,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
int
endParticleIndex
=
(
cl
.
getContextIndex
()
+
1
)
*
force
.
getNumParticles
()
/
numContexts
;
int
numParticles
=
endParticleIndex
-
startParticleIndex
;
if
(
numParticles
>
0
)
{
if
(
particleParams
==
NULL
||
numParticles
!=
particleParams
->
getSize
())
if
(
!
particleParams
.
isInitialized
()
||
numParticles
!=
particleParams
.
getSize
())
throw
OpenMMException
(
"updateParametersInContext: The number of Drude particles has changed"
);
vector
<
mm_float4
>
paramVector
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
...
...
@@ -211,7 +204,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
k2
=
0
;
paramVector
[
i
]
=
mm_float4
((
float
)
k1
,
(
float
)
k2
,
(
float
)
k3
,
0.0
f
);
}
particleParams
->
upload
(
paramVector
);
particleParams
.
upload
(
paramVector
);
}
// Set the pair parameters.
...
...
@@ -220,7 +213,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
int
endPairIndex
=
(
cl
.
getContextIndex
()
+
1
)
*
force
.
getNumScreenedPairs
()
/
numContexts
;
int
numPairs
=
endPairIndex
-
startPairIndex
;
if
(
numPairs
>
0
)
{
if
(
pairParams
==
NULL
||
numPairs
!=
pairParams
->
getSize
())
if
(
!
pairParams
.
isInitialized
()
||
numPairs
!=
pairParams
.
getSize
())
throw
OpenMMException
(
"updateParametersInContext: The number of screened pairs has changed"
);
vector
<
mm_float2
>
paramVector
(
numPairs
);
for
(
int
i
=
0
;
i
<
numPairs
;
i
++
)
{
...
...
@@ -235,17 +228,10 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
double
energyScale
=
ONE_4PI_EPS0
*
charge1
*
charge2
;
paramVector
[
i
]
=
mm_float2
((
float
)
screeningScale
,
(
float
)
energyScale
);
}
pairParams
->
upload
(
paramVector
);
pairParams
.
upload
(
paramVector
);
}
}
OpenCLIntegrateDrudeLangevinStepKernel
::~
OpenCLIntegrateDrudeLangevinStepKernel
()
{
if
(
normalParticles
!=
NULL
)
delete
normalParticles
;
if
(
pairParticles
!=
NULL
)
delete
pairParticles
;
}
void
OpenCLIntegrateDrudeLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
DrudeLangevinIntegrator
&
integrator
,
const
DrudeForce
&
force
)
{
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
((
unsigned
int
)
integrator
.
getRandomNumberSeed
());
...
...
@@ -266,12 +252,12 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co
pairParticleVec
.
push_back
(
mm_int2
(
p
,
p1
));
}
normalParticleVec
.
insert
(
normalParticleVec
.
begin
(),
particles
.
begin
(),
particles
.
end
());
normalParticles
=
OpenCLArray
::
creat
e
<
int
>
(
cl
,
max
((
int
)
normalParticleVec
.
size
(),
1
),
"drudeNormalParticles"
);
pairParticles
=
OpenCLArray
::
creat
e
<
cl_int2
>
(
cl
,
max
((
int
)
pairParticleVec
.
size
(),
1
),
"drudePairParticles"
);
normalParticles
.
initializ
e
<
int
>
(
cl
,
max
((
int
)
normalParticleVec
.
size
(),
1
),
"drudeNormalParticles"
);
pairParticles
.
initializ
e
<
cl_int2
>
(
cl
,
max
((
int
)
pairParticleVec
.
size
(),
1
),
"drudePairParticles"
);
if
(
normalParticleVec
.
size
()
>
0
)
normalParticles
->
upload
(
normalParticleVec
);
normalParticles
.
upload
(
normalParticleVec
);
if
(
pairParticleVec
.
size
()
>
0
)
pairParticles
->
upload
(
pairParticleVec
);
pairParticles
.
upload
(
pairParticleVec
);
// Create kernels.
...
...
@@ -296,8 +282,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
kernel1
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getVelm
().
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getForce
().
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
2
,
integration
.
getPosDelta
().
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
3
,
normalParticles
->
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
4
,
pairParticles
->
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
3
,
normalParticles
.
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
4
,
pairParticles
.
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
5
,
integration
.
getStepSize
().
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
12
,
integration
.
getRandom
().
getDeviceBuffer
());
kernel2
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
...
...
@@ -314,7 +300,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
else
hardwallKernel
.
setArg
<
void
*>
(
1
,
NULL
);
hardwallKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getVelm
().
getDeviceBuffer
());
hardwallKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pairParticles
->
getDeviceBuffer
());
hardwallKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pairParticles
.
getDeviceBuffer
());
hardwallKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
integration
.
getStepSize
().
getDeviceBuffer
());
}
...
...
@@ -363,7 +349,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
// Call the first integration kernel.
kernel1
.
setArg
<
cl_uint
>
(
13
,
integration
.
prepareRandomNumbers
(
normalParticles
->
getSize
()
+
2
*
pairParticles
->
getSize
()));
kernel1
.
setArg
<
cl_uint
>
(
13
,
integration
.
prepareRandomNumbers
(
normalParticles
.
getSize
()
+
2
*
pairParticles
.
getSize
()));
cl
.
executeKernel
(
kernel1
,
numAtoms
);
// Apply constraints.
...
...
@@ -377,7 +363,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
// Apply hard wall constraints.
if
(
maxDrudeDistance
>
0
)
cl
.
executeKernel
(
hardwallKernel
,
pairParticles
->
getSize
());
cl
.
executeKernel
(
hardwallKernel
,
pairParticles
.
getSize
());
integration
.
computeVirtualSites
();
// Update the time and step count.
...
...
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
View file @
b33ee3b0
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2013-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -45,9 +45,8 @@ namespace OpenMM {
class
OpenCLCalcDrudeForceKernel
:
public
CalcDrudeForceKernel
{
public:
OpenCLCalcDrudeForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcDrudeForceKernel
(
name
,
platform
),
cl
(
cl
)
,
particleParams
(
NULL
),
pairParams
(
NULL
)
{
CalcDrudeForceKernel
(
name
,
platform
),
cl
(
cl
)
{
}
~
OpenCLCalcDrudeForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -73,8 +72,8 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
DrudeForce
&
force
);
private:
OpenCLContext
&
cl
;
OpenCLArray
*
particleParams
;
OpenCLArray
*
pairParams
;
OpenCLArray
particleParams
;
OpenCLArray
pairParams
;
};
/**
...
...
@@ -83,9 +82,8 @@ private:
class
OpenCLIntegrateDrudeLangevinStepKernel
:
public
IntegrateDrudeLangevinStepKernel
{
public:
OpenCLIntegrateDrudeLangevinStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateDrudeLangevinStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
,
normalParticles
(
NULL
),
pairParticles
(
NULL
)
{
IntegrateDrudeLangevinStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
{
}
~
OpenCLIntegrateDrudeLangevinStepKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -112,8 +110,8 @@ private:
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
double
prevStepSize
;
OpenCLArray
*
normalParticles
;
OpenCLArray
*
pairParticles
;
OpenCLArray
normalParticles
;
OpenCLArray
pairParticles
;
cl
::
Kernel
kernel1
,
kernel2
,
hardwallKernel
;
};
...
...
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
View file @
b33ee3b0
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -41,19 +41,6 @@
using
namespace
OpenMM
;
using
namespace
std
;
OpenCLIntegrateRPMDStepKernel
::~
OpenCLIntegrateRPMDStepKernel
()
{
if
(
forces
!=
NULL
)
delete
forces
;
if
(
positions
!=
NULL
)
delete
positions
;
if
(
velocities
!=
NULL
)
delete
velocities
;
if
(
contractedForces
!=
NULL
)
delete
contractedForces
;
if
(
contractedPositions
!=
NULL
)
delete
contractedPositions
;
}
void
OpenCLIntegrateRPMDStepKernel
::
initialize
(
const
System
&
system
,
const
RPMDIntegrator
&
integrator
)
{
cl
.
getPlatformData
().
initializeContexts
(
system
);
numCopies
=
integrator
.
getNumCopies
();
...
...
@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
throw
OpenMMException
(
"RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5."
);
int
paddedParticles
=
cl
.
getPaddedNumAtoms
();
int
forceElementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
forces
=
new
OpenCLArray
(
cl
,
numCopies
*
paddedParticles
,
forceElementSize
,
"rpmdForces"
);
forces
.
initialize
(
cl
,
numCopies
*
paddedParticles
,
forceElementSize
,
"rpmdForces"
);
bool
useDoublePrecision
=
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
());
int
elementSize
=
(
useDoublePrecision
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
positions
=
new
OpenCLArray
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdPositions"
);
velocities
=
new
OpenCLArray
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdVelocities"
);
positions
.
initialize
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdPositions"
);
velocities
.
initialize
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdVelocities"
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
((
unsigned
int
)
integrator
.
getRandomNumberSeed
());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if
(
useDoublePrecision
)
{
vector
<
mm_double4
>
temp
(
positions
->
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
vector
<
mm_double4
>
temp
(
positions
.
getSize
());
for
(
int
i
=
0
;
i
<
positions
.
getSize
();
i
++
)
temp
[
i
]
=
mm_double4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
positions
.
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
.
getSize
();
i
++
)
temp
[
i
]
=
mm_double4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
velocities
.
upload
(
temp
);
}
else
{
vector
<
mm_float4
>
temp
(
positions
->
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
vector
<
mm_float4
>
temp
(
positions
.
getSize
());
for
(
int
i
=
0
;
i
<
positions
.
getSize
();
i
++
)
temp
[
i
]
=
mm_float4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
positions
.
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
.
getSize
();
i
++
)
temp
[
i
]
=
mm_float4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
velocities
.
upload
(
temp
);
}
// Build a list of contractions.
...
...
@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
}
}
if
(
maxContractedCopies
>
0
)
{
contractedForces
=
new
OpenCLArray
(
cl
,
maxContractedCopies
*
paddedParticles
,
forceElementSize
,
"rpmdContractedForces"
);
contractedPositions
=
new
OpenCLArray
(
cl
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
contractedForces
.
initialize
(
cl
,
maxContractedCopies
*
paddedParticles
,
forceElementSize
,
"rpmdContractedForces"
);
contractedPositions
.
initialize
(
cl
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
}
// Create kernels.
...
...
@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
void
OpenCLIntegrateRPMDStepKernel
::
initializeKernels
(
ContextImpl
&
context
)
{
hasInitializedKernel
=
true
;
pileKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
->
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
->
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
velocities
->
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
forces
->
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
->
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
->
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
->
getDeviceBuffer
());
pileKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
.
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
.
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
velocities
.
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
forces
.
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
.
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
.
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
.
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getPosq
().
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
.
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getVelm
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
cl
.
getPosq
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getForce
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getVelm
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
velocities
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
velocities
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getPosq
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
for
(
auto
&
g
:
groupsByCopies
)
{
int
copies
=
g
.
first
;
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
positions
->
getDeviceBuffer
());
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedPositions
->
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
forces
->
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
->
getDeviceBuffer
());
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
positions
.
getDeviceBuffer
());
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedPositions
.
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
forces
.
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
.
getDeviceBuffer
());
}
}
...
...
@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
void
OpenCLIntegrateRPMDStepKernel
::
computeForces
(
ContextImpl
&
context
)
{
// Compute forces from all groups that didn't have a specified contraction.
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
positions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
positions
.
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
i
);
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
...
...
@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Now loop over contractions and compute forces from them.
if
(
groupsByCopies
.
size
()
>
0
)
{
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
contractedPositions
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
contractedPositions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
contractedPositions
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
contractedPositions
.
getDeviceBuffer
());
for
(
auto
&
g
:
groupsByCopies
)
{
int
copies
=
g
.
first
;
int
groupFlags
=
g
.
second
;
...
...
@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if
(
groupsByCopies
.
size
()
>
0
)
{
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
.
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
numCopies
-
1
);
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
}
...
...
@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context,
}
void
OpenCLIntegrateRPMDStepKernel
::
setPositions
(
int
copy
,
const
vector
<
Vec3
>&
pos
)
{
if
(
positions
==
NULL
)
if
(
!
positions
.
isInitialized
()
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set positions before the integrator is added to a Context"
);
if
(
pos
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setPositions()"
);
...
...
@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
cl
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
mm_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
}
else
if
(
cl
.
getUseMixedPrecision
())
{
vector
<
mm_float4
>
posqf
(
cl
.
getPaddedNumAtoms
());
...
...
@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
vector
<
mm_double4
>
posq
(
cl
.
getPaddedNumAtoms
());
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
mm_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posqf
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
}
else
{
vector
<
mm_float4
>
posq
(
cl
.
getPaddedNumAtoms
());
cl
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
mm_float4
((
cl_float
)
offsetPos
[
i
][
0
],
(
cl_float
)
offsetPos
[
i
][
1
],
(
cl_float
)
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
posq
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
posq
[
0
]);
}
}
void
OpenCLIntegrateRPMDStepKernel
::
setVelocities
(
int
copy
,
const
vector
<
Vec3
>&
vel
)
{
if
(
velocities
==
NULL
)
if
(
!
velocities
.
isInitialized
()
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"
);
if
(
vel
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setVelocities()"
);
...
...
@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
cl
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
mm_double4
(
vel
[
i
][
0
],
vel
[
i
][
1
],
vel
[
i
][
2
],
velm
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
velm
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
velm
[
0
]);
}
else
{
vector
<
mm_float4
>
velm
(
cl
.
getPaddedNumAtoms
());
cl
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
mm_float4
((
cl_float
)
vel
[
i
][
0
],
(
cl_float
)
vel
[
i
][
1
],
(
cl_float
)
vel
[
i
][
2
],
velm
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
velm
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
velm
[
0
]);
}
}
void
OpenCLIntegrateRPMDStepKernel
::
copyToContext
(
int
copy
,
ContextImpl
&
context
)
{
if
(
!
hasInitializedKernel
)
initializeKernels
(
context
);
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
.
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
copy
);
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
}
...
...
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
View file @
b33ee3b0
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -45,9 +45,8 @@ namespace OpenMM {
class
OpenCLIntegrateRPMDStepKernel
:
public
IntegrateRPMDStepKernel
{
public:
OpenCLIntegrateRPMDStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateRPMDStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernel
(
false
)
,
forces
(
NULL
),
positions
(
NULL
),
velocities
(
NULL
),
contractedForces
(
NULL
),
contractedPositions
(
NULL
)
{
IntegrateRPMDStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernel
(
false
)
{
}
~
OpenCLIntegrateRPMDStepKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -92,11 +91,11 @@ private:
int
numCopies
,
numParticles
,
workgroupSize
;
std
::
map
<
int
,
int
>
groupsByCopies
;
int
groupsNotContracted
;
OpenCLArray
*
forces
;
OpenCLArray
*
positions
;
OpenCLArray
*
velocities
;
OpenCLArray
*
contractedForces
;
OpenCLArray
*
contractedPositions
;
OpenCLArray
forces
;
OpenCLArray
positions
;
OpenCLArray
velocities
;
OpenCLArray
contractedForces
;
OpenCLArray
contractedPositions
;
cl
::
Kernel
pileKernel
,
stepKernel
,
velocitiesKernel
,
copyToContextKernel
,
copyFromContextKernel
,
translateKernel
;
std
::
map
<
int
,
cl
::
Kernel
>
positionContractionKernels
;
std
::
map
<
int
,
cl
::
Kernel
>
forceContractionKernels
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment