Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d59b0373
Commit
d59b0373
authored
Feb 08, 2018
by
peastman
Browse files
Convert OpenCLArray to use RAII
parent
44daf269
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
705 additions
and
801 deletions
+705
-801
platforms/opencl/include/OpenCLArray.h
platforms/opencl/include/OpenCLArray.h
+68
-3
platforms/opencl/include/OpenCLCompact.h
platforms/opencl/include/OpenCLCompact.h
+1
-0
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+22
-22
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+91
-105
platforms/opencl/include/OpenCLSort.h
platforms/opencl/include/OpenCLSort.h
+1
-0
platforms/opencl/src/OpenCLArray.cpp
platforms/opencl/src/OpenCLArray.cpp
+53
-11
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+70
-93
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+399
-567
No files found.
platforms/opencl/include/OpenCLArray.h
View file @
d59b0373
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -27,15 +27,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLContext.h"
#define __CL_ENABLE_EXCEPTIONS
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include "openmm/OpenMMException.h"
#include "windowsExportOpenCL.h"
#include <cl.hpp>
#include <iostream>
#include <sstream>
#include <vector>
namespace
OpenMM
{
class
OpenCLContext
;
/**
* This class encapsulates an OpenCL Buffer. It provides a simplified API for working with it,
* and for copying data to and from the OpenCL Buffer.
...
...
@@ -69,6 +73,11 @@ public:
static
OpenCLArray
*
create
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
const
std
::
string
&
name
)
{
return
new
OpenCLArray
(
context
,
buffer
,
size
,
sizeof
(
T
),
name
);
}
/**
* Create an uninitialized OpenCLArray object. It does not point to any OpenCL Buffer,
* and cannot be used until initialize() is called on it.
*/
OpenCLArray
();
/**
* Create an OpenCLArray object.
*
...
...
@@ -90,6 +99,61 @@ public:
*/
OpenCLArray
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
~
OpenCLArray
();
/**
* Initialize this object.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
void
initialize
(
OpenCLContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
,
cl_int
flags
=
CL_MEM_READ_WRITE
);
/**
* Initialize this object to use a preexisting Buffer.
*
* @param context the context for which to create the array
* @param buffer the OpenCL Buffer this object encapsulates
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void
initialize
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
/**
* Initialize this object. The template argument is the data type of each array element.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
template
<
class
T
>
void
initialize
(
OpenCLContext
&
context
,
int
size
,
const
std
::
string
&
name
,
cl_int
flags
=
CL_MEM_READ_WRITE
)
{
initialize
(
context
,
size
,
sizeof
(
T
),
name
,
flags
);
}
/**
* Initialize this object to use a preexisting Buffer. The template argument
* is the data type of each array element.
*
* @param context the context for which to create the array
* @param buffer the OpenCL Buffer this object encapsulates
* @param size the number of elements in the array
* @param name the name of the array
*/
template
<
class
T
>
void
initialize
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
const
std
::
string
&
name
)
{
initialize
(
context
,
buffer
,
size
,
sizeof
(
T
),
name
);
}
/**
* Recreate the internal storage to have a different size.
*/
void
resize
(
int
size
);
/**
* Get whether this array has been initialized.
*/
bool
isInitialized
()
const
{
return
(
buffer
!=
NULL
);
}
/**
* Get the size of the array.
*/
...
...
@@ -155,9 +219,10 @@ public:
*/
void
copyTo
(
OpenCLArray
&
dest
)
const
;
private:
OpenCLContext
&
context
;
OpenCLContext
*
context
;
cl
::
Buffer
*
buffer
;
int
size
,
elementSize
;
cl_int
flags
;
bool
ownsBuffer
;
std
::
string
name
;
};
...
...
platforms/opencl/include/OpenCLCompact.h
View file @
d59b0373
...
...
@@ -26,6 +26,7 @@
*/
#include "OpenCLArray.h"
#include "OpenCLContext.h"
namespace
OpenMM
{
...
...
platforms/opencl/include/OpenCLContext.h
View file @
d59b0373
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -39,11 +39,11 @@
#include <pthread.h>
#include <cl.hpp>
#include "windowsExportOpenCL.h"
#include "OpenCLArray.h"
#include "OpenCLPlatform.h"
namespace
OpenMM
{
class
OpenCLArray
;
class
OpenCLForceInfo
;
class
OpenCLIntegrationUtilities
;
class
OpenCLExpressionUtilities
;
...
...
@@ -227,49 +227,49 @@ public:
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/
OpenCLArray
&
getPosq
()
{
return
*
posq
;
return
posq
;
}
/**
* Get the array which contains a correction to the position of each atom. This only exists if getUseMixedPrecision() returns true.
*/
OpenCLArray
&
getPosqCorrection
()
{
return
*
posqCorrection
;
return
posqCorrection
;
}
/**
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
*/
OpenCLArray
&
getVelm
()
{
return
*
velm
;
return
velm
;
}
/**
* Get the array which contains the force on each atom.
*/
OpenCLArray
&
getForce
()
{
return
*
force
;
return
force
;
}
/**
* Get the array which contains the buffers in which forces are computed.
*/
OpenCLArray
&
getForceBuffers
()
{
return
*
forceBuffers
;
return
forceBuffers
;
}
/**
* Get the array which contains a contribution to each force represented as 64 bit fixed point.
*/
OpenCLArray
&
getLongForceBuffer
()
{
return
*
longForceBuffer
;
return
longForceBuffer
;
}
/**
* Get the array which contains the buffer in which energy is computed.
*/
OpenCLArray
&
getEnergyBuffer
()
{
return
*
energyBuffer
;
return
energyBuffer
;
}
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
OpenCLArray
&
getEnergyParamDerivBuffer
()
{
return
*
energyParamDerivBuffer
;
return
energyParamDerivBuffer
;
}
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
...
...
@@ -288,7 +288,7 @@ public:
* Get the array which contains the index of each atom.
*/
OpenCLArray
&
getAtomIndexArray
()
{
return
*
atomIndexDevice
;
return
atomIndexDevice
;
}
/**
* Get the number of cells by which the positions are offset.
...
...
@@ -762,17 +762,17 @@ private:
std
::
vector
<
mm_int4
>
posCellOffsets
;
cl
::
Buffer
*
pinnedBuffer
;
void
*
pinnedMemory
;
OpenCLArray
*
posq
;
OpenCLArray
*
posqCorrection
;
OpenCLArray
*
velm
;
OpenCLArray
*
force
;
OpenCLArray
*
forceBuffers
;
OpenCLArray
*
longForceBuffer
;
OpenCLArray
*
energyBuffer
;
OpenCLArray
*
energySum
;
OpenCLArray
*
energyParamDerivBuffer
;
OpenCLArray
*
atomIndexDevice
;
OpenCLArray
*
chargeBuffer
;
OpenCLArray
posq
;
OpenCLArray
posqCorrection
;
OpenCLArray
velm
;
OpenCLArray
force
;
OpenCLArray
forceBuffers
;
OpenCLArray
longForceBuffer
;
OpenCLArray
energyBuffer
;
OpenCLArray
energySum
;
OpenCLArray
energyParamDerivBuffer
;
OpenCLArray
atomIndexDevice
;
OpenCLArray
chargeBuffer
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
vector
<
int
>
atomIndex
;
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
d59b0373
...
...
@@ -243,9 +243,8 @@ private:
class
OpenCLCalcHarmonicBondForceKernel
:
public
CalcHarmonicBondForceKernel
{
public:
OpenCLCalcHarmonicBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcHarmonicBondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
{
}
~
OpenCLCalcHarmonicBondForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -276,7 +275,7 @@ private:
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
params
;
};
/**
...
...
@@ -285,7 +284,7 @@ private:
class
OpenCLCalcCustomBondForceKernel
:
public
CalcCustomBondForceKernel
{
public:
OpenCLCalcCustomBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomBondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
{
}
~
OpenCLCalcCustomBondForceKernel
();
/**
...
...
@@ -319,7 +318,7 @@ private:
ForceInfo
*
info
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
};
...
...
@@ -330,9 +329,8 @@ private:
class
OpenCLCalcHarmonicAngleForceKernel
:
public
CalcHarmonicAngleForceKernel
{
public:
OpenCLCalcHarmonicAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcHarmonicAngleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
{
}
~
OpenCLCalcHarmonicAngleForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -363,7 +361,7 @@ private:
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
params
;
};
/**
...
...
@@ -372,7 +370,7 @@ private:
class
OpenCLCalcCustomAngleForceKernel
:
public
CalcCustomAngleForceKernel
{
public:
OpenCLCalcCustomAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomAngleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
{
}
~
OpenCLCalcCustomAngleForceKernel
();
/**
...
...
@@ -406,7 +404,7 @@ private:
ForceInfo
*
info
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
};
...
...
@@ -417,9 +415,8 @@ private:
class
OpenCLCalcPeriodicTorsionForceKernel
:
public
CalcPeriodicTorsionForceKernel
{
public:
OpenCLCalcPeriodicTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcPeriodicTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
{
}
~
OpenCLCalcPeriodicTorsionForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -450,7 +447,7 @@ private:
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
params
;
};
/**
...
...
@@ -459,9 +456,8 @@ private:
class
OpenCLCalcRBTorsionForceKernel
:
public
CalcRBTorsionForceKernel
{
public:
OpenCLCalcRBTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcRBTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
{
}
~
OpenCLCalcRBTorsionForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -492,7 +488,7 @@ private:
OpenCLContext
&
cl
;
ForceInfo
*
info
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
params
;
};
/**
...
...
@@ -501,9 +497,8 @@ private:
class
OpenCLCalcCMAPTorsionForceKernel
:
public
CalcCMAPTorsionForceKernel
{
public:
OpenCLCalcCMAPTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCMAPTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
,
coefficients
(
NULL
),
mapPositions
(
NULL
),
torsionMaps
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
{
}
~
OpenCLCalcCMAPTorsionForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -535,9 +530,9 @@ private:
ForceInfo
*
info
;
const
System
&
system
;
std
::
vector
<
mm_int2
>
mapPositionsVec
;
OpenCLArray
*
coefficients
;
OpenCLArray
*
mapPositions
;
OpenCLArray
*
torsionMaps
;
OpenCLArray
coefficients
;
OpenCLArray
mapPositions
;
OpenCLArray
torsionMaps
;
};
/**
...
...
@@ -546,7 +541,7 @@ private:
class
OpenCLCalcCustomTorsionForceKernel
:
public
CalcCustomTorsionForceKernel
{
public:
OpenCLCalcCustomTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
{
}
~
OpenCLCalcCustomTorsionForceKernel
();
/**
...
...
@@ -580,7 +575,7 @@ private:
ForceInfo
*
info
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
};
...
...
@@ -591,10 +586,7 @@ private:
class
OpenCLCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeDispersionBsplineModuliX
(
NULL
),
pmeDispersionBsplineModuliY
(
NULL
),
pmeDispersionBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
}
~
OpenCLCalcNonbondedForceKernel
();
/**
...
...
@@ -660,21 +652,21 @@ private:
OpenCLContext
&
cl
;
ForceInfo
*
info
;
bool
hasInitializedKernel
;
OpenCLArray
*
sigmaEpsilon
;
OpenCLArray
*
exceptionParams
;
OpenCLArray
*
cosSinSums
;
OpenCLArray
*
pmeGrid
;
OpenCLArray
*
pmeGrid2
;
OpenCLArray
*
pmeBsplineModuliX
;
OpenCLArray
*
pmeBsplineModuliY
;
OpenCLArray
*
pmeBsplineModuliZ
;
OpenCLArray
*
pmeDispersionBsplineModuliX
;
OpenCLArray
*
pmeDispersionBsplineModuliY
;
OpenCLArray
*
pmeDispersionBsplineModuliZ
;
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
*
pmeAtomGridIndex
;
OpenCLArray
*
pmeEnergyBuffer
;
OpenCLArray
sigmaEpsilon
;
OpenCLArray
exceptionParams
;
OpenCLArray
cosSinSums
;
OpenCLArray
pmeGrid
;
OpenCLArray
pmeGrid2
;
OpenCLArray
pmeBsplineModuliX
;
OpenCLArray
pmeBsplineModuliY
;
OpenCLArray
pmeBsplineModuliZ
;
OpenCLArray
pmeDispersionBsplineModuliX
;
OpenCLArray
pmeDispersionBsplineModuliY
;
OpenCLArray
pmeDispersionBsplineModuliZ
;
OpenCLArray
pmeBsplineTheta
;
OpenCLArray
pmeAtomRange
;
OpenCLArray
pmeAtomGridIndex
;
OpenCLArray
pmeEnergyBuffer
;
OpenCLSort
*
sort
;
cl
::
CommandQueue
pmeQueue
;
cl
::
Event
pmeSyncEvent
;
...
...
@@ -717,7 +709,7 @@ private:
class
OpenCLCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
public:
OpenCLCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
interactionGroupData
(
NULL
),
forceCopy
(
NULL
),
system
(
system
),
hasInitializedKernel
(
false
)
{
cl
(
cl
),
params
(
NULL
),
forceCopy
(
NULL
),
system
(
system
),
hasInitializedKernel
(
false
)
{
}
~
OpenCLCalcCustomNonbondedForceKernel
();
/**
...
...
@@ -749,13 +741,13 @@ private:
OpenCLContext
&
cl
;
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
interactionGroupData
;
OpenCLArray
globals
;
OpenCLArray
interactionGroupData
;
cl
::
Kernel
interactionGroupKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
double
longRangeCoefficient
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
;
...
...
@@ -770,10 +762,8 @@ private:
class
OpenCLCalcGBSAOBCForceKernel
:
public
CalcGBSAOBCForceKernel
{
public:
OpenCLCalcGBSAOBCForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcGBSAOBCForceKernel
(
name
,
platform
),
cl
(
cl
),
hasCreatedKernels
(
false
),
params
(
NULL
),
bornSum
(
NULL
),
longBornSum
(
NULL
),
bornRadii
(
NULL
),
bornForce
(
NULL
),
longBornForce
(
NULL
),
obcChain
(
NULL
)
{
hasCreatedKernels
(
false
)
{
}
~
OpenCLCalcGBSAOBCForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -804,13 +794,13 @@ private:
int
maxTiles
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
OpenCLArray
*
params
;
OpenCLArray
*
bornSum
;
OpenCLArray
*
longBornSum
;
OpenCLArray
*
bornRadii
;
OpenCLArray
*
bornForce
;
OpenCLArray
*
longBornForce
;
OpenCLArray
*
obcChain
;
OpenCLArray
params
;
OpenCLArray
bornSum
;
OpenCLArray
longBornSum
;
OpenCLArray
bornRadii
;
OpenCLArray
bornForce
;
OpenCLArray
longBornForce
;
OpenCLArray
obcChain
;
cl
::
Kernel
computeBornSumKernel
;
cl
::
Kernel
reduceBornSumKernel
;
cl
::
Kernel
force1Kernel
;
...
...
@@ -823,8 +813,7 @@ private:
class
OpenCLCalcCustomGBForceKernel
:
public
CalcCustomGBForceKernel
{
public:
OpenCLCalcCustomGBForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomGBForceKernel
(
name
,
platform
),
hasInitializedKernels
(
false
),
cl
(
cl
),
params
(
NULL
),
computedValues
(
NULL
),
energyDerivs
(
NULL
),
energyDerivChain
(
NULL
),
longEnergyDerivs
(
NULL
),
globals
(
NULL
),
valueBuffers
(
NULL
),
longValueBuffers
(
NULL
),
system
(
system
)
{
hasInitializedKernels
(
false
),
cl
(
cl
),
params
(
NULL
),
computedValues
(
NULL
),
energyDerivs
(
NULL
),
energyDerivChain
(
NULL
),
system
(
system
)
{
}
~
OpenCLCalcCustomGBForceKernel
();
/**
...
...
@@ -862,14 +851,14 @@ private:
OpenCLParameterSet
*
energyDerivs
;
OpenCLParameterSet
*
energyDerivChain
;
std
::
vector
<
OpenCLParameterSet
*>
dValuedParam
;
std
::
vector
<
OpenCLArray
*
>
dValue0dParam
;
OpenCLArray
*
longEnergyDerivs
;
OpenCLArray
*
globals
;
OpenCLArray
*
valueBuffers
;
OpenCLArray
*
longValueBuffers
;
std
::
vector
<
OpenCLArray
>
dValue0dParam
;
OpenCLArray
longEnergyDerivs
;
OpenCLArray
globals
;
OpenCLArray
valueBuffers
;
OpenCLArray
longValueBuffers
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
std
::
vector
<
bool
>
pairValueUsesParam
,
pairEnergyUsesParam
,
pairEnergyUsesValue
;
const
System
&
system
;
cl
::
Kernel
pairValueKernel
,
perParticleValueKernel
,
pairEnergyKernel
,
perParticleEnergyKernel
,
gradientChainRuleKernel
;
...
...
@@ -883,7 +872,7 @@ private:
class
OpenCLCalcCustomExternalForceKernel
:
public
CalcCustomExternalForceKernel
{
public:
OpenCLCalcCustomExternalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomExternalForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
{
}
~
OpenCLCalcCustomExternalForceKernel
();
/**
...
...
@@ -917,7 +906,7 @@ private:
ForceInfo
*
info
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
};
...
...
@@ -928,8 +917,7 @@ private:
class
OpenCLCalcCustomHbondForceKernel
:
public
CalcCustomHbondForceKernel
{
public:
OpenCLCalcCustomHbondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomHbondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
donorParams
(
NULL
),
acceptorParams
(
NULL
),
donors
(
NULL
),
acceptors
(
NULL
),
donorBufferIndices
(
NULL
),
acceptorBufferIndices
(
NULL
),
globals
(
NULL
),
donorExclusions
(
NULL
),
acceptorExclusions
(
NULL
),
system
(
system
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
donorParams
(
NULL
),
acceptorParams
(
NULL
),
system
(
system
)
{
}
~
OpenCLCalcCustomHbondForceKernel
();
/**
...
...
@@ -963,16 +951,16 @@ private:
ForceInfo
*
info
;
OpenCLParameterSet
*
donorParams
;
OpenCLParameterSet
*
acceptorParams
;
OpenCLArray
*
globals
;
OpenCLArray
*
donors
;
OpenCLArray
*
acceptors
;
OpenCLArray
*
donorBufferIndices
;
OpenCLArray
*
acceptorBufferIndices
;
OpenCLArray
*
donorExclusions
;
OpenCLArray
*
acceptorExclusions
;
OpenCLArray
globals
;
OpenCLArray
donors
;
OpenCLArray
acceptors
;
OpenCLArray
donorBufferIndices
;
OpenCLArray
acceptorBufferIndices
;
OpenCLArray
donorExclusions
;
OpenCLArray
acceptorExclusions
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
const
System
&
system
;
cl
::
Kernel
donorKernel
,
acceptorKernel
;
};
...
...
@@ -983,7 +971,7 @@ private:
class
OpenCLCalcCustomCentroidBondForceKernel
:
public
CalcCustomCentroidBondForceKernel
{
public:
OpenCLCalcCustomCentroidBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomCentroidBondForceKernel
(
name
,
platform
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
groupParticles
(
NULL
),
groupWeights
(
NULL
),
groupOffsets
(
NULL
),
groupForces
(
NULL
),
bondGroups
(
NULL
),
centerPositions
(
NULL
),
system
(
system
)
{
cl
(
cl
),
params
(
NULL
),
system
(
system
)
{
}
~
OpenCLCalcCustomCentroidBondForceKernel
();
/**
...
...
@@ -1017,16 +1005,16 @@ private:
OpenCLContext
&
cl
;
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
groupParticles
;
OpenCLArray
*
groupWeights
;
OpenCLArray
*
groupOffsets
;
OpenCLArray
*
groupForces
;
OpenCLArray
*
bondGroups
;
OpenCLArray
*
centerPositions
;
OpenCLArray
globals
;
OpenCLArray
groupParticles
;
OpenCLArray
groupWeights
;
OpenCLArray
groupOffsets
;
OpenCLArray
groupForces
;
OpenCLArray
bondGroups
;
OpenCLArray
centerPositions
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
cl
::
Kernel
computeCentersKernel
,
groupForcesKernel
,
applyForcesKernel
;
const
System
&
system
;
};
...
...
@@ -1037,7 +1025,7 @@ private:
class
OpenCLCalcCustomCompoundBondForceKernel
:
public
CalcCustomCompoundBondForceKernel
{
public:
OpenCLCalcCustomCompoundBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomCompoundBondForceKernel
(
name
,
platform
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
system
(
system
)
{
cl
(
cl
),
params
(
NULL
),
system
(
system
)
{
}
~
OpenCLCalcCustomCompoundBondForceKernel
();
/**
...
...
@@ -1070,10 +1058,10 @@ private:
OpenCLContext
&
cl
;
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
const
System
&
system
;
};
...
...
@@ -1083,9 +1071,7 @@ private:
class
OpenCLCalcCustomManyParticleForceKernel
:
public
CalcCustomManyParticleForceKernel
{
public:
OpenCLCalcCustomManyParticleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomManyParticleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
particleTypes
(
NULL
),
orderIndex
(
NULL
),
particleOrder
(
NULL
),
exclusions
(
NULL
),
exclusionStartIndex
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
neighborPairs
(
NULL
),
numNeighborPairs
(
NULL
),
neighborStartIndex
(
NULL
),
numNeighborsForAtom
(
NULL
),
neighbors
(
NULL
),
system
(
system
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
params
(
NULL
),
system
(
system
)
{
}
~
OpenCLCalcCustomManyParticleForceKernel
();
/**
...
...
@@ -1120,22 +1106,22 @@ private:
NonbondedMethod
nonbondedMethod
;
int
maxNeighborPairs
,
forceWorkgroupSize
,
findNeighborsWorkgroupSize
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
particleTypes
;
OpenCLArray
*
orderIndex
;
OpenCLArray
*
particleOrder
;
OpenCLArray
*
exclusions
;
OpenCLArray
*
exclusionStartIndex
;
OpenCLArray
*
blockCenter
;
OpenCLArray
*
blockBoundingBox
;
OpenCLArray
*
neighborPairs
;
OpenCLArray
*
numNeighborPairs
;
OpenCLArray
*
neighborStartIndex
;
OpenCLArray
*
numNeighborsForAtom
;
OpenCLArray
*
neighbors
;
OpenCLArray
globals
;
OpenCLArray
particleTypes
;
OpenCLArray
orderIndex
;
OpenCLArray
particleOrder
;
OpenCLArray
exclusions
;
OpenCLArray
exclusionStartIndex
;
OpenCLArray
blockCenter
;
OpenCLArray
blockBoundingBox
;
OpenCLArray
neighborPairs
;
OpenCLArray
numNeighborPairs
;
OpenCLArray
neighborStartIndex
;
OpenCLArray
numNeighborsForAtom
;
OpenCLArray
neighbors
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
const
System
&
system
;
cl
::
Kernel
forceKernel
,
blockBoundsKernel
,
neighborsKernel
,
startIndicesKernel
,
copyPairsKernel
;
};
...
...
platforms/opencl/include/OpenCLSort.h
View file @
d59b0373
...
...
@@ -28,6 +28,7 @@
* -------------------------------------------------------------------------- */
#include "OpenCLArray.h"
#include "OpenCLContext.h"
#include "windowsExportOpenCL.h"
namespace
OpenMM
{
...
...
platforms/opencl/src/OpenCLArray.cpp
View file @
d59b0373
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors.
*
* Portions copyright (c) 2012
-2018
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -25,14 +25,38 @@
* -------------------------------------------------------------------------- */
#include "OpenCLArray.h"
#include "OpenCLContext.h"
#include <iostream>
#include <sstream>
#include <vector>
using
namespace
OpenMM
;
OpenCLArray
::
OpenCLArray
(
OpenCLContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
,
cl_int
flags
)
:
context
(
context
),
size
(
size
),
elementSize
(
elementSize
),
name
(
name
),
ownsBuffer
(
true
)
{
OpenCLArray
::
OpenCLArray
()
:
buffer
(
NULL
),
ownsBuffer
(
false
)
{
}
OpenCLArray
::
OpenCLArray
(
OpenCLContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
,
cl_int
flags
)
:
buffer
(
NULL
)
{
initialize
(
context
,
size
,
elementSize
,
name
,
flags
);
}
OpenCLArray
::
OpenCLArray
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
:
buffer
(
NULL
)
{
initialize
(
context
,
buffer
,
size
,
elementSize
,
name
);
}
OpenCLArray
::~
OpenCLArray
()
{
if
(
buffer
!=
NULL
&&
ownsBuffer
)
delete
buffer
;
}
void
OpenCLArray
::
initialize
(
OpenCLContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
,
cl_int
flags
)
{
if
(
buffer
!=
NULL
)
throw
OpenMMException
(
"OpenCLArray has already been initialized"
);
this
->
context
=
&
context
;
this
->
size
=
size
;
this
->
elementSize
=
elementSize
;
this
->
name
=
name
;
this
->
flags
=
flags
;
ownsBuffer
=
true
;
try
{
buffer
=
new
cl
::
Buffer
(
context
.
getContext
(),
flags
,
size
*
elementSize
);
}
...
...
@@ -43,18 +67,32 @@ OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, cons
}
}
OpenCLArray
::
OpenCLArray
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
:
context
(
context
),
buffer
(
buffer
),
size
(
size
),
elementSize
(
elementSize
),
name
(
name
),
ownsBuffer
(
false
)
{
void
OpenCLArray
::
initialize
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
{
if
(
this
->
buffer
!=
NULL
)
throw
OpenMMException
(
"OpenCLArray has already been initialized"
);
this
->
context
=
&
context
;
this
->
buffer
=
buffer
;
this
->
size
=
size
;
this
->
elementSize
=
elementSize
;
this
->
name
=
name
;
ownsBuffer
=
false
;
}
OpenCLArray
::~
OpenCLArray
()
{
if
(
ownsBuffer
)
void
OpenCLArray
::
resize
(
int
size
)
{
if
(
buffer
==
NULL
)
throw
OpenMMException
(
"OpenCLArray has not been initialized"
);
if
(
!
ownsBuffer
)
throw
OpenMMException
(
"Cannot resize an array that does not own its storage"
);
delete
buffer
;
buffer
=
NULL
;
initialize
(
*
context
,
size
,
elementSize
,
name
,
flags
);
}
void
OpenCLArray
::
upload
(
const
void
*
data
,
bool
blocking
)
{
if
(
buffer
==
NULL
)
throw
OpenMMException
(
"OpenCLArray has not been initialized"
);
try
{
context
.
getQueue
().
enqueueWriteBuffer
(
*
buffer
,
blocking
?
CL_TRUE
:
CL_FALSE
,
0
,
size
*
elementSize
,
data
);
context
->
getQueue
().
enqueueWriteBuffer
(
*
buffer
,
blocking
?
CL_TRUE
:
CL_FALSE
,
0
,
size
*
elementSize
,
data
);
}
catch
(
cl
::
Error
err
)
{
std
::
stringstream
str
;
...
...
@@ -64,8 +102,10 @@ void OpenCLArray::upload(const void* data, bool blocking) {
}
void
OpenCLArray
::
download
(
void
*
data
,
bool
blocking
)
const
{
if
(
buffer
==
NULL
)
throw
OpenMMException
(
"OpenCLArray has not been initialized"
);
try
{
context
.
getQueue
().
enqueueReadBuffer
(
*
buffer
,
blocking
?
CL_TRUE
:
CL_FALSE
,
0
,
size
*
elementSize
,
data
);
context
->
getQueue
().
enqueueReadBuffer
(
*
buffer
,
blocking
?
CL_TRUE
:
CL_FALSE
,
0
,
size
*
elementSize
,
data
);
}
catch
(
cl
::
Error
err
)
{
std
::
stringstream
str
;
...
...
@@ -75,10 +115,12 @@ void OpenCLArray::download(void* data, bool blocking) const {
}
void
OpenCLArray
::
copyTo
(
OpenCLArray
&
dest
)
const
{
if
(
buffer
==
NULL
)
throw
OpenMMException
(
"OpenCLArray has not been initialized"
);
if
(
dest
.
getSize
()
!=
size
||
dest
.
getElementSize
()
!=
elementSize
)
throw
OpenMMException
(
"Error copying array "
+
name
+
" to "
+
dest
.
getName
()
+
": The destination array does not match the size of the array"
);
try
{
context
.
getQueue
().
enqueueCopyBuffer
(
*
buffer
,
dest
.
getDeviceBuffer
(),
0
,
0
,
size
*
elementSize
);
context
->
getQueue
().
enqueueCopyBuffer
(
*
buffer
,
dest
.
getDeviceBuffer
(),
0
,
0
,
size
*
elementSize
);
}
catch
(
cl
::
Error
err
)
{
std
::
stringstream
str
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
d59b0373
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -68,9 +68,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
}
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
,
OpenCLContext
*
originalContext
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
energySum
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
chargeBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
useMixedPrecision
=
false
;
...
...
@@ -275,23 +274,23 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
numAtomBlocks
=
(
paddedNumAtoms
+
(
TileSize
-
1
))
/
TileSize
;
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
device
.
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
if
(
useDoublePrecision
)
{
posq
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
posq
.
initializ
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
.
initializ
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
compilationDefines
[
"convert_real4"
]
=
"convert_double4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_double4"
;
}
else
if
(
useMixedPrecision
)
{
posq
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posqCorrection
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
posq
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posqCorrection
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
.
initializ
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"USE_MIXED_PRECISION"
]
=
"1"
;
compilationDefines
[
"convert_real4"
]
=
"convert_float4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_double4"
;
}
else
{
posq
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
posq
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"convert_real4"
]
=
"convert_float4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_float4"
;
}
...
...
@@ -429,28 +428,6 @@ OpenCLContext::~OpenCLContext() {
delete
computation
;
if
(
pinnedBuffer
!=
NULL
)
delete
pinnedBuffer
;
if
(
posq
!=
NULL
)
delete
posq
;
if
(
posqCorrection
!=
NULL
)
delete
posqCorrection
;
if
(
velm
!=
NULL
)
delete
velm
;
if
(
force
!=
NULL
)
delete
force
;
if
(
forceBuffers
!=
NULL
)
delete
forceBuffers
;
if
(
longForceBuffer
!=
NULL
)
delete
longForceBuffer
;
if
(
energyBuffer
!=
NULL
)
delete
energyBuffer
;
if
(
energySum
!=
NULL
)
delete
energySum
;
if
(
energyParamDerivBuffer
!=
NULL
)
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
delete
atomIndexDevice
;
if
(
chargeBuffer
!=
NULL
)
delete
chargeBuffer
;
if
(
integration
!=
NULL
)
delete
integration
;
if
(
expression
!=
NULL
)
...
...
@@ -471,42 +448,42 @@ void OpenCLContext::initialize() {
numForceBuffers
=
std
::
max
(
numForceBuffers
,
force
->
getRequiredForceBuffers
());
int
energyBufferSize
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
if
(
useDoublePrecision
)
{
forceBuffers
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
creat
e
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
creat
e
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
forceBuffers
.
initializ
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
.
initializ
e
<
mm_double4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
.
initializ
e
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
.
initializ
e
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
}
else
if
(
useMixedPrecision
)
{
forceBuffers
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
creat
e
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
creat
e
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
forceBuffers
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
.
initializ
e
<
mm_float4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
.
initializ
e
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
.
initializ
e
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
}
else
{
forceBuffers
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
creat
e
<
cl_float
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
creat
e
<
cl_float
>
(
*
this
,
1
,
"energySum"
);
forceBuffers
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
.
initializ
e
<
mm_float4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
.
initializ
e
<
cl_float
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
.
initializ
e
<
cl_float
>
(
*
this
,
1
,
"energySum"
);
}
if
(
supports64BitGlobalAtomics
)
{
longForceBuffer
=
OpenCLArray
::
creat
e
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
longForceBuffer
->
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forceBuffers
->
getDeviceBuffer
());
longForceBuffer
.
initializ
e
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
longForceBuffer
.
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forceBuffers
.
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl_int
>
(
2
,
paddedNumAtoms
);
reduceForcesKernel
.
setArg
<
cl_int
>
(
3
,
numForceBuffers
);
addAutoclearBuffer
(
*
longForceBuffer
);
addAutoclearBuffer
(
longForceBuffer
);
}
addAutoclearBuffer
(
*
forceBuffers
);
addAutoclearBuffer
(
*
energyBuffer
);
addAutoclearBuffer
(
forceBuffers
);
addAutoclearBuffer
(
energyBuffer
);
int
numEnergyParamDerivs
=
energyParamDerivNames
.
size
();
if
(
numEnergyParamDerivs
>
0
)
{
if
(
useDoublePrecision
||
useMixedPrecision
)
energyParamDerivBuffer
=
OpenCLArray
::
creat
e
<
cl_double
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
energyParamDerivBuffer
.
initializ
e
<
cl_double
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
else
energyParamDerivBuffer
=
OpenCLArray
::
creat
e
<
cl_float
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
addAutoclearBuffer
(
*
energyParamDerivBuffer
);
energyParamDerivBuffer
.
initializ
e
<
cl_float
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
addAutoclearBuffer
(
energyParamDerivBuffer
);
}
int
bufferBytes
=
max
(
velm
->
getSize
()
*
velm
->
getElementSize
(),
energyBufferSize
*
energyBuffer
->
getElementSize
());
int
bufferBytes
=
max
(
velm
.
getSize
()
*
velm
.
getElementSize
(),
energyBufferSize
*
energyBuffer
.
getElementSize
());
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedMemory
=
currentQueue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
...
...
@@ -516,12 +493,12 @@ void OpenCLContext::initialize() {
else
((
mm_float4
*
)
pinnedMemory
)[
i
]
=
mm_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
mass
==
0.0
?
0.0
f
:
(
cl_float
)
(
1.0
/
mass
));
}
velm
->
upload
(
pinnedMemory
);
atomIndexDevice
=
OpenCLArray
::
creat
e
<
cl_int
>
(
*
this
,
paddedNumAtoms
,
"atomIndexDevice"
);
velm
.
upload
(
pinnedMemory
);
atomIndexDevice
.
initializ
e
<
cl_int
>
(
*
this
,
paddedNumAtoms
,
"atomIndexDevice"
);
atomIndex
.
resize
(
paddedNumAtoms
);
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
atomIndex
[
i
]
=
i
;
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
.
upload
(
atomIndex
);
findMoleculeGroups
();
nonbonded
->
initialize
(
system
);
}
...
...
@@ -756,7 +733,7 @@ void OpenCLContext::reduceForces() {
if
(
supports64BitGlobalAtomics
)
executeKernel
(
reduceForcesKernel
,
paddedNumAtoms
,
128
);
else
reduceBuffer
(
*
forceBuffers
,
numForceBuffers
);
reduceBuffer
(
forceBuffers
,
numForceBuffers
);
}
void
OpenCLContext
::
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
)
{
...
...
@@ -771,42 +748,42 @@ double OpenCLContext::reduceEnergy() {
int
workGroupSize
=
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
();
if
(
workGroupSize
>
512
)
workGroupSize
=
512
;
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
energyBuffer
->
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
energySum
->
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
2
,
energyBuffer
->
getSize
());
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
energyBuffer
.
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
energySum
.
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
2
,
energyBuffer
.
getSize
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
3
,
workGroupSize
);
reduceEnergyKernel
.
setArg
(
4
,
workGroupSize
*
energyBuffer
->
getElementSize
(),
NULL
);
reduceEnergyKernel
.
setArg
(
4
,
workGroupSize
*
energyBuffer
.
getElementSize
(),
NULL
);
executeKernel
(
reduceEnergyKernel
,
workGroupSize
,
workGroupSize
);
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
{
double
energy
;
energySum
->
download
(
&
energy
);
energySum
.
download
(
&
energy
);
return
energy
;
}
else
{
float
energy
;
energySum
->
download
(
&
energy
);
energySum
.
download
(
&
energy
);
return
energy
;
}
}
void
OpenCLContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
if
(
chargeBuffer
==
NULL
)
chargeBuffer
=
new
OpenCLArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
if
(
!
chargeBuffer
.
isInitialized
()
)
chargeBuffer
.
initialize
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
if
(
getUseDoublePrecision
())
{
double
*
c
=
(
double
*
)
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
c
[
i
]
=
charges
[
i
];
chargeBuffer
->
upload
(
c
);
chargeBuffer
.
upload
(
c
);
}
else
{
float
*
c
=
(
float
*
)
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
c
[
i
]
=
(
float
)
charges
[
i
];
chargeBuffer
->
upload
(
c
);
chargeBuffer
.
upload
(
c
);
}
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
chargeBuffer
->
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
posq
->
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
atomIndexDevice
->
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
chargeBuffer
.
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
posq
.
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
atomIndexDevice
.
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl_int
>
(
3
,
numAtoms
);
executeKernel
(
setChargesKernel
,
numAtoms
);
}
...
...
@@ -1069,16 +1046,16 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
vector
<
mm_double4
>
newPosq
(
paddedNumAtoms
,
mm_double4
(
0
,
0
,
0
,
0
));
vector
<
mm_double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_double4
>
newVelm
(
paddedNumAtoms
,
mm_double4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
posq
.
download
(
oldPosq
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
posq
.
upload
(
newPosq
);
velm
.
upload
(
newVelm
);
}
else
if
(
useMixedPrecision
)
{
vector
<
mm_float4
>
oldPosq
(
paddedNumAtoms
);
...
...
@@ -1087,8 +1064,8 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
vector
<
mm_float4
>
newPosqCorrection
(
paddedNumAtoms
,
mm_float4
(
0
,
0
,
0
,
0
));
vector
<
mm_double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_double4
>
newVelm
(
paddedNumAtoms
,
mm_double4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
posq
.
download
(
oldPosq
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
...
...
@@ -1096,31 +1073,31 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
posqCorrection
->
upload
(
newPosqCorrection
);
velm
->
upload
(
newVelm
);
posq
.
upload
(
newPosq
);
posqCorrection
.
upload
(
newPosqCorrection
);
velm
.
upload
(
newVelm
);
}
else
{
vector
<
mm_float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
newPosq
(
paddedNumAtoms
,
mm_float4
(
0
,
0
,
0
,
0
));
vector
<
mm_float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_float4
>
newVelm
(
paddedNumAtoms
,
mm_float4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
posq
.
download
(
oldPosq
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
posq
.
upload
(
newPosq
);
velm
.
upload
(
newVelm
);
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
atomIndex
[
i
]
=
i
;
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
.
upload
(
atomIndex
);
findMoleculeGroups
();
for
(
auto
listener
:
reorderListeners
)
listener
->
execute
();
...
...
@@ -1152,10 +1129,10 @@ void OpenCLContext::reorderAtomsImpl() {
vector
<
Real4
>
oldPosq
(
paddedNumAtoms
);
vector
<
Real4
>
oldPosqCorrection
(
paddedNumAtoms
);
vector
<
Mixed4
>
oldVelm
(
paddedNumAtoms
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
posq
.
download
(
oldPosq
);
velm
.
download
(
oldVelm
);
if
(
useMixedPrecision
)
posqCorrection
->
download
(
oldPosqCorrection
);
posqCorrection
.
download
(
oldPosqCorrection
);
Real
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
Real
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
Real
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
...
...
@@ -1299,11 +1276,11 @@ void OpenCLContext::reorderAtomsImpl() {
atomIndex
[
i
]
=
originalIndex
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
posq
.
upload
(
newPosq
);
if
(
useMixedPrecision
)
posqCorrection
->
upload
(
newPosqCorrection
);
velm
->
upload
(
newVelm
);
atomIndexDevice
->
upload
(
atomIndex
);
posqCorrection
.
upload
(
newPosqCorrection
);
velm
.
upload
(
newVelm
);
atomIndexDevice
.
upload
(
atomIndex
);
for
(
auto
listener
:
reorderListeners
)
listener
->
execute
();
}
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
d59b0373
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -532,11 +532,6 @@ private:
const HarmonicBondForce& force;
};
OpenCLCalcHarmonicBondForceKernel::~OpenCLCalcHarmonicBondForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
...
...
@@ -545,18 +540,18 @@ void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const H
if (numBonds == 0)
return;
vector<vector<int> > atoms(numBonds, vector<int>(2));
params
= OpenCLArray::creat
e<mm_float2>(cl, numBonds, "bondParams");
params
.initializ
e<mm_float2>(cl, numBonds, "bondParams");
vector<mm_float2> paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicBondForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
->
getDeviceBuffer(), "float2");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
.
getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::bondForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cl.addForce(info);
...
...
@@ -584,7 +579,7 @@ void OpenCLCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& con
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
...
...
@@ -623,8 +618,6 @@ private:
OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
...
...
@@ -671,9 +664,9 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customBondGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
->
getDeviceBuffer(), "float");
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customBondGlobals", CL_MEM_READ_ONLY);
globals
.
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
.
getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
...
...
@@ -702,7 +695,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
}
double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
...
@@ -711,7 +704,7 @@ double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool inclu
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
return 0.0;
}
...
...
@@ -770,11 +763,6 @@ private:
const HarmonicAngleForce& force;
};
OpenCLCalcHarmonicAngleForceKernel::~OpenCLCalcHarmonicAngleForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
...
...
@@ -783,7 +771,7 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
if (numAngles == 0)
return;
vector<vector<int> > atoms(numAngles, vector<int>(3));
params
= OpenCLArray::creat
e<mm_float2>(cl, numAngles, "angleParams");
params
.initializ
e<mm_float2>(cl, numAngles, "angleParams");
vector<mm_float2> paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
double angle, k;
...
...
@@ -791,11 +779,11 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
->
getDeviceBuffer(), "float2");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
.
getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::angleForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cl.addForce(info);
...
...
@@ -823,7 +811,7 @@ void OpenCLCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& co
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
...
...
@@ -863,8 +851,6 @@ private:
OpenCLCalcCustomAngleForceKernel::~OpenCLCalcCustomAngleForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
...
...
@@ -911,9 +897,9 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
variables[name] = "angleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customAngleGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
->
getDeviceBuffer(), "float");
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customAngleGlobals", CL_MEM_READ_ONLY);
globals
.
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
.
getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
...
...
@@ -942,7 +928,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
}
double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
...
@@ -951,7 +937,7 @@ double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool incl
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
return 0.0;
}
...
...
@@ -1011,11 +997,6 @@ private:
const PeriodicTorsionForce& force;
};
OpenCLCalcPeriodicTorsionForceKernel::~OpenCLCalcPeriodicTorsionForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
...
...
@@ -1024,7 +1005,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
if (numTorsions == 0)
return;
vector<vector<int> > atoms(numTorsions, vector<int>(4));
params
= OpenCLArray::creat
e<mm_float4>(cl, numTorsions, "periodicTorsionParams");
params
.initializ
e<mm_float4>(cl, numTorsions, "periodicTorsionParams");
vector<mm_float4> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int periodicity;
...
...
@@ -1032,11 +1013,11 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
->
getDeviceBuffer(), "float4");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
.
getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cl.addForce(info);
...
...
@@ -1064,7 +1045,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl&
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
...
...
@@ -1099,11 +1080,6 @@ private:
const RBTorsionForce& force;
};
OpenCLCalcRBTorsionForceKernel::~OpenCLCalcRBTorsionForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
...
...
@@ -1112,7 +1088,7 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
if (numTorsions == 0)
return;
vector<vector<int> > atoms(numTorsions, vector<int>(4));
params
= OpenCLArray::creat
e<mm_float8>(cl, numTorsions, "rbTorsionParams");
params
.initializ
e<mm_float8>(cl, numTorsions, "rbTorsionParams");
vector<mm_float8> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
double c0, c1, c2, c3, c4, c5;
...
...
@@ -1120,11 +1096,11 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::rbTorsionForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
->
getDeviceBuffer(), "float8");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
.
getDeviceBuffer(), "float8");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cl.addForce(info);
...
...
@@ -1152,7 +1128,7 @@ void OpenCLCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& contex
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, c0, c1, c2, c3, c4, c5);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
...
...
@@ -1189,15 +1165,6 @@ private:
const CMAPTorsionForce& force;
};
OpenCLCalcCMAPTorsionForceKernel::~OpenCLCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
}
void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
...
...
@@ -1228,17 +1195,17 @@ void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CM
vector<cl_int> torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients
= OpenCLArray::creat
e<mm_float4>(cl, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions
= OpenCLArray::creat
e<mm_int2>(cl, numMaps, "cmapTorsionMapPositions");
torsionMaps
= OpenCLArray::creat
e<cl_int>(cl, numTorsions, "cmapTorsionMaps");
coefficients
->
upload(coeffVec);
mapPositions
->
upload(mapPositionsVec);
torsionMaps
->
upload(torsionMapsVec);
coefficients
.initializ
e<mm_float4>(cl, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions
.initializ
e<mm_int2>(cl, numMaps, "cmapTorsionMapPositions");
torsionMaps
.initializ
e<cl_int>(cl, numTorsions, "cmapTorsionMaps");
coefficients
.
upload(coeffVec);
mapPositions
.
upload(mapPositionsVec);
torsionMaps
.
upload(torsionMapsVec);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COEFF"] = cl.getBondedUtilities().addArgument(coefficients
->
getDeviceBuffer(), "float4");
replacements["MAP_POS"] = cl.getBondedUtilities().addArgument(mapPositions
->
getDeviceBuffer(), "int2");
replacements["MAPS"] = cl.getBondedUtilities().addArgument(torsionMaps
->
getDeviceBuffer(), "int");
replacements["COEFF"] = cl.getBondedUtilities().addArgument(coefficients
.
getDeviceBuffer(), "float4");
replacements["MAP_POS"] = cl.getBondedUtilities().addArgument(mapPositions
.
getDeviceBuffer(), "int2");
replacements["MAPS"] = cl.getBondedUtilities().addArgument(torsionMaps
.
getDeviceBuffer(), "int");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cl.addForce(info);
...
...
@@ -1254,9 +1221,9 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (mapPositions
->
getSize() != numMaps)
if (mapPositions
.
getSize() != numMaps)
throw OpenMMException("updateParametersInContext: The number of maps has changed");
if (torsionMaps
->
getSize() != numTorsions)
if (torsionMaps
.
getSize() != numTorsions)
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
// Update the maps.
...
...
@@ -1279,7 +1246,7 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
coeffVec.push_back(mm_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
coefficients
->
upload(coeffVec);
coefficients
.
upload(coeffVec);
// Update the indices.
...
...
@@ -1288,7 +1255,7 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
int index[8];
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
}
torsionMaps
->
upload(torsionMapsVec);
torsionMaps
.
upload(torsionMapsVec);
}
class OpenCLCalcCustomTorsionForceKernel::ForceInfo : public OpenCLForceInfo {
...
...
@@ -1325,8 +1292,6 @@ private:
OpenCLCalcCustomTorsionForceKernel::~OpenCLCalcCustomTorsionForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
...
...
@@ -1373,9 +1338,9 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
variables[name] = "torsionParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customTorsionGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
->
getDeviceBuffer(), "float");
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customTorsionGlobals", CL_MEM_READ_ONLY);
globals
.
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
.
getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
...
...
@@ -1404,7 +1369,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
}
double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
...
@@ -1413,7 +1378,7 @@ double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool in
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
return 0.0;
}
...
...
@@ -1479,27 +1444,23 @@ private:
class OpenCLCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public:
PmeIO(OpenCLContext& cl, cl::Kernel addForcesKernel) : cl(cl), addForcesKernel(addForcesKernel), forceTemp(NULL) {
forceTemp = OpenCLArray::create<mm_float4>(cl, cl.getNumAtoms(), "PmeForce");
addForcesKernel.setArg<cl::Buffer>(0, forceTemp->getDeviceBuffer());
}
~PmeIO() {
if (forceTemp != NULL)
delete forceTemp;
PmeIO(OpenCLContext& cl, cl::Kernel addForcesKernel) : cl(cl), addForcesKernel(addForcesKernel) {
forceTemp.initialize<mm_float4>(cl, cl.getNumAtoms(), "PmeForce");
addForcesKernel.setArg<cl::Buffer>(0, forceTemp.getDeviceBuffer());
}
float* getPosq() {
cl.getPosq().download(posq);
return (float*) &posq[0];
}
void setForce(float* force) {
forceTemp
->
upload(force);
forceTemp
.
upload(force);
addForcesKernel.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
cl.executeKernel(addForcesKernel, cl.getNumAtoms());
}
private:
OpenCLContext& cl;
vector<mm_float4> posq;
OpenCLArray
*
forceTemp;
OpenCLArray forceTemp;
cl::Kernel addForcesKernel;
};
...
...
@@ -1577,36 +1538,6 @@ private:
};
OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (cosSinSums != NULL)
delete cosSinSums;
if (pmeGrid != NULL)
delete pmeGrid;
if (pmeGrid2 != NULL)
delete pmeGrid2;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeDispersionBsplineModuliX != NULL)
delete pmeDispersionBsplineModuliX;
if (pmeDispersionBsplineModuliY != NULL)
delete pmeDispersionBsplineModuliY;
if (pmeDispersionBsplineModuliZ != NULL)
delete pmeDispersionBsplineModuliZ;
if (pmeBsplineTheta != NULL)
delete pmeBsplineTheta;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
if (sort != NULL)
delete sort;
if (fft != NULL)
...
...
@@ -1635,7 +1566,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
sigmaEpsilon
= OpenCLArray::creat
e<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon
.initializ
e<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon");
vector<mm_float4> posqf(cl.getPaddedNumAtoms(), mm_float4(0,0,0,0));
vector<mm_double4> posqd(cl.getPaddedNumAtoms(), mm_double4(0,0,0,0));
vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms(), mm_float2(0,0));
...
...
@@ -1671,7 +1602,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cl.getPosq().upload(posqd);
else
cl.getPosq().upload(posqf);
sigmaEpsilon
->
upload(sigmaEpsilonVector);
sigmaEpsilon
.
upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
...
...
@@ -1726,7 +1657,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
cosSinSums
= new OpenCLArray
(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
cosSinSums
.initialize
(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
...
...
@@ -1796,26 +1727,26 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
pmeGrid
= new OpenCLArray
(cl, gridElements, 2*elementSize, "pmeGrid");
pmeGrid2
= new OpenCLArray
(cl, gridElements, 2*elementSize, "pmeGrid2");
pmeGrid
.initialize
(cl, gridElements, 2*elementSize, "pmeGrid");
pmeGrid2
.initialize
(cl, gridElements, 2*elementSize, "pmeGrid2");
if (cl.getSupports64BitGlobalAtomics())
cl.addAutoclearBuffer(
*
pmeGrid2);
cl.addAutoclearBuffer(pmeGrid2);
else
cl.addAutoclearBuffer(
*
pmeGrid);
pmeBsplineModuliX
= new OpenCLArray
(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY
= new OpenCLArray
(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ
= new OpenCLArray
(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
cl.addAutoclearBuffer(pmeGrid);
pmeBsplineModuliX
.initialize
(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY
.initialize
(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ
.initialize
(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
pmeDispersionBsplineModuliX
= new OpenCLArray
(cl, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY
= new OpenCLArray
(cl, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ
= new OpenCLArray
(cl, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
pmeDispersionBsplineModuliX
.initialize
(cl, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY
.initialize
(cl, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ
.initialize
(cl, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
pmeBsplineTheta
= new OpenCLArray
(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeAtomRange
= OpenCLArray::creat
e<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex
= OpenCLArray::creat
e<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
pmeBsplineTheta
.initialize
(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeAtomRange
.initializ
e<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex
.initializ
e<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer
= new OpenCLArray
(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cl.clearBuffer(
*
pmeEnergyBuffer);
pmeEnergyBuffer
.initialize
(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cl.clearBuffer(pmeEnergyBuffer);
sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ, true);
if (doLJPME)
...
...
@@ -1832,7 +1763,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
cl.addPreComputation(new SyncQueuePreComputation(cl, pmeQueue, recipForceGroup));
cl.addPostComputation(syncQueue = new SyncQueuePostComputation(cl, pmeSyncEvent,
*
pmeEnergyBuffer, recipForceGroup));
cl.addPostComputation(syncQueue = new SyncQueuePostComputation(cl, pmeSyncEvent, pmeEnergyBuffer, recipForceGroup));
}
// Initialize the b-spline moduli.
...
...
@@ -1844,9 +1775,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
xsize = gridSizeX;
ysize = gridSizeY;
zsize = gridSizeZ;
xmoduli = pmeBsplineModuliX;
ymoduli = pmeBsplineModuliY;
zmoduli = pmeBsplineModuliZ;
xmoduli =
&
pmeBsplineModuliX;
ymoduli =
&
pmeBsplineModuliY;
zmoduli =
&
pmeBsplineModuliZ;
}
else {
if (!doLJPME)
...
...
@@ -1854,9 +1785,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
xmoduli = pmeDispersionBsplineModuliX;
ymoduli = pmeDispersionBsplineModuliY;
zmoduli = pmeDispersionBsplineModuliZ;
xmoduli =
&
pmeDispersionBsplineModuliX;
ymoduli =
&
pmeDispersionBsplineModuliY;
zmoduli =
&
pmeDispersionBsplineModuliZ;
}
int maxSize = max(max(xsize, ysize), zsize);
vector<double> data(PmeOrder);
...
...
@@ -1938,7 +1869,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
if (hasLJ)
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(cl_float2), sigmaEpsilon
->
getDeviceBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(cl_float2), sigmaEpsilon
.
getDeviceBuffer()));
// Initialize the exceptions.
...
...
@@ -1949,7 +1880,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams
= OpenCLArray::creat
e<mm_float4>(cl, numExceptions, "exceptionParams");
exceptionParams
.initializ
e<mm_float4>(cl, numExceptions, "exceptionParams");
vector<mm_float4> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
...
...
@@ -1957,9 +1888,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
exceptionParams
->
upload(exceptionParamsVector);
exceptionParams
.
upload(exceptionParamsVector);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams
->
getDeviceBuffer(), "float4");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams
.
getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
...
...
@@ -1970,15 +1901,15 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (!hasInitializedKernel) {
hasInitializedKernel = true;
if (cosSinSums
!= NULL
) {
if (cosSinSums
.isInitialized()
) {
ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(2, cosSinSums
->
getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(2, cosSinSums
.
getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(0, cl.getForceBuffers().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(2, cosSinSums
->
getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(2, cosSinSums
.
getDeviceBuffer());
}
if (pmeGrid
!= NULL
) {
if (pmeGrid
.isInitialized()
) {
// Create kernels for Coulomb PME.
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
...
...
@@ -1991,39 +1922,39 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta
->
getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta
.
getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex
->
getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
->
getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange
->
getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex
.
getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
.
getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange
.
getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
->
getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
.
getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex
->
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange
->
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex
.
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange
.
getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics())
pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2
->
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2
.
getDeviceBuffer());
else
pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid
->
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta
->
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(1, pmeBsplineModuliX
->
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(2, pmeBsplineModuliY
->
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(3, pmeBsplineModuliZ
->
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer
->
getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(2, pmeBsplineModuliX
->
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(3, pmeBsplineModuliY
->
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(4, pmeBsplineModuliZ
->
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid
.
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta
.
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(1, pmeBsplineModuliX
.
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(2, pmeBsplineModuliY
.
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(3, pmeBsplineModuliZ
.
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer
.
getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(2, pmeBsplineModuliX
.
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(3, pmeBsplineModuliY
.
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(4, pmeBsplineModuliZ
.
getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid
->
getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex
->
getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid
.
getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex
.
getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid
->
getDeviceBuffer());
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid
.
getDeviceBuffer());
}
if (usePmeQueue)
syncQueue->setKernel(cl::Kernel(program, "addEnergy"));
...
...
@@ -2048,50 +1979,50 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeDispersionInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta
->
getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta
.
getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex
->
getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(12, sigmaEpsilon
->
getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
->
getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange
->
getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex
.
getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(12, sigmaEpsilon
.
getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
.
getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange
.
getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
->
getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
.
getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex
.
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange
.
getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics())
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2
.
getDeviceBuffer());
else
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid
.
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta
.
getDeviceBuffer());
if (deviceIsCpu || cl.getSupports64BitGlobalAtomics())
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(13, sigmaEpsilon
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(13, sigmaEpsilon
.
getDeviceBuffer());
else
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(5, sigmaEpsilon
->
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(1, pmeDispersionBsplineModuliX
->
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliY
->
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliZ
->
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer
->
getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliX
->
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliY
->
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(4, pmeDispersionBsplineModuliZ
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(5, sigmaEpsilon
.
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(1, pmeDispersionBsplineModuliX
.
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliY
.
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliZ
.
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer
.
getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliX
.
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliY
.
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(4, pmeDispersionBsplineModuliZ
.
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid
->
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex
->
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(12, sigmaEpsilon
->
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid
.
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex
.
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(12, sigmaEpsilon
.
getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) {
pmeDispersionFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid
->
getDeviceBuffer());
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid
.
getDeviceBuffer());
}
}
}
}
if (cosSinSums
!= NULL
&& includeReciprocal) {
if (cosSinSums
.isInitialized()
&& includeReciprocal) {
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
mm_double4 recipBoxSize = mm_double4(2*M_PI/boxSize.x, 2*M_PI/boxSize.y, 2*M_PI/boxSize.z, 0.0);
double recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z);
...
...
@@ -2107,10 +2038,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
ewaldForcesKernel.setArg<mm_float4>(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0));
ewaldForcesKernel.setArg<cl_float>(4, (cl_float) recipCoefficient);
}
cl.executeKernel(ewaldSumsKernel, cosSinSums
->
getSize());
cl.executeKernel(ewaldSumsKernel, cosSinSums
.
getSize());
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
}
if (pmeGrid
!= NULL
&& includeReciprocal) {
if (pmeGrid
.isInitialized()
&& includeReciprocal) {
if (usePmeQueue && !includeEnergy)
cl.setQueue(pmeQueue);
...
...
@@ -2157,7 +2088,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
else {
sort->sort(
*
pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
...
...
@@ -2184,7 +2115,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
}
}
fft->execFFT(
*
pmeGrid,
*
pmeGrid2, true);
fft->execFFT(pmeGrid, pmeGrid2, true);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) {
pmeConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
...
...
@@ -2205,7 +2136,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (includeEnergy)
cl.executeKernel(pmeEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(
*
pmeGrid2,
*
pmeGrid, false);
fft->execFFT(pmeGrid2, pmeGrid, false);
setPeriodicBoxArgs(cl, pmeInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) {
pmeInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
...
...
@@ -2236,7 +2167,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
cl.executeKernel(pmeDispersionUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
cl.clearBuffer(
*
pmeGrid);
cl.clearBuffer(pmeGrid);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
...
...
@@ -2251,9 +2182,9 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
else {
sort->sort(
*
pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
cl.clearBuffer(
*
pmeGrid2);
cl.clearBuffer(pmeGrid2);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
...
...
@@ -2269,7 +2200,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ);
}
else {
cl.clearBuffer(
*
pmeGrid);
cl.clearBuffer(pmeGrid);
cl.executeKernel(pmeDispersionAtomRangeKernel, cl.getNumAtoms());
setPeriodicBoxSizeArg(cl, pmeDispersionZIndexKernel, 2);
if (cl.getUseDoublePrecision())
...
...
@@ -2280,7 +2211,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionSpreadChargeKernel, cl.getNumAtoms());
}
}
dispersionFft->execFFT(
*
pmeGrid,
*
pmeGrid2, true);
dispersionFft->execFFT(pmeGrid, pmeGrid2, true);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) {
pmeDispersionConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
...
...
@@ -2301,7 +2232,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (includeEnergy)
cl.executeKernel(pmeDispersionEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeDispersionConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(
*
pmeGrid2,
*
pmeGrid, false);
fft->execFFT(pmeGrid2, pmeGrid, false);
setPeriodicBoxArgs(cl, pmeDispersionInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) {
pmeDispersionInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
...
...
@@ -2374,7 +2305,7 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
sumSquaredCharges += charge*charge;
}
cl.setCharges(chargeVector);
sigmaEpsilon
->
upload(sigmaEpsilonVector);
sigmaEpsilon
.
upload(sigmaEpsilonVector);
// Record the exceptions.
...
...
@@ -2386,7 +2317,7 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
}
exceptionParams
->
upload(exceptionParamsVector);
exceptionParams
.
upload(exceptionParamsVector);
}
// Compute other values.
...
...
@@ -2473,12 +2404,6 @@ private:
OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (interactionGroupData != NULL)
delete interactionGroupData;
for (auto function : tabulatedFunctions)
delete function;
if (forceCopy != NULL)
delete forceCopy;
}
...
...
@@ -2494,7 +2419,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
int numParticles = force.getNumParticles();
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0)
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
...
...
@@ -2519,7 +2444,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<string> tableTypes;
for (int i = 0; i < force.getNumFunctions(); i++) {
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string arrayName = prefix+"table"+cl.intToString(i);
...
...
@@ -2527,9 +2453,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
tabulatedFunctions.size()-1]->
getDeviceBuffer()));
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[
i].
upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
i].
getDeviceBuffer()));
if (width == 1)
tableTypes.push_back("float");
else
...
...
@@ -2544,8 +2470,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
if (globals
!= NULL
)
globals
->
upload(globalParamValues);
if (globals
.isInitialized()
)
globals
.
upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
...
...
@@ -2599,9 +2525,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+cl.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
if (globals
!= NULL
) {
globals
->
upload(globalParamValues);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals
->
getDeviceBuffer()));
if (globals
.isInitialized()
) {
globals
.
upload(globalParamValues);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals
.
getDeviceBuffer()));
}
}
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
...
...
@@ -2785,8 +2711,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
for (; indexInTileSet < 32; indexInTileSet++)
groupData.push_back(mm_int4(0, 0, minSize<<16, 0));
}
interactionGroupData
= OpenCLArray::creat
e<mm_int4>(cl, groupData.size(), "interactionGroupData");
interactionGroupData
->
upload(groupData);
interactionGroupData
.initializ
e<mm_int4>(cl, groupData.size(), "interactionGroupData");
interactionGroupData
.
upload(groupData);
// Create the kernel.
...
...
@@ -2812,7 +2738,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
args<<", __global const "<<buffers[i].getType()<<"* restrict global_params"<<(i+1);
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
args << ", __global const " << tableTypes[i]<< "* restrict table" << i;
if (globals
!= NULL
)
if (globals
.isInitialized()
)
args<<", __global const float* restrict globals";
if (hasParamDerivs)
args << ", __global mixed* restrict energyParamDerivs";
...
...
@@ -2883,7 +2809,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
}
double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
...
@@ -2892,7 +2818,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value;
}
if (changed) {
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
if (forceCopy != NULL) {
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
...
...
@@ -2903,7 +2829,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
}
if (interactionGroupData
!= NULL
) {
if (interactionGroupData
.isInitialized()
) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
int index = 0;
...
...
@@ -2911,14 +2837,14 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
interactionGroupKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer() : cl.getForceBuffers()).getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData
->
getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData
.
getDeviceBuffer());
index += 5;
for (auto& buffer : params->getBuffers())
interactionGroupKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions)
interactionGroupKernel.setArg<cl::Memory>(index++, function
->
getDeviceBuffer());
if (globals
!= NULL
)
interactionGroupKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
for (auto
&
function : tabulatedFunctions)
interactionGroupKernel.setArg<cl::Memory>(index++, function
.
getDeviceBuffer());
if (globals
.isInitialized()
)
interactionGroupKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
if (hasParamDerivs)
interactionGroupKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
}
...
...
@@ -2978,43 +2904,26 @@ private:
const GBSAOBCForce& force;
};
OpenCLCalcGBSAOBCForceKernel::~OpenCLCalcGBSAOBCForceKernel() {
if (params != NULL)
delete params;
if (bornSum != NULL)
delete bornSum;
if (longBornSum != NULL)
delete longBornSum;
if (bornRadii != NULL)
delete bornRadii;
if (bornForce != NULL)
delete bornForce;
if (longBornForce != NULL)
delete longBornForce;
if (obcChain != NULL)
delete obcChain;
}
void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
if (cl.getPlatformData().contexts.size() > 1)
throw OpenMMException("GBSAOBCForce does not support using multiple OpenCL devices");
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
params
= OpenCLArray::creat
e<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams");
params
.initializ
e<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
bornRadii
= new OpenCLArray
(cl, cl.getPaddedNumAtoms(), elementSize, "bornRadii");
obcChain
= new OpenCLArray
(cl, cl.getPaddedNumAtoms(), elementSize, "obcChain");
bornRadii
.initialize
(cl, cl.getPaddedNumAtoms(), elementSize, "bornRadii");
obcChain
.initialize
(cl, cl.getPaddedNumAtoms(), elementSize, "obcChain");
if (cl.getSupports64BitGlobalAtomics()) {
longBornSum
= OpenCLArray::creat
e<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornSum");
longBornForce
= OpenCLArray::creat
e<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornForce");
bornForce
= new OpenCLArray
(cl, cl.getPaddedNumAtoms(), elementSize, "bornForce");
cl.addAutoclearBuffer(
*
longBornSum);
cl.addAutoclearBuffer(
*
longBornForce);
longBornSum
.initializ
e<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornSum");
longBornForce
.initializ
e<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornForce");
bornForce
.initialize
(cl, cl.getPaddedNumAtoms(), elementSize, "bornForce");
cl.addAutoclearBuffer(longBornSum);
cl.addAutoclearBuffer(longBornForce);
}
else {
bornSum
= new OpenCLArray
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornSum");
bornForce
= new OpenCLArray
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornForce");
cl.addAutoclearBuffer(
*
bornSum);
cl.addAutoclearBuffer(
*
bornForce);
bornSum
.initialize
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornSum");
bornForce
.initialize
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornForce");
cl.addAutoclearBuffer(bornSum);
cl.addAutoclearBuffer(bornForce);
}
vector<mm_float4> posqf(cl.getPaddedNumAtoms());
vector<mm_double4> posqd(cl.getPaddedNumAtoms());
...
...
@@ -3034,7 +2943,7 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
cl.getPosq().upload(posqd);
else
cl.getPosq().upload(posqf);
params
->
upload(paramsVector);
params
.
upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
...
...
@@ -3042,8 +2951,8 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
cutoff = force.getCutoffDistance();
string source = OpenCLKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params
->
getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "real", 1, elementSize, bornForce
->
getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params
.
getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "real", 1, elementSize, bornForce
.
getDeviceBuffer()));;
info = new ForceInfo(nb.getNumForceBuffers(), force);
cl.addForce(info);
}
...
...
@@ -3089,9 +2998,9 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0;
computeBornSumKernel = cl::Kernel(program, "computeBornSum");
computeBornSumKernel.setArg<cl::Buffer>(index++, (useLong ? longBornSum
->
getDeviceBuffer() : bornSum
->
getDeviceBuffer()));
computeBornSumKernel.setArg<cl::Buffer>(index++, (useLong ? longBornSum
.
getDeviceBuffer() : bornSum
.
getDeviceBuffer()));
computeBornSumKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, params
->
getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, params
.
getDeviceBuffer());
if (nb.getUseCutoff()) {
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
...
...
@@ -3107,10 +3016,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
force1Kernel = cl::Kernel(program, "computeGBSAForce1");
index = 0;
force1Kernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
force1Kernel.setArg<cl::Buffer>(index++, (useLong ? longBornForce
->
getDeviceBuffer() : bornForce
->
getDeviceBuffer()));
force1Kernel.setArg<cl::Buffer>(index++, (useLong ? longBornForce
.
getDeviceBuffer() : bornForce
.
getDeviceBuffer()));
force1Kernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, bornRadii
->
getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, bornRadii
.
getDeviceBuffer());
index++; // Whether to include energy.
if (nb.getUseCutoff()) {
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
...
...
@@ -3131,21 +3040,21 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
reduceBornSumKernel.setArg<cl_float>(2, 1.0f);
reduceBornSumKernel.setArg<cl_float>(3, 0.8f);
reduceBornSumKernel.setArg<cl_float>(4, 4.85f);
reduceBornSumKernel.setArg<cl::Buffer>(5, (useLong ? longBornSum
->
getDeviceBuffer() : bornSum
->
getDeviceBuffer()));
reduceBornSumKernel.setArg<cl::Buffer>(6, params
->
getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(7, bornRadii
->
getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(8, obcChain
->
getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(5, (useLong ? longBornSum
.
getDeviceBuffer() : bornSum
.
getDeviceBuffer()));
reduceBornSumKernel.setArg<cl::Buffer>(6, params
.
getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(7, bornRadii
.
getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(8, obcChain
.
getDeviceBuffer());
reduceBornForceKernel = cl::Kernel(program, "reduceBornForce");
index = 0;
reduceBornForceKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
reduceBornForceKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornForce
->
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornForce
.
getDeviceBuffer());
if (useLong)
reduceBornForceKernel.setArg<cl::Buffer>(index++, longBornForce
->
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, longBornForce
.
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, params
->
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornRadii
->
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain
->
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, params
.
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornRadii
.
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain
.
getDeviceBuffer());
}
force1Kernel.setArg<cl_int>(5, includeEnergy);
if (nb.getUseCutoff()) {
...
...
@@ -3188,7 +3097,7 @@ void OpenCLCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context,
paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
}
cl.setCharges(chargeVector);
params
->
upload(paramsVector);
params
.
upload(paramsVector);
// Mark that the current reordering may be invalid.
...
...
@@ -3235,18 +3144,6 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
delete energyDerivs;
if (energyDerivChain != NULL)
delete energyDerivChain;
if (longEnergyDerivs != NULL)
delete longEnergyDerivs;
if (globals != NULL)
delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
if (longValueBuffers != NULL)
delete longValueBuffers;
for (auto function : tabulatedFunctions)
delete function;
for (auto d : dValue0dParam)
delete d;
for (auto d : dValuedParam)
delete d;
}
...
...
@@ -3284,7 +3181,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
computedValues = new OpenCLParameterSet(cl, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cl.getUseDoublePrecision());
if (force.getNumGlobalParameters() > 0)
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customGBGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customGBGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > paramVector(paddedNumParticles, vector<cl_float>(numParams, 0));
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
...
...
@@ -3308,6 +3205,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
...
...
@@ -3316,9 +3214,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
tabulatedFunctions.size()-1]->
getDeviceBuffer()));
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[
i].
upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
i].
getDeviceBuffer()));
tableArgs << ", __global const float";
if (width > 1)
tableArgs << width;
...
...
@@ -3333,8 +3231,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
if (globals
!= NULL
)
globals
->
upload(globalParamValues);
if (globals
.isInitialized()
)
globals
.
upload(globalParamValues);
// Record derivatives of expressions needed for the chain rule terms.
...
...
@@ -3385,7 +3283,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
bool useLong = cl.getSupports64BitGlobalAtomics();
if (useLong) {
longEnergyDerivs
= OpenCLArray::creat
e<cl_long>(cl, force.getNumComputedValues()*cl.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
longEnergyDerivs
.initializ
e<cl_long>(cl, force.getNumComputedValues()*cl.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
}
else
...
...
@@ -3393,13 +3291,14 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
energyDerivChain = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
dValue0dParam.resize(force.getNumEnergyParameterDerivatives());
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
dValuedParam.push_back(new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "dValuedParam", true, cl.getUseDoublePrecision()));
if (useLong)
dValue0dParam
.push_back(OpenCLArray::creat
e<cl_long>(cl, cl.getPaddedNumAtoms(), "dValue0dParam")
)
;
dValue0dParam
[i].initializ
e<cl_long>(cl, cl.getPaddedNumAtoms(), "dValue0dParam");
else
dValue0dParam
.push_back(new OpenCLArray
(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), elementSize, "dValue0dParam")
)
;
cl.addAutoclearBuffer(
*
dValue0dParam
.back()
);
dValue0dParam
[i].initialize
(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), elementSize, "dValue0dParam");
cl.addAutoclearBuffer(dValue0dParam
[i]
);
string name = force.getEnergyParameterDerivativeName(i);
cl.addEnergyParameterDerivative(name);
}
...
...
@@ -4035,9 +3934,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
}
if (globals
!= NULL
) {
globals
->
upload(globalParamValues);
arguments.push_back(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals
->
getDeviceBuffer()));
if (globals
.isInitialized()
) {
globals
.
upload(globalParamValues);
arguments.push_back(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals
.
getDeviceBuffer()));
}
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
for (auto param : parameters)
...
...
@@ -4048,7 +3947,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
cl.addForce(info);
if (useLong)
cl.addAutoclearBuffer(
*
longEnergyDerivs);
cl.addAutoclearBuffer(longEnergyDerivs);
else {
for (auto& buffer : energyDerivs->getBuffers())
cl.addAutoclearBuffer(buffer.getMemory(), buffer.getSize()*energyDerivs->getNumObjects());
...
...
@@ -4099,21 +3998,21 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
bool useLong = cl.getSupports64BitGlobalAtomics();
if (useLong) {
longValueBuffers
= OpenCLArray::creat
e<cl_long>(cl, cl.getPaddedNumAtoms(), "customGBLongValueBuffers");
cl.addAutoclearBuffer(
*
longValueBuffers);
cl.clearBuffer(
*
longValueBuffers);
longValueBuffers
.initializ
e<cl_long>(cl, cl.getPaddedNumAtoms(), "customGBLongValueBuffers");
cl.addAutoclearBuffer(longValueBuffers);
cl.clearBuffer(longValueBuffers);
}
else {
valueBuffers
= new OpenCLArray
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "customGBValueBuffers");
cl.addAutoclearBuffer(
*
valueBuffers);
cl.clearBuffer(
*
valueBuffers);
valueBuffers
.initialize
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "customGBValueBuffers");
cl.addAutoclearBuffer(valueBuffers);
cl.clearBuffer(valueBuffers);
}
int index = 0;
pairValueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*4*elementSize, NULL);
pairValueKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusions().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusionTiles().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers
->
getDeviceBuffer() : valueBuffers
->
getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers
.
getDeviceBuffer() : valueBuffers
.
getDeviceBuffer());
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL);
if (nb.getUseCutoff()) {
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
...
...
@@ -4126,8 +4025,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
else
pairValueKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2);
if (globals
!= NULL
)
pairValueKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
if (globals
.isInitialized()
)
pairValueKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairValueUsesParam[i]) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
...
...
@@ -4135,30 +4034,30 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*buffer.getSize(), NULL);
}
}
for (auto d : dValue0dParam) {
pairValueKernel.setArg<cl::Buffer>(index++, d
->
getDeviceBuffer());
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*d
->
getElementSize(), NULL);
for (auto
&
d : dValue0dParam) {
pairValueKernel.setArg<cl::Buffer>(index++, d
.
getDeviceBuffer());
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*d
.
getElementSize(), NULL);
}
for (auto function : tabulatedFunctions)
pairValueKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
for (auto
&
function : tabulatedFunctions)
pairValueKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
index = 0;
perParticleValueKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
perParticleValueKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
perParticleValueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers
->
getDeviceBuffer() : valueBuffers
->
getDeviceBuffer());
if (globals
!= NULL
)
perParticleValueKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers
.
getDeviceBuffer() : valueBuffers
.
getDeviceBuffer());
if (globals
.isInitialized()
)
perParticleValueKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : params->getBuffers())
perParticleValueKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
perParticleValueKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (int i = 0; i < dValuedParam.size(); i++) {
perParticleValueKernel.setArg<cl::Memory>(index++, dValue0dParam[i]
->
getDeviceBuffer());
perParticleValueKernel.setArg<cl::Memory>(index++, dValue0dParam[i]
.
getDeviceBuffer());
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
perParticleValueKernel.setArg<cl::Memory>(index++, dValuedParam[i]->getBuffers()[j].getMemory());
}
for (auto function : tabulatedFunctions)
perParticleValueKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
for (auto
&
function : tabulatedFunctions)
perParticleValueKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
index = 0;
pairEnergyKernel.setArg<cl::Buffer>(index++, useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
...
...
@@ -4179,8 +4078,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
else
pairEnergyKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2);
if (globals
!= NULL
)
pairEnergyKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
if (globals
.isInitialized()
)
pairEnergyKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairEnergyUsesParam[i]) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
...
...
@@ -4196,7 +4095,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
}
if (useLong) {
pairEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs
->
getDeviceBuffer());
pairEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs
.
getDeviceBuffer());
for (int i = 0; i < numComputedValues; ++i)
pairEnergyKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL);
}
...
...
@@ -4208,16 +4107,16 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
if (needEnergyParamDerivs)
pairEnergyKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
for (auto function : tabulatedFunctions)
pairEnergyKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
for (auto
&
function : tabulatedFunctions)
pairEnergyKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
index = 0;
perParticleEnergyKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
perParticleEnergyKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
if (globals
!= NULL
)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
if (globals
.isInitialized()
)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : params->getBuffers())
perParticleEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
...
...
@@ -4227,17 +4126,17 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
for (auto& buffer : energyDerivChain->getBuffers())
perParticleEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
if (useLong)
perParticleEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs
->
getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs
.
getDeviceBuffer());
if (needEnergyParamDerivs)
perParticleEnergyKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
for (auto function : tabulatedFunctions)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
for (auto
&
function : tabulatedFunctions)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
if (needParameterGradient || needEnergyParamDerivs) {
index = 0;
gradientChainRuleKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
gradientChainRuleKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
if (globals
!= NULL
)
gradientChainRuleKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
if (globals
.isInitialized()
)
gradientChainRuleKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : params->getBuffers())
gradientChainRuleKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
...
...
@@ -4252,7 +4151,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
}
}
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
...
@@ -4261,7 +4160,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
pairEnergyKernel.setArg<cl_int>(7, includeEnergy);
if (nb.getUseCutoff()) {
...
...
@@ -4342,8 +4241,6 @@ private:
OpenCLCalcCustomExternalForceKernel::~OpenCLCalcCustomExternalForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
...
...
@@ -4398,9 +4295,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
variables[name] = "particleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customExternalGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
->
getDeviceBuffer(), "float");
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customExternalGlobals", CL_MEM_READ_ONLY);
globals
.
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
.
getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
...
...
@@ -4422,7 +4319,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
}
double OpenCLCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
...
@@ -4431,7 +4328,7 @@ double OpenCLCalcCustomExternalForceKernel::execute(ContextImpl& context, bool i
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
return 0.0;
}
...
...
@@ -4544,22 +4441,6 @@ OpenCLCalcCustomHbondForceKernel::~OpenCLCalcCustomHbondForceKernel() {
delete donorParams;
if (acceptorParams != NULL)
delete acceptorParams;
if (donors != NULL)
delete donors;
if (acceptors != NULL)
delete acceptors;
if (donorBufferIndices != NULL)
delete donorBufferIndices;
if (acceptorBufferIndices != NULL)
delete acceptorBufferIndices;
if (globals != NULL)
delete globals;
if (donorExclusions != NULL)
delete donorExclusions;
if (acceptorExclusions != NULL)
delete acceptorExclusions;
for (auto function : tabulatedFunctions)
delete function;
}
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
...
...
@@ -4586,12 +4467,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
if (numDonors == 0 || numAcceptors == 0)
return;
int numParticles = system.getNumParticles();
donors
= OpenCLArray::creat
e<mm_int4>(cl, numDonors, "customHbondDonors");
acceptors
= OpenCLArray::creat
e<mm_int4>(cl, numAcceptors, "customHbondAcceptors");
donors
.initializ
e<mm_int4>(cl, numDonors, "customHbondDonors");
acceptors
.initializ
e<mm_int4>(cl, numAcceptors, "customHbondAcceptors");
donorParams = new OpenCLParameterSet(cl, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
acceptorParams = new OpenCLParameterSet(cl, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
if (force.getNumGlobalParameters() > 0)
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customHbondGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customHbondGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > donorParamVector(numDonors);
vector<mm_int4> donorVector(numDonors);
for (int i = 0; i < numDonors; i++) {
...
...
@@ -4601,7 +4482,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for (int j = 0; j < (int) parameters.size(); j++)
donorParamVector[i][j] = (cl_float) parameters[j];
}
donors
->
upload(donorVector);
donors
.
upload(donorVector);
donorParams->setParameterValues(donorParamVector);
vector<vector<cl_float> > acceptorParamVector(numAcceptors);
vector<mm_int4> acceptorVector(numAcceptors);
...
...
@@ -4612,13 +4493,13 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for (int j = 0; j < (int) parameters.size(); j++)
acceptorParamVector[i][j] = (cl_float) parameters[j];
}
acceptors
->
upload(acceptorVector);
acceptors
.
upload(acceptorVector);
acceptorParams->setParameterValues(acceptorParamVector);
// Select an output buffer index for each donor and acceptor.
donorBufferIndices
= OpenCLArray::creat
e<mm_int4>(cl, numDonors, "customHbondDonorBuffers");
acceptorBufferIndices
= OpenCLArray::creat
e<mm_int4>(cl, numAcceptors, "customHbondAcceptorBuffers");
donorBufferIndices
.initializ
e<mm_int4>(cl, numDonors, "customHbondDonorBuffers");
acceptorBufferIndices
.initializ
e<mm_int4>(cl, numAcceptors, "customHbondAcceptorBuffers");
vector<mm_int4> donorBufferVector(numDonors);
vector<mm_int4> acceptorBufferVector(numAcceptors);
vector<int> donorBufferCounter(numParticles, 0);
...
...
@@ -4631,8 +4512,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
acceptorBufferVector[i] = mm_int4(acceptorVector[i].x > -1 ? acceptorBufferCounter[acceptorVector[i].x]++ : 0,
acceptorVector[i].y > -1 ? acceptorBufferCounter[acceptorVector[i].y]++ : 0,
acceptorVector[i].z > -1 ? acceptorBufferCounter[acceptorVector[i].z]++ : 0, 0);
donorBufferIndices
->
upload(donorBufferVector);
acceptorBufferIndices
->
upload(acceptorBufferVector);
donorBufferIndices
.
upload(donorBufferVector);
acceptorBufferIndices
.
upload(acceptorBufferVector);
int maxBuffers = 1;
for (int i : donorBufferCounter)
maxBuffers = max(maxBuffers, i);
...
...
@@ -4672,10 +4553,10 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
else
throw OpenMMException("CustomHbondForce: OpenCLPlatform does not support more than four exclusions per acceptor");
}
donorExclusions
= OpenCLArray::creat
e<mm_int4>(cl, numDonors, "customHbondDonorExclusions");
acceptorExclusions
= OpenCLArray::creat
e<mm_int4>(cl, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions
->
upload(donorExclusionVector);
acceptorExclusions
->
upload(acceptorExclusionVector);
donorExclusions
.initializ
e<mm_int4>(cl, numDonors, "customHbondDonorExclusions");
acceptorExclusions
.initializ
e<mm_int4>(cl, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions
.
upload(donorExclusionVector);
acceptorExclusions
.
upload(acceptorExclusionVector);
// Record the tabulated functions.
...
...
@@ -4683,6 +4564,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumFunctions());
for (int i = 0; i < force.getNumFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
...
...
@@ -4691,8 +4573,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[
i].
upload(f);
tableArgs << ", __global const float";
if (width > 1)
tableArgs << width;
...
...
@@ -4707,8 +4589,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
if (globals
!= NULL
)
globals
->
upload(globalParamValues);
if (globals
.isInitialized()
)
globals
.
upload(globalParamValues);
map<string, string> variables;
for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
const string& name = force.getPerDonorParameterName(i);
...
...
@@ -4900,7 +4782,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numDonors == 0 || numAcceptors == 0)
return 0.0;
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
...
@@ -4909,7 +4791,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
if (!hasInitializedKernel) {
hasInitializedKernel = true;
...
...
@@ -4917,38 +4799,38 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
donorKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorExclusions
->
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donors
->
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, acceptors
->
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorBufferIndices
->
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorExclusions
.
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donors
.
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, acceptors
.
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorBufferIndices
.
getDeviceBuffer());
donorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
index += 5; // Periodic box size arguments are set when the kernel is executed.
if (globals
!= NULL
)
donorKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
if (globals
.isInitialized()
)
donorKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : donorParams->getBuffers())
donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers())
donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions)
donorKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
for (auto
&
function : tabulatedFunctions)
donorKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
index = 0;
acceptorKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorExclusions
->
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, donors
->
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptors
->
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorBufferIndices
->
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorExclusions
.
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, donors
.
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptors
.
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorBufferIndices
.
getDeviceBuffer());
acceptorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
index += 5; // Periodic box size arguments are set when the kernel is executed.
if (globals
!= NULL
)
acceptorKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
if (globals
.isInitialized()
)
acceptorKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : donorParams->getBuffers())
acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers())
acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions)
acceptorKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
for (auto
&
function : tabulatedFunctions)
acceptorKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
}
setPeriodicBoxArgs(cl, donorKernel, 8);
cl.executeKernel(donorKernel, max(numDonors, numAcceptors));
...
...
@@ -5047,22 +4929,6 @@ private:
OpenCLCalcCustomCentroidBondForceKernel::~OpenCLCalcCustomCentroidBondForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (auto function : tabulatedFunctions)
delete function;
}
void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
...
...
@@ -5100,22 +4966,22 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
}
groupParticles
= OpenCLArray::creat
e<int>(cl, groupParticleVec.size(), "groupParticles");
groupParticles
->
upload(groupParticleVec);
groupParticles
.initializ
e<int>(cl, groupParticleVec.size(), "groupParticles");
groupParticles
.
upload(groupParticleVec);
if (cl.getUseDoublePrecision()) {
groupWeights
= OpenCLArray::creat
e<double>(cl, groupParticleVec.size(), "groupWeights");
groupWeights
->
upload(groupWeightVecDouble);
centerPositions
= OpenCLArray::creat
e<mm_double4>(cl, numGroups, "centerPositions");
groupWeights
.initializ
e<double>(cl, groupParticleVec.size(), "groupWeights");
groupWeights
.
upload(groupWeightVecDouble);
centerPositions
.initializ
e<mm_double4>(cl, numGroups, "centerPositions");
}
else {
groupWeights
= OpenCLArray::creat
e<float>(cl, groupParticleVec.size(), "groupWeights");
groupWeights
->
upload(groupWeightVecFloat);
centerPositions
= OpenCLArray::creat
e<mm_float4>(cl, numGroups, "centerPositions");
groupWeights
.initializ
e<float>(cl, groupParticleVec.size(), "groupWeights");
groupWeights
.
upload(groupWeightVecFloat);
centerPositions
.initializ
e<mm_float4>(cl, numGroups, "centerPositions");
}
groupOffsets
= OpenCLArray::creat
e<int>(cl, groupOffsetVec.size(), "groupOffsets");
groupOffsets
->
upload(groupOffsetVec);
groupForces
= OpenCLArray::creat
e<long long>(cl, numGroups*3, "groupForces");
cl.addAutoclearBuffer(
*
groupForces);
groupOffsets
.initializ
e<int>(cl, groupOffsetVec.size(), "groupOffsets");
groupOffsets
.
upload(groupOffsetVec);
groupForces
.initializ
e<long long>(cl, numGroups*3, "groupForces");
cl.addAutoclearBuffer(groupForces);
// Record the bonds.
...
...
@@ -5134,8 +5000,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
bondGroups
= OpenCLArray::creat
e<int>(cl, bondGroupVec.size(), "bondGroups");
bondGroups
->
upload(bondGroupVec);
bondGroups
.initializ
e<int>(cl, bondGroupVec.size(), "bondGroups");
bondGroups
.
upload(bondGroupVec);
// Record the tabulated functions.
...
...
@@ -5143,6 +5009,7 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream extraArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
...
...
@@ -5151,8 +5018,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
.back()->
upload(f);
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions
[i].
upload(f);
extraArgs << ", __global const float";
if (width > 1)
extraArgs << width;
...
...
@@ -5182,8 +5049,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
if (needEnergyParamDerivs)
extraArgs << ", __global mixed* restrict energyParamDerivs";
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<float>(cl, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals
->
upload(globalParamValues);
globals
.initializ
e<float>(cl, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals
.
upload(globalParamValues);
extraArgs << ", __global const float* restrict globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
...
...
@@ -5394,37 +5261,37 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
index = 0;
computeCentersKernel = cl::Kernel(program, "computeGroupCenters");
computeCentersKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupParticles
->
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupWeights
->
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupOffsets
->
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, centerPositions
->
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupParticles
.
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupWeights
.
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupOffsets
.
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, centerPositions
.
getDeviceBuffer());
index = 0;
groupForcesKernel = cl::Kernel(program, "computeGroupForces");
groupForcesKernel.setArg<cl::Buffer>(index++, groupForces
->
getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, groupForces
.
getDeviceBuffer());
index++; // Energy buffer hasn't been created yet
groupForcesKernel.setArg<cl::Buffer>(index++, centerPositions
->
getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, bondGroups
->
getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, centerPositions
.
getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, bondGroups
.
getDeviceBuffer());
index += 5; // Periodic box information
if (needEnergyParamDerivs)
index++; // Deriv buffer hasn't been created yet.
for (auto function : tabulatedFunctions)
groupForcesKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
if (globals
!= NULL
)
groupForcesKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
for (auto
&
function : tabulatedFunctions)
groupForcesKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
if (globals
.isInitialized()
)
groupForcesKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : params->getBuffers())
groupForcesKernel.setArg<cl::Memory>(index++, buffer.getMemory());
index = 0;
applyForcesKernel = cl::Kernel(program, "applyForcesToAtoms");
applyForcesKernel.setArg<cl::Buffer>(index++, groupParticles
->
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupWeights
->
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupOffsets
->
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupForces
->
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupParticles
.
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupWeights
.
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupOffsets
.
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupForces
.
getDeviceBuffer());
}
double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
return 0.0;
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
...
...
@@ -5433,7 +5300,7 @@ double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bo
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
cl.executeKernel(computeCentersKernel, OpenCLContext::TileSize*numGroups);
groupForcesKernel.setArg<cl::Buffer>(1, cl.getEnergyBuffer().getDeviceBuffer());
...
...
@@ -5498,10 +5365,6 @@ private:
OpenCLCalcCustomCompoundBondForceKernel::~OpenCLCalcCustomCompoundBondForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
for (auto function : tabulatedFunctions)
delete function;
}
void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
...
...
@@ -5532,16 +5395,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
for (int i = 0; i < force.getNumFunctions(); i++) {
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
OpenCLArray* array = OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions.push_back(array);
array->upload(f);
string arrayName = cl.getBondedUtilities().addArgument(array->getDeviceBuffer(), width == 1 ? "float" : "float"+cl.intToString(width));
tabulatedFunctions[i].initialize<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[i].upload(f);
string arrayName = cl.getBondedUtilities().addArgument(tabulatedFunctions[i].getDeviceBuffer(), width == 1 ? "float" : "float"+cl.intToString(width));
functionDefinitions.push_back(make_pair(name, arrayName));
}
...
...
@@ -5565,9 +5428,9 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customCompoundBondGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
->
getDeviceBuffer(), "float");
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customCompoundBondGlobals", CL_MEM_READ_ONLY);
globals
.
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
.
getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
...
...
@@ -5750,7 +5613,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
}
double OpenCLCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
...
@@ -5759,7 +5622,7 @@ double OpenCLCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bo
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
return 0.0;
}
...
...
@@ -5827,34 +5690,6 @@ private:
OpenCLCalcCustomManyParticleForceKernel::~OpenCLCalcCustomManyParticleForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (orderIndex != NULL)
delete orderIndex;
if (particleOrder != NULL)
delete particleOrder;
if (particleTypes != NULL)
delete particleTypes;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighborPairs != NULL)
delete neighborPairs;
if (numNeighborPairs != NULL)
delete numNeighborPairs;
if (neighborStartIndex != NULL)
delete neighborStartIndex;
if (neighbors != NULL)
delete neighbors;
if (numNeighborsForAtom != NULL)
delete numNeighborsForAtom;
for (auto function : tabulatedFunctions)
delete function;
}
void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) {
...
...
@@ -5889,6 +5724,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
...
...
@@ -5897,8 +5733,8 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[
i].
upload(f);
tableArgs << ", __global const float";
if (width > 1)
tableArgs << width;
...
...
@@ -5928,8 +5764,8 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
}
}
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customManyParticleGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customManyParticleGlobals", CL_MEM_READ_ONLY);
globals
.
upload(globalParamValues);
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cl.intToString(i)+"]";
...
...
@@ -5946,16 +5782,16 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec);
bool hasTypeFilters = (particleOrderVec.size() > 1);
if (hasTypeFilters) {
particleTypes
= OpenCLArray::creat
e<int>(cl, particleTypesVec.size(), "customManyParticleTypes");
orderIndex
= OpenCLArray::creat
e<int>(cl, orderIndexVec.size(), "customManyParticleOrderIndex");
particleOrder
= OpenCLArray::creat
e<int>(cl, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleTypes
->
upload(particleTypesVec);
orderIndex
->
upload(orderIndexVec);
vector<int> flattenedOrder(particleOrder
->
getSize());
particleTypes
.initializ
e<int>(cl, particleTypesVec.size(), "customManyParticleTypes");
orderIndex
.initializ
e<int>(cl, orderIndexVec.size(), "customManyParticleOrderIndex");
particleOrder
.initializ
e<int>(cl, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleTypes
.
upload(particleTypesVec);
orderIndex
.
upload(orderIndexVec);
vector<int> flattenedOrder(particleOrder
.
getSize());
for (int i = 0; i < (int) particleOrderVec.size(); i++)
for (int j = 0; j < particlesPerSet; j++)
flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j];
particleOrder
->
upload(flattenedOrder);
particleOrder
.
upload(flattenedOrder);
}
// Build data structures for exclusions.
...
...
@@ -5976,10 +5812,10 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end());
exclusionStartIndexVec[i+1] = exclusionsVec.size();
}
exclusions
= OpenCLArray::creat
e<int>(cl, exclusionsVec.size(), "customManyParticleExclusions");
exclusionStartIndex
= OpenCLArray::creat
e<int>(cl, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusions
->
upload(exclusionsVec);
exclusionStartIndex
->
upload(exclusionStartIndexVec);
exclusions
.initializ
e<int>(cl, exclusionsVec.size(), "customManyParticleExclusions");
exclusionStartIndex
.initializ
e<int>(cl, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusions
.
upload(exclusionsVec);
exclusionStartIndex
.
upload(exclusionStartIndexVec);
}
// Build data structures for the neighbor list.
...
...
@@ -5987,18 +5823,18 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
if (nonbondedMethod != NoCutoff) {
int numAtomBlocks = cl.getNumAtomBlocks();
int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
blockCenter
= new OpenCLArray
(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox
= new OpenCLArray
(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
numNeighborPairs
= OpenCLArray::creat
e<int>(cl, 1, "customManyParticleNumNeighborPairs");
neighborStartIndex
= OpenCLArray::creat
e<int>(cl, numParticles+1, "customManyParticleNeighborStartIndex");
numNeighborsForAtom
= OpenCLArray::creat
e<int>(cl, numParticles, "customManyParticleNumNeighborsForAtom");
blockCenter
.initialize
(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox
.initialize
(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
numNeighborPairs
.initializ
e<int>(cl, 1, "customManyParticleNumNeighborPairs");
neighborStartIndex
.initializ
e<int>(cl, numParticles+1, "customManyParticleNeighborStartIndex");
numNeighborsForAtom
.initializ
e<int>(cl, numParticles, "customManyParticleNumNeighborsForAtom");
// Select a size for the array that holds the neighbor list. We have to make a fairly
// arbitrary guess, but if this turns out to be too small we'll increase it later.
maxNeighborPairs = 150*numParticles;
neighborPairs
= OpenCLArray::creat
e<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors
= OpenCLArray::creat
e<int>(cl, maxNeighborPairs, "customManyParticleNeighbors");
neighborPairs
.initializ
e<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors
.initializ
e<int>(cl, maxNeighborPairs, "customManyParticleNeighbors");
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
...
...
@@ -6317,24 +6153,24 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
setPeriodicBoxArgs(cl, forceKernel, index);
index += 5;
if (nonbondedMethod != NoCutoff) {
forceKernel.setArg<cl::Buffer>(index++, neighbors
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborStartIndex
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighbors
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborStartIndex
.
getDeviceBuffer());
}
if (particleTypes
!= NULL
) {
forceKernel.setArg<cl::Buffer>(index++, particleTypes
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, orderIndex
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, particleOrder
->
getDeviceBuffer());
if (particleTypes
.isInitialized()
) {
forceKernel.setArg<cl::Buffer>(index++, particleTypes
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, orderIndex
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, particleOrder
.
getDeviceBuffer());
}
if (exclusions
!= NULL
) {
forceKernel.setArg<cl::Buffer>(index++, exclusions
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex
->
getDeviceBuffer());
if (exclusions
.isInitialized()
) {
forceKernel.setArg<cl::Buffer>(index++, exclusions
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex
.
getDeviceBuffer());
}
if (globals
!= NULL
)
forceKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
if (globals
.isInitialized()
)
forceKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : params->getBuffers())
forceKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions)
forceKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
for (auto
&
function : tabulatedFunctions)
forceKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
if (nonbondedMethod != NoCutoff) {
// Set arguments for the block bounds kernel.
...
...
@@ -6343,9 +6179,9 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
setPeriodicBoxArgs(cl, blockBoundsKernel, index);
index += 5;
blockBoundsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockCenter
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockBoundingBox
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockCenter
.
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockBoundingBox
.
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
.
getDeviceBuffer());
// Set arguments for the neighbor list kernel.
...
...
@@ -6353,36 +6189,36 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
setPeriodicBoxArgs(cl, neighborsKernel, index);
index += 5;
neighborsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockCenter
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockBoundingBox
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, neighborPairs
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockCenter
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockBoundingBox
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, neighborPairs
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
.
getDeviceBuffer());
index++;
if (exclusions
!= NULL
) {
neighborsKernel.setArg<cl::Buffer>(index++, exclusions
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, exclusionStartIndex
->
getDeviceBuffer());
if (exclusions
.isInitialized()
) {
neighborsKernel.setArg<cl::Buffer>(index++, exclusions
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, exclusionStartIndex
.
getDeviceBuffer());
}
// Set arguments for the kernel to find neighbor list start indices.
index = 0;
startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
->
getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, neighborStartIndex
->
getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborPairs
->
getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
.
getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, neighborStartIndex
.
getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborPairs
.
getDeviceBuffer());
// Set arguments for the kernel to assemble the final neighbor list.
index = 0;
copyPairsKernel.setArg<cl::Buffer>(index++, neighborPairs
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighbors
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighborPairs
.
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighbors
.
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
.
getDeviceBuffer());
index++;
copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighborStartIndex
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
.
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighborStartIndex
.
getDeviceBuffer());
}
}
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
...
@@ -6391,7 +6227,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
while (true) {
int* numPairs = (int*) cl.getPinnedBuffer();
...
...
@@ -6406,7 +6242,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
// We need to make sure there was enough memory for the neighbor list. Download the
// information asynchronously so kernels can be running at the same time.
numNeighborPairs
->
download(numPairs, false);
numNeighborPairs
.
download(numPairs, false);
cl.getQueue().enqueueMarker(&event);
cl.executeKernel(startIndicesKernel, 256, 256);
cl.executeKernel(copyPairsKernel, maxNeighborPairs);
...
...
@@ -6420,17 +6256,13 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
if (*numPairs > maxNeighborPairs) {
// Resize the arrays and run the calculation again.
delete neighborPairs;
neighborPairs = NULL;
delete neighbors;
neighbors = NULL;
maxNeighborPairs = (int) (1.1*(*numPairs));
neighborPairs
= OpenCLArray::create<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs"
);
neighbors
= OpenCLArray::create<int>(cl, maxNeighborPairs, "customManyParticleNeighbors"
);
forceKernel.setArg<cl::Buffer>(8, neighbors
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, neighborPairs
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(0, neighborPairs
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(1, neighbors
->
getDeviceBuffer());
neighborPairs
.resize(maxNeighborPairs
);
neighbors
.resize(maxNeighborPairs
);
forceKernel.setArg<cl::Buffer>(8, neighbors
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, neighborPairs
.
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(0, neighborPairs
.
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(1, neighbors
.
getDeviceBuffer());
continue;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment