Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c8dac206
Commit
c8dac206
authored
Oct 17, 2012
by
Peter Eastman
Browse files
Continuing to implement double precision in OpenCL
parent
34938e2c
Changes
36
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
635 additions
and
529 deletions
+635
-529
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+8
-8
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+38
-15
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+25
-4
platforms/opencl/src/OpenCLExpressionUtilities.cpp
platforms/opencl/src/OpenCLExpressionUtilities.cpp
+7
-20
platforms/opencl/src/OpenCLExpressionUtilities.h
platforms/opencl/src/OpenCLExpressionUtilities.h
+12
-17
platforms/opencl/src/OpenCLFFT3D.cpp
platforms/opencl/src/OpenCLFFT3D.cpp
+46
-45
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+5
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+334
-287
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+37
-17
platforms/opencl/src/OpenCLNonbondedUtilities.h
platforms/opencl/src/OpenCLNonbondedUtilities.h
+6
-2
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+16
-13
platforms/opencl/src/OpenCLParallelKernels.h
platforms/opencl/src/OpenCLParallelKernels.h
+2
-2
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+1
-1
platforms/opencl/src/OpenCLSort.h
platforms/opencl/src/OpenCLSort.h
+1
-1
platforms/opencl/src/kernels/angleForce.cl
platforms/opencl/src/kernels/angleForce.cl
+13
-13
platforms/opencl/src/kernels/bondForce.cl
platforms/opencl/src/kernels/bondForce.cl
+4
-4
platforms/opencl/src/kernels/cmapTorsionForce.cl
platforms/opencl/src/kernels/cmapTorsionForce.cl
+40
-40
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+28
-23
platforms/opencl/src/kernels/customCompoundBond.cl
platforms/opencl/src/kernels/customCompoundBond.cl
+11
-11
platforms/opencl/src/kernels/customExternalForce.cl
platforms/opencl/src/kernels/customExternalForce.cl
+1
-1
No files found.
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
c8dac206
...
...
@@ -58,7 +58,7 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
std
::
string
OpenCLBondedUtilities
::
addArgument
(
cl
::
Memory
&
data
,
const
string
&
type
)
{
arguments
.
push_back
(
&
data
);
argTypes
.
push_back
(
type
);
return
"customArg"
+
OpenCLExpressionUtilities
::
intToString
(
arguments
.
size
());
return
"customArg"
+
context
.
intToString
(
arguments
.
size
());
}
void
OpenCLBondedUtilities
::
addPrefixCode
(
const
string
&
source
)
{
...
...
@@ -164,17 +164,17 @@ void OpenCLBondedUtilities::initialize(const System& system) {
stringstream
s
;
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
s
<<
prefixCode
[
i
];
s
<<
"__kernel void computeBondedForces(__global
float
4* restrict forceBuffers, __global
float
* restrict energyBuffer, __global const
float
4* restrict posq, int groups"
;
s
<<
"__kernel void computeBondedForces(__global
real
4* restrict forceBuffers, __global
real
* restrict energyBuffer, __global const
real
4* restrict posq, int groups"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
OpenCLExpressionUtilities
::
intToString
(
indexWidth
[
force
]));
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
context
.
intToString
(
indexWidth
[
force
]));
s
<<
", __global const "
<<
indexType
<<
"* restrict atomIndices"
<<
i
;
s
<<
", __global const "
<<
indexType
<<
"* restrict bufferIndices"
<<
i
;
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
") {
\n
"
;
s
<<
"
float
energy = 0.0f;
\n
"
;
s
<<
"
real
energy = 0.0f;
\n
"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
...
...
@@ -182,7 +182,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
s
<<
"}
\n
"
;
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
cl
::
Program
program
=
context
.
createProgram
(
s
.
str
(),
defines
);
kernels
.
push_back
(
cl
::
Kernel
(
program
,
"computeBondedForces"
));
}
...
...
@@ -206,7 +206,7 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
suffix
=
suffix4
;
else
suffix
=
suffix16
;
string
indexType
=
"uint"
+
(
width
==
1
?
""
:
OpenCLExpressionUtilities
::
intToString
(
width
));
string
indexType
=
"uint"
+
(
width
==
1
?
""
:
context
.
intToString
(
width
));
stringstream
s
;
s
<<
"if ((groups&"
<<
(
1
<<
group
)
<<
") != 0)
\n
"
;
s
<<
"for (unsigned int index = get_global_id(0); index < "
<<
numBonds
<<
"; index += get_global_size(0)) {
\n
"
;
...
...
@@ -214,13 +214,13 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
s
<<
" "
<<
indexType
<<
" buffers = bufferIndices"
<<
forceIndex
<<
"[index];
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" unsigned int atom"
<<
(
i
+
1
)
<<
" = atoms"
<<
suffix
[
i
]
<<
";
\n
"
;
s
<<
"
float
4 pos"
<<
(
i
+
1
)
<<
" = posq[atom"
<<
(
i
+
1
)
<<
"];
\n
"
;
s
<<
"
real
4 pos"
<<
(
i
+
1
)
<<
" = posq[atom"
<<
(
i
+
1
)
<<
"];
\n
"
;
}
s
<<
computeForce
<<
"
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" {
\n
"
;
s
<<
" unsigned int offset = atom"
<<
(
i
+
1
)
<<
"+buffers"
<<
suffix
[
i
]
<<
"*PADDED_NUM_ATOMS;
\n
"
;
s
<<
"
float
4 force = forceBuffers[offset];
\n
"
;
s
<<
"
real
4 force = forceBuffers[offset];
\n
"
;
s
<<
" force.xyz += force"
<<
(
i
+
1
)
<<
".xyz;
\n
"
;
s
<<
" forceBuffers[offset] = force;
\n
"
;
s
<<
" }
\n
"
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
c8dac206
...
...
@@ -68,7 +68,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
atomsWereReordered
(
false
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
useMixedPrecision
=
false
;
...
...
@@ -145,7 +145,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
this
->
deviceIndex
=
deviceIndex
;
if
(
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
()
<
minThreadBlockSize
)
throw
OpenMMException
(
"The specified OpenCL device is not compatible with OpenMM"
);
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
ThreadBlockSize
);
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
intToString
(
ThreadBlockSize
);
if
(
platformVendor
.
size
()
>=
5
&&
platformVendor
.
substr
(
0
,
5
)
==
"Intel"
)
defaultOptimizationOptions
=
""
;
else
...
...
@@ -269,7 +269,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
clearFourBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFourBuffers"
);
clearFiveBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFiveBuffers"
);
clearSixBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearSixBuffers"
);
reduce
Float
4Kernel
=
cl
::
Kernel
(
utilities
,
"reduce
Float
4Buffer"
);
reduce
Real
4Kernel
=
cl
::
Kernel
(
utilities
,
"reduce
Real
4Buffer"
);
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
...
...
@@ -316,9 +316,10 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
thread
=
new
WorkThread
();
// Create
the integration
utilities object.
// Create utilities object
s
.
integration
=
new
OpenCLIntegrationUtilities
(
*
this
,
system
);
expression
=
new
OpenCLExpressionUtilities
(
*
this
);
}
OpenCLContext
::~
OpenCLContext
()
{
...
...
@@ -346,6 +347,8 @@ OpenCLContext::~OpenCLContext() {
delete
atomIndexDevice
;
if
(
integration
!=
NULL
)
delete
integration
;
if
(
expression
!=
NULL
)
delete
expression
;
if
(
bonded
!=
NULL
)
delete
bonded
;
if
(
nonbonded
!=
NULL
)
...
...
@@ -376,10 +379,10 @@ void OpenCLContext::initialize() {
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forceBuffers
->
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl_int
>
(
2
,
paddedNumAtoms
);
reduceForcesKernel
.
setArg
<
cl_int
>
(
3
,
numForceBuffers
);
addAutoclearBuffer
(
longForceBuffer
->
getDeviceBuffer
(),
longForceBuffer
->
getSize
()
*
2
);
addAutoclearBuffer
(
*
longForceBuffer
);
}
addAutoclearBuffer
(
forceBuffers
->
getDeviceBuffer
(),
forceBuffers
->
getSize
()
*
4
);
addAutoclearBuffer
(
energyBuffer
->
getDeviceBuffer
(),
energyBuffer
->
getSize
()
);
addAutoclearBuffer
(
*
forceBuffers
);
addAutoclearBuffer
(
*
energyBuffer
);
int
bufferBytes
=
max
(
posq
->
getSize
()
*
posq
->
getElementSize
(),
energyBuffer
->
getSize
()
*
energyBuffer
->
getElementSize
());
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedMemory
=
queue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
...
...
@@ -479,6 +482,21 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
return
program
;
}
string
OpenCLContext
::
doubleToString
(
double
value
)
{
stringstream
s
;
s
.
precision
(
useDoublePrecision
?
16
:
8
);
s
<<
scientific
<<
value
;
if
(
!
useDoublePrecision
)
s
<<
"f"
;
return
s
.
str
();
}
string
OpenCLContext
::
intToString
(
int
value
)
{
stringstream
s
;
s
<<
value
;
return
s
.
str
();
}
void
OpenCLContext
::
executeKernel
(
cl
::
Kernel
&
kernel
,
int
workUnits
,
int
blockSize
)
{
if
(
blockSize
==
-
1
)
blockSize
=
ThreadBlockSize
;
...
...
@@ -494,18 +512,23 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
}
void
OpenCLContext
::
clearBuffer
(
OpenCLArray
&
array
)
{
clearBuffer
(
array
.
getDeviceBuffer
(),
array
.
getSize
()
*
array
.
getElementSize
()
/
sizeof
(
cl_float
)
);
clearBuffer
(
array
.
getDeviceBuffer
(),
array
.
getSize
()
*
array
.
getElementSize
());
}
void
OpenCLContext
::
clearBuffer
(
cl
::
Memory
&
memory
,
int
size
)
{
int
words
=
size
/
4
;
clearBufferKernel
.
setArg
<
cl
::
Memory
>
(
0
,
memory
);
clearBufferKernel
.
setArg
<
cl_int
>
(
1
,
size
);
executeKernel
(
clearBufferKernel
,
size
,
128
);
clearBufferKernel
.
setArg
<
cl_int
>
(
1
,
words
);
executeKernel
(
clearBufferKernel
,
words
,
128
);
}
void
OpenCLContext
::
addAutoclearBuffer
(
OpenCLArray
&
array
)
{
addAutoclearBuffer
(
array
.
getDeviceBuffer
(),
array
.
getSize
()
*
array
.
getElementSize
());
}
void
OpenCLContext
::
addAutoclearBuffer
(
cl
::
Memory
&
memory
,
int
size
)
{
autoclearBuffers
.
push_back
(
&
memory
);
autoclearBufferSizes
.
push_back
(
size
);
autoclearBufferSizes
.
push_back
(
size
/
4
);
}
void
OpenCLContext
::
clearAutoclearBuffers
()
{
...
...
@@ -581,10 +604,10 @@ void OpenCLContext::reduceForces() {
void
OpenCLContext
::
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
)
{
int
bufferSize
=
array
.
getSize
()
/
numBuffers
;
reduce
Float
4Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
array
.
getDeviceBuffer
());
reduce
Float
4Kernel
.
setArg
<
cl_int
>
(
1
,
bufferSize
);
reduce
Float
4Kernel
.
setArg
<
cl_int
>
(
2
,
numBuffers
);
executeKernel
(
reduce
Float
4Kernel
,
bufferSize
,
128
);
reduce
Real
4Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
array
.
getDeviceBuffer
());
reduce
Real
4Kernel
.
setArg
<
cl_int
>
(
1
,
bufferSize
);
reduce
Real
4Kernel
.
setArg
<
cl_int
>
(
2
,
numBuffers
);
executeKernel
(
reduce
Real
4Kernel
,
bufferSize
,
128
);
}
void
OpenCLContext
::
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
...
...
platforms/opencl/src/OpenCLContext.h
View file @
c8dac206
...
...
@@ -45,6 +45,7 @@ namespace OpenMM {
class
OpenCLArray
;
class
OpenCLForceInfo
;
class
OpenCLIntegrationUtilities
;
class
OpenCLExpressionUtilities
;
class
OpenCLBondedUtilities
;
class
OpenCLNonbondedUtilities
;
class
System
;
...
...
@@ -314,14 +315,18 @@ public:
* Set all elements of an array to 0.
*
* @param memory the Memory to clear
* @param size the
number of float elements in
the buffer
* @param size the
size of
the buffer
in bytes
*/
void
clearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*/
void
addAutoclearBuffer
(
OpenCLArray
&
array
);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*
* @param memory the Memory to clear
* @param size the
number of float elements in
the buffer
* @param size the
size of
the buffer
in bytes
*/
void
addAutoclearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
/**
...
...
@@ -329,7 +334,7 @@ public:
*/
void
clearAutoclearBuffers
();
/**
* Given a collection of buffers packed into an array, sum them and store
* Given a collection of
floating point
buffers packed into an array, sum them and store
* the sum in the first buffer.
*
* @param array the array containing the buffers to reduce
...
...
@@ -437,6 +442,15 @@ public:
bool
getUseMixedPrecision
()
{
return
useMixedPrecision
;
}
/**
* Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision.
*/
std
::
string
doubleToString
(
double
value
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
std
::
string
intToString
(
int
value
);
/**
* Get the size of the periodic box.
*/
...
...
@@ -476,6 +490,12 @@ public:
OpenCLIntegrationUtilities
&
getIntegrationUtilities
()
{
return
*
integration
;
}
/**
* Get the OpenCLExpressionUtilities for this context.
*/
OpenCLExpressionUtilities
&
getExpressionUtilities
()
{
return
*
expression
;
}
/**
* Get the OpenCLBondedUtilities for this context.
*/
...
...
@@ -580,7 +600,7 @@ private:
cl
::
Kernel
clearFourBuffersKernel
;
cl
::
Kernel
clearFiveBuffersKernel
;
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
reduce
Float
4Kernel
;
cl
::
Kernel
reduce
Real
4Kernel
;
cl
::
Kernel
reduceForcesKernel
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
Molecule
>
molecules
;
...
...
@@ -601,6 +621,7 @@ private:
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
OpenCLIntegrationUtilities
*
integration
;
OpenCLExpressionUtilities
*
expression
;
OpenCLBondedUtilities
*
bonded
;
OpenCLNonbondedUtilities
*
nonbonded
;
WorkThread
*
thread
;
...
...
platforms/opencl/src/OpenCLExpressionUtilities.cpp
View file @
c8dac206
...
...
@@ -33,19 +33,6 @@ using namespace OpenMM;
using
namespace
Lepton
;
using
namespace
std
;
string
OpenCLExpressionUtilities
::
doubleToString
(
double
value
)
{
stringstream
s
;
s
.
precision
(
8
);
s
<<
scientific
<<
value
<<
"f"
;
return
s
.
str
();
}
string
OpenCLExpressionUtilities
::
intToString
(
int
value
)
{
stringstream
s
;
s
<<
value
;
return
s
.
str
();
}
string
OpenCLExpressionUtilities
::
createExpressions
(
const
map
<
string
,
ParsedExpression
>&
expressions
,
const
map
<
string
,
string
>&
variables
,
const
vector
<
pair
<
string
,
string
>
>&
functions
,
const
string
&
prefix
,
const
string
&
functionParams
,
const
string
&
tempType
)
{
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variableNodes
;
...
...
@@ -75,13 +62,13 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
return
;
for
(
int
i
=
0
;
i
<
(
int
)
node
.
getChildren
().
size
();
i
++
)
processExpression
(
out
,
node
.
getChildren
()[
i
],
temps
,
functions
,
prefix
,
functionParams
,
allExpressions
,
tempType
);
string
name
=
prefix
+
intToString
(
temps
.
size
());
string
name
=
prefix
+
context
.
intToString
(
temps
.
size
());
bool
hasRecordedNode
=
false
;
out
<<
tempType
<<
" "
<<
name
<<
" = "
;
switch
(
node
.
getOperation
().
getId
())
{
case
Operation
::
CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
Constant
*>
(
&
node
.
getOperation
())
->
getValue
());
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
Constant
*>
(
&
node
.
getOperation
())
->
getValue
());
break
;
case
Operation
::
VARIABLE
:
throw
OpenMMException
(
"Unknown variable in expression: "
+
node
.
getOperation
().
getName
());
...
...
@@ -107,7 +94,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
string
valueName
=
name
;
string
derivName
=
name
;
if
(
valueNode
!=
NULL
&&
derivNode
!=
NULL
)
{
string
name2
=
prefix
+
intToString
(
temps
.
size
());
string
name2
=
prefix
+
context
.
intToString
(
temps
.
size
());
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
if
(
isDeriv
)
{
valueName
=
name2
;
...
...
@@ -236,10 +223,10 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"RECIP("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
ADD_CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
AddConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"+"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
AddConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"+"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
case
Operation
::
MULTIPLY_CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
MultiplyConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"*"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
MultiplyConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"*"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
case
Operation
::
POWER_CONSTANT
:
{
...
...
@@ -266,7 +253,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
for
(
map
<
int
,
const
ExpressionTreeNode
*>::
const_iterator
iter
=
powers
.
begin
();
iter
!=
powers
.
end
();
++
iter
)
{
if
(
iter
->
first
!=
exponent
)
{
exponents
.
push_back
(
iter
->
first
>=
0
?
iter
->
first
:
-
iter
->
first
);
string
name2
=
prefix
+
intToString
(
temps
.
size
());
string
name2
=
prefix
+
context
.
intToString
(
temps
.
size
());
names
.
push_back
(
name2
);
temps
.
push_back
(
make_pair
(
*
iter
->
second
,
name2
));
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
...
...
@@ -295,7 +282,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"}"
;
}
else
out
<<
"pow("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
doubleToString
(
exponent
)
<<
")"
;
out
<<
"pow("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
context
.
doubleToString
(
exponent
)
<<
")"
;
break
;
}
case
Operation
::
MIN
:
...
...
platforms/opencl/src/OpenCLExpressionUtilities.h
View file @
c8dac206
...
...
@@ -45,6 +45,8 @@ namespace OpenMM {
class
OPENMM_EXPORT
OpenCLExpressionUtilities
{
public:
OpenCLExpressionUtilities
(
OpenCLContext
&
context
)
:
context
(
context
)
{
}
/**
* Generate the source code for calculating a set of expressions.
*
...
...
@@ -54,10 +56,10 @@ public:
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "
float
")
* @param tempType the type of value to use for temporary variables (defaults to "
real
")
*/
static
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"
float
"
);
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"
real
"
);
/**
* Generate the source code for calculating a set of expressions.
*
...
...
@@ -69,7 +71,7 @@ public:
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "float")
*/
static
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variables
,
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"float"
);
/**
* Calculate the spline coefficients for a tabulated function that appears in expressions.
...
...
@@ -79,26 +81,19 @@ public:
* @param max the value of the independent variable corresponding to the last element of values
* @return the spline coefficients
*/
static
std
::
vector
<
mm_float4
>
computeFunctionCoefficients
(
const
std
::
vector
<
double
>&
values
,
double
min
,
double
max
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static
std
::
string
doubleToString
(
double
value
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static
std
::
string
intToString
(
int
value
);
std
::
vector
<
mm_float4
>
computeFunctionCoefficients
(
const
std
::
vector
<
double
>&
values
,
double
min
,
double
max
);
class
FunctionPlaceholder
;
private:
static
void
processExpression
(
std
::
stringstream
&
out
,
const
Lepton
::
ExpressionTreeNode
&
node
,
void
processExpression
(
std
::
stringstream
&
out
,
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
vector
<
Lepton
::
ParsedExpression
>&
allExpressions
,
const
std
::
string
&
tempType
);
static
std
::
string
getTempName
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
);
static
void
findRelatedTabulatedFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
std
::
string
getTempName
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
);
void
findRelatedTabulatedFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
const
Lepton
::
ExpressionTreeNode
*&
valueNode
,
const
Lepton
::
ExpressionTreeNode
*&
derivNode
);
static
void
findRelatedPowers
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
void
findRelatedPowers
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
std
::
map
<
int
,
const
Lepton
::
ExpressionTreeNode
*>&
powers
);
OpenCLContext
&
context
;
};
/**
...
...
platforms/opencl/src/OpenCLFFT3D.cpp
View file @
c8dac206
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
1
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -47,15 +47,15 @@ void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) {
maxSize
=
1
;
zkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
in
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl
::
Buffer
>
(
1
,
out
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl_
floa
t
>
(
2
,
forward
?
1
.0
f
:
-
1
.0
f
);
zkernel
.
setArg
<
cl_
in
t
>
(
2
,
forward
?
1
:
-
1
);
context
.
executeKernel
(
zkernel
,
xsize
*
ysize
*
zsize
,
min
(
zsize
,
(
int
)
maxSize
));
xkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
out
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl
::
Buffer
>
(
1
,
in
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl_
floa
t
>
(
2
,
forward
?
1
.0
f
:
-
1
.0
f
);
xkernel
.
setArg
<
cl_
in
t
>
(
2
,
forward
?
1
:
-
1
);
context
.
executeKernel
(
xkernel
,
xsize
*
ysize
*
zsize
,
min
(
xsize
,
(
int
)
maxSize
));
ykernel
.
setArg
<
cl
::
Buffer
>
(
0
,
in
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl
::
Buffer
>
(
1
,
out
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl_
floa
t
>
(
2
,
forward
?
1
.0
f
:
-
1
.0
f
);
ykernel
.
setArg
<
cl_
in
t
>
(
2
,
forward
?
1
:
-
1
);
context
.
executeKernel
(
ykernel
,
xsize
*
ysize
*
zsize
,
min
(
ysize
,
(
int
)
maxSize
));
}
...
...
@@ -99,23 +99,23 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c4 = data"
<<
input
<<
"[i+"
<<
(
4
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 d0 = c1+c4;
\n
"
;
source
<<
"
float
2 d1 = c2+c3;
\n
"
;
source
<<
"
float
2 d2 = "
<<
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c1-c4);
\n
"
;
source
<<
"
float
2 d3 = "
<<
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c2-c3);
\n
"
;
source
<<
"
float
2 d4 = d0+d1;
\n
"
;
source
<<
"
float
2 d5 = "
<<
OpenCLExpressionUtilities
::
doubleToString
(
0.25
*
sqrt
(
5.0
))
<<
"*(d0-d1);
\n
"
;
source
<<
"
float
2 d6 = c0-0.25f*d4;
\n
"
;
source
<<
"
float
2 d7 = d6+d5;
\n
"
;
source
<<
"
float
2 d8 = d6-d5;
\n
"
;
string
coeff
=
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
0.2
*
M_PI
)
/
sin
(
0.4
*
M_PI
));
source
<<
"
float
2 d9 = sign*(
float
2) (d2.y+"
<<
coeff
<<
"*d3.y, -d2.x-"
<<
coeff
<<
"*d3.x);
\n
"
;
source
<<
"
float
2 d10 = sign*(
float
2) ("
<<
coeff
<<
"*d2.y-d3.y, d3.x-"
<<
coeff
<<
"*d2.x);
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c4 = data"
<<
input
<<
"[i+"
<<
(
4
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 d0 = c1+c4;
\n
"
;
source
<<
"
real
2 d1 = c2+c3;
\n
"
;
source
<<
"
real
2 d2 = "
<<
context
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c1-c4);
\n
"
;
source
<<
"
real
2 d3 = "
<<
context
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c2-c3);
\n
"
;
source
<<
"
real
2 d4 = d0+d1;
\n
"
;
source
<<
"
real
2 d5 = "
<<
context
.
doubleToString
(
0.25
*
sqrt
(
5.0
))
<<
"*(d0-d1);
\n
"
;
source
<<
"
real
2 d6 = c0-0.25f*d4;
\n
"
;
source
<<
"
real
2 d7 = d6+d5;
\n
"
;
source
<<
"
real
2 d8 = d6-d5;
\n
"
;
string
coeff
=
context
.
doubleToString
(
sin
(
0.2
*
M_PI
)
/
sin
(
0.4
*
M_PI
));
source
<<
"
real
2 d9 = sign*(
real
2) (d2.y+"
<<
coeff
<<
"*d3.y, -d2.x-"
<<
coeff
<<
"*d3.x);
\n
"
;
source
<<
"
real
2 d10 = sign*(
real
2) ("
<<
coeff
<<
"*d2.y-d3.y, d3.x-"
<<
coeff
<<
"*d2.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+4*j*"
<<
m
<<
"] = c0+d4;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
5
*
L
)
<<
"], d7+d9);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8+d10);
\n
"
;
...
...
@@ -134,14 +134,14 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 d0 = c0+c2;
\n
"
;
source
<<
"
float
2 d1 = c0-c2;
\n
"
;
source
<<
"
float
2 d2 = c1+c3;
\n
"
;
source
<<
"
float
2 d3 = sign*(
float
2) (c1.y-c3.y, c3.x-c1.x);
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 d0 = c0+c2;
\n
"
;
source
<<
"
real
2 d1 = c0-c2;
\n
"
;
source
<<
"
real
2 d2 = c1+c3;
\n
"
;
source
<<
"
real
2 d3 = sign*(
real
2) (c1.y-c3.y, c3.x-c1.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+3*j*"
<<
m
<<
"] = d0+d2;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
4
*
L
)
<<
"], d1+d3);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0-d2);
\n
"
;
...
...
@@ -159,12 +159,12 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 d0 = c1+c2;
\n
"
;
source
<<
"
float
2 d1 = c0-0.5f*d0;
\n
"
;
source
<<
"
float
2 d2 = sign*"
<<
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
M_PI
/
3.0
))
<<
"*(
float
2) (c1.y-c2.y, c2.x-c1.x);
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 d0 = c1+c2;
\n
"
;
source
<<
"
real
2 d1 = c0-0.5f*d0;
\n
"
;
source
<<
"
real
2 d2 = sign*"
<<
context
.
doubleToString
(
sin
(
M_PI
/
3.0
))
<<
"*(
real
2) (c1.y-c2.y, c2.x-c1.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+2*j*"
<<
m
<<
"] = c0+d0;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
3
*
L
)
<<
"], d1+d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1-d2);
\n
"
;
...
...
@@ -181,15 +181,15 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"data"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0+c1;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
2
*
L
)
<<
"], c0-c1);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
2
;
}
else
throw
OpenMMException
(
"Illegal size for FFT: "
+
OpenCLExpressionUtilities
::
intToString
(
zsize
));
throw
OpenMMException
(
"Illegal size for FFT: "
+
context
.
intToString
(
zsize
));
source
<<
"barrier(CLK_LOCAL_MEM_FENCE);
\n
"
;
source
<<
"}
\n
"
;
++
stage
;
...
...
@@ -205,16 +205,17 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"out[y*(ZSIZE*XSIZE)+get_local_id(0)*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[get_local_id(0)];
\n
"
;
source
<<
"barrier(CLK_GLOBAL_MEM_FENCE);"
;
map
<
string
,
string
>
replacements
;
replacements
[
"XSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
xsize
);
replacements
[
"YSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
ysize
);
replacements
[
"ZSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
zsize
);
replacements
[
"M_PI"
]
=
OpenCLExpressionUtilities
::
doubleToString
(
M_PI
);
replacements
[
"XSIZE"
]
=
context
.
intToString
(
xsize
);
replacements
[
"YSIZE"
]
=
context
.
intToString
(
ysize
);
replacements
[
"ZSIZE"
]
=
context
.
intToString
(
zsize
);
replacements
[
"M_PI"
]
=
context
.
doubleToString
(
M_PI
);
replacements
[
"COMPUTE_FFT"
]
=
source
.
str
();
replacements
[
"LOOP_REQUIRED"
]
=
(
loopRequired
?
"1"
:
"0"
);
cl
::
Program
program
=
context
.
createProgram
(
context
.
replaceStrings
(
OpenCLKernelSources
::
fft
,
replacements
));
cl
::
Kernel
kernel
(
program
,
"execFFT"
);
kernel
.
setArg
(
3
,
zsize
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
4
,
zsize
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
5
,
zsize
*
sizeof
(
mm_float2
),
NULL
);
int
bufferSize
=
zsize
*
(
context
.
getUseDoublePrecision
()
?
sizeof
(
mm_double2
)
:
sizeof
(
mm_float2
));
kernel
.
setArg
(
3
,
bufferSize
,
NULL
);
kernel
.
setArg
(
4
,
bufferSize
,
NULL
);
kernel
.
setArg
(
5
,
bufferSize
,
NULL
);
return
kernel
;
}
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
c8dac206
...
...
@@ -559,8 +559,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Create the CCMA kernels.
map
<
string
,
string
>
defines
;
defines
[
"NUM_CONSTRAINTS"
]
=
OpenCLExpressionUtilities
::
intToString
(
numCCMA
);
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
numAtoms
);
defines
[
"NUM_CONSTRAINTS"
]
=
context
.
intToString
(
numCCMA
);
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
numAtoms
);
cl
::
Program
ccmaProgram
=
context
.
createProgram
(
OpenCLKernelSources
::
ccma
,
defines
);
ccmaDirectionsKernel
=
cl
::
Kernel
(
ccmaProgram
,
"computeConstraintDirections"
);
ccmaPosForceKernel
=
cl
::
Kernel
(
ccmaProgram
,
"computeConstraintForce"
);
...
...
@@ -630,9 +630,9 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Create the kernels for virtual sites.
map
<
string
,
string
>
defines
;
defines
[
"NUM_2_AVERAGE"
]
=
OpenCLExpressionUtilities
::
intToString
(
num2Avg
);
defines
[
"NUM_3_AVERAGE"
]
=
OpenCLExpressionUtilities
::
intToString
(
num3Avg
);
defines
[
"NUM_OUT_OF_PLANE"
]
=
OpenCLExpressionUtilities
::
intToString
(
numOutOfPlane
);
defines
[
"NUM_2_AVERAGE"
]
=
context
.
intToString
(
num2Avg
);
defines
[
"NUM_3_AVERAGE"
]
=
context
.
intToString
(
num3Avg
);
defines
[
"NUM_OUT_OF_PLANE"
]
=
context
.
intToString
(
numOutOfPlane
);
cl
::
Program
vsiteProgram
=
context
.
createProgram
(
OpenCLKernelSources
::
virtualSites
,
defines
);
vsitePositionKernel
=
cl
::
Kernel
(
vsiteProgram
,
"computeVirtualSites"
);
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
context
.
getPosq
().
getDeviceBuffer
());
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
c8dac206
...
...
@@ -53,19 +53,6 @@ using namespace std;
using
Lepton
::
ExpressionTreeNode
;
using
Lepton
::
Operation
;
/**
 * Format a floating point value as a single precision literal for use in
 * generated kernel source: scientific notation with 8 digits after the
 * decimal point, followed by an "f" suffix.
 *
 * @param value  the number to format
 * @return the textual literal, e.g. "1.00000000e+00f"
 */
static string doubleToString(double value) {
    ostringstream formatted;
    // Equivalent to streaming the `scientific` manipulator.
    formatted.setf(ios_base::scientific, ios_base::floatfield);
    formatted.precision(8);
    formatted << value;
    formatted << "f";
    return formatted.str();
}
/**
 * Convert an integer to its decimal text representation.
 *
 * @param value  the integer to format
 * @return the decimal digits of value (with a leading '-' when negative)
 */
static string intToString(int value) {
    ostringstream digits;
    digits << value;
    return digits.str();
}
static
void
setPosqCorrectionArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseMixedPrecision
())
kernel
.
setArg
<
cl
::
Buffer
>
(
index
,
cl
.
getPosqCorrection
().
getDeviceBuffer
());
...
...
@@ -73,6 +60,20 @@ static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int inde
kernel
.
setArg
<
void
*>
(
index
,
NULL
);
}
/**
 * Set a kernel argument to the context's periodic box size, choosing the
 * argument type to match the context's precision mode.
 *
 * @param cl      the context supplying the box size and precision flag
 * @param kernel  the kernel whose argument is being set
 * @param index   the position of the argument to set
 */
static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    // Single precision: pass the box size as an mm_float4.
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize());
        return;
    }
    // Double precision: pass the box size as an mm_double4.
    kernel.setArg<mm_double4>(index, cl.getPeriodicBoxSizeDouble());
}
/**
 * Set a kernel argument to the context's inverse periodic box size, choosing
 * the argument type to match the context's precision mode.
 *
 * @param cl      the context supplying the inverse box size and precision flag
 * @param kernel  the kernel whose argument is being set
 * @param index   the position of the argument to set
 */
static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    // Single precision: pass the inverse box size as an mm_float4.
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
        return;
    }
    // Double precision: pass the inverse box size as an mm_double4.
    kernel.setArg<mm_double4>(index, cl.getInvPeriodicBoxSizeDouble());
}
static
bool
isZeroExpression
(
const
Lepton
::
ParsedExpression
&
expression
)
{
const
Lepton
::
Operation
&
op
=
expression
.
getRootNode
().
getOperation
();
if
(
op
.
getId
()
!=
Lepton
::
Operation
::
CONSTANT
)
...
...
@@ -124,11 +125,19 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
double
sum
=
0.0
f
;
if
(
includeEnergy
)
{
OpenCLArray
&
energyArray
=
cl
.
getEnergyBuffer
();
cl_float
*
energy
=
(
cl_float
*
)
cl
.
getPinnedBuffer
();
if
(
cl
.
getUseDoublePrecision
())
{
double
*
energy
=
(
double
*
)
cl
.
getPinnedBuffer
();
energyArray
.
download
(
energy
);
for
(
int
i
=
0
;
i
<
energyArray
.
getSize
();
i
++
)
sum
+=
energy
[
i
];
}
else
{
float
*
energy
=
(
float
*
)
cl
.
getPinnedBuffer
();
energyArray
.
download
(
energy
);
for
(
int
i
=
0
;
i
<
energyArray
.
getSize
();
i
++
)
sum
+=
energy
[
i
];
}
}
return
sum
;
}
...
...
@@ -401,7 +410,7 @@ void OpenCLApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
constraints
,
defines
);
applyDeltasKernel
=
cl
::
Kernel
(
program
,
"applyPositionDeltas"
);
applyDeltasKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
...
...
@@ -571,7 +580,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"r"
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"
float
dEdR = "
]
=
forceExpression
;
expressions
[
"
real
dEdR = "
]
=
forceExpression
;
// Create the kernels.
...
...
@@ -587,7 +596,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
argName
+
"["
+
intToString
(
i
)
+
"]"
;
string
value
=
argName
+
"["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
}
}
...
...
@@ -598,7 +607,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
compute
<<
buffer
.
getType
()
<<
" bondParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
}
vector
<
pair
<
string
,
string
>
>
functions
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
bondForce
,
replacements
),
force
.
getForceGroup
());
...
...
@@ -796,7 +805,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"theta"
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"
float
dEdAngle = "
]
=
forceExpression
;
expressions
[
"
real
dEdAngle = "
]
=
forceExpression
;
// Create the kernels.
...
...
@@ -812,7 +821,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
argName
+
"["
+
intToString
(
i
)
+
"]"
;
string
value
=
argName
+
"["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
}
}
...
...
@@ -823,7 +832,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
compute
<<
buffer
.
getType
()
<<
" angleParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
}
vector
<
pair
<
string
,
string
>
>
functions
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
angleForce
,
replacements
),
force
.
getForceGroup
());
...
...
@@ -1194,7 +1203,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"theta"
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"
float
dEdAngle = "
]
=
forceExpression
;
expressions
[
"
real
dEdAngle = "
]
=
forceExpression
;
// Create the kernels.
...
...
@@ -1210,7 +1219,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
argName
+
"["
+
intToString
(
i
)
+
"]"
;
string
value
=
argName
+
"["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
}
}
...
...
@@ -1221,7 +1230,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
compute
<<
buffer
.
getType
()
<<
" torsionParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
}
vector
<
pair
<
string
,
string
>
>
functions
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
torsionForce
,
replacements
),
force
.
getForceGroup
());
...
...
@@ -1349,7 +1358,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int
numParticles
=
force
.
getNumParticles
();
sigmaEpsilon
=
OpenCLArray
::
create
<
mm_float2
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"sigmaEpsilon"
);
vector
<
mm_float4
>
posq
(
cl
.
getPaddedNumAtoms
(),
mm_float4
(
0
,
0
,
0
,
0
));
vector
<
mm_float4
>
posqf
(
cl
.
getPaddedNumAtoms
());
vector
<
mm_double4
>
posqd
(
cl
.
getPaddedNumAtoms
());
vector
<
mm_float2
>
sigmaEpsilonVector
(
cl
.
getPaddedNumAtoms
());
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
double
sumSquaredCharges
=
0.0
;
...
...
@@ -1358,7 +1368,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double
charge
,
sigma
,
epsilon
;
force
.
getParticleParameters
(
i
,
charge
,
sigma
,
epsilon
);
posq
[
i
].
w
=
(
float
)
charge
;
if
(
cl
.
getUseDoublePrecision
())
posqd
[
i
]
=
mm_double4
(
0
,
0
,
0
,
charge
);
else
posqf
[
i
]
=
mm_float4
(
0
,
0
,
0
,
(
float
)
charge
);
sigmaEpsilonVector
[
i
]
=
mm_float2
((
float
)
(
0.5
*
sigma
),
(
float
)
(
2.0
*
sqrt
(
epsilon
)));
exclusionList
[
i
].
push_back
(
i
);
sumSquaredCharges
+=
charge
*
charge
;
...
...
@@ -1371,7 +1384,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
exclusionList
[
exclusions
[
i
].
first
].
push_back
(
exclusions
[
i
].
second
);
exclusionList
[
exclusions
[
i
].
second
].
push_back
(
exclusions
[
i
].
first
);
}
cl
.
getPosq
().
upload
(
posq
);
if
(
cl
.
getUseDoublePrecision
())
cl
.
getPosq
().
upload
(
posqd
);
else
cl
.
getPosq
().
upload
(
posqf
);
sigmaEpsilon
->
upload
(
sigmaEpsilonVector
);
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
CutoffNonPeriodic
);
...
...
@@ -1383,8 +1399,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
double
reactionFieldK
=
pow
(
force
.
getCutoffDistance
(),
-
3.0
)
*
(
force
.
getReactionFieldDielectric
()
-
1.0
)
/
(
2.0
*
force
.
getReactionFieldDielectric
()
+
1.0
);
double
reactionFieldC
=
(
1.0
/
force
.
getCutoffDistance
())
*
(
3.0
*
force
.
getReactionFieldDielectric
())
/
(
2.0
*
force
.
getReactionFieldDielectric
()
+
1.0
);
defines
[
"REACTION_FIELD_K"
]
=
doubleToString
(
reactionFieldK
);
defines
[
"REACTION_FIELD_C"
]
=
doubleToString
(
reactionFieldC
);
defines
[
"REACTION_FIELD_K"
]
=
cl
.
doubleToString
(
reactionFieldK
);
defines
[
"REACTION_FIELD_C"
]
=
cl
.
doubleToString
(
reactionFieldC
);
}
if
(
force
.
getUseDispersionCorrection
()
&&
cl
.
getContextIndex
()
==
0
)
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
system
,
force
);
...
...
@@ -1396,23 +1412,24 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int
kmaxx
,
kmaxy
,
kmaxz
;
NonbondedForceImpl
::
calcEwaldParameters
(
system
,
force
,
alpha
,
kmaxx
,
kmaxy
,
kmaxz
);
defines
[
"EWALD_ALPHA"
]
=
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
);
// Create the reciprocal space kernels.
map
<
string
,
string
>
replacements
;
replacements
[
"NUM_ATOMS"
]
=
intToString
(
numParticles
);
replacements
[
"KMAX_X"
]
=
intToString
(
kmaxx
);
replacements
[
"KMAX_Y"
]
=
intToString
(
kmaxy
);
replacements
[
"KMAX_Z"
]
=
intToString
(
kmaxz
);
replacements
[
"EXP_COEFFICIENT"
]
=
doubleToString
(
-
1.0
/
(
4.0
*
alpha
*
alpha
));
replacements
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
numParticles
);
replacements
[
"KMAX_X"
]
=
cl
.
intToString
(
kmaxx
);
replacements
[
"KMAX_Y"
]
=
cl
.
intToString
(
kmaxy
);
replacements
[
"KMAX_Z"
]
=
cl
.
intToString
(
kmaxz
);
replacements
[
"EXP_COEFFICIENT"
]
=
cl
.
doubleToString
(
-
1.0
/
(
4.0
*
alpha
*
alpha
));
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
ewald
,
replacements
);
ewaldSumsKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldCosSinSums"
);
ewaldForcesKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldForces"
);
cosSinSums
=
OpenCLArray
::
create
<
mm_float2
>
(
cl
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
"cosSinSums"
);
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double2
)
:
sizeof
(
mm_float2
));
cosSinSums
=
new
OpenCLArray
(
cl
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
}
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
// Compute the PME parameters.
...
...
@@ -1422,30 +1439,31 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
gridSizeX
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeX
);
gridSizeY
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeY
);
gridSizeZ
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeZ
);
defines
[
"EWALD_ALPHA"
]
=
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
);
pmeDefines
[
"PME_ORDER"
]
=
intToString
(
PmeOrder
);
pmeDefines
[
"NUM_ATOMS"
]
=
intToString
(
numParticles
);
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
pmeDefines
[
"GRID_SIZE_X"
]
=
intToString
(
gridSizeX
);
pmeDefines
[
"GRID_SIZE_Y"
]
=
intToString
(
gridSizeY
);
pmeDefines
[
"GRID_SIZE_Z"
]
=
intToString
(
gridSizeZ
);
pmeDefines
[
"EPSILON_FACTOR"
]
=
doubleToString
(
sqrt
(
ONE_4PI_EPS0
));
pmeDefines
[
"PME_ORDER"
]
=
cl
.
intToString
(
PmeOrder
);
pmeDefines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
numParticles
);
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
cl
.
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
pmeDefines
[
"GRID_SIZE_X"
]
=
cl
.
intToString
(
gridSizeX
);
pmeDefines
[
"GRID_SIZE_Y"
]
=
cl
.
intToString
(
gridSizeY
);
pmeDefines
[
"GRID_SIZE_Z"
]
=
cl
.
intToString
(
gridSizeZ
);
pmeDefines
[
"EPSILON_FACTOR"
]
=
cl
.
doubleToString
(
sqrt
(
ONE_4PI_EPS0
));
// Create required data structures.
pmeGrid
=
OpenCLArray
::
create
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid"
);
cl
.
addAutoclearBuffer
(
pmeGrid
->
getDeviceBuffer
(),
pmeGrid
->
getSize
()
*
2
);
pmeGrid2
=
OpenCLArray
::
create
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid2"
);
pmeBsplineModuliX
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
gridSizeX
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
gridSizeY
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
gridSizeZ
,
"pmeBsplineModuliZ"
);
pmeBsplineTheta
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
PmeOrder
*
numParticles
,
"pmeBsplineTheta"
);
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
pmeGrid
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid"
);
cl
.
addAutoclearBuffer
(
*
pmeGrid
);
pmeGrid2
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid2"
);
pmeBsplineModuliX
=
new
OpenCLArray
(
cl
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
OpenCLArray
(
cl
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
OpenCLArray
(
cl
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeBsplineTheta
=
new
OpenCLArray
(
cl
,
PmeOrder
*
numParticles
,
4
*
elementSize
,
"pmeBsplineTheta"
);
bool
deviceIsCpu
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
if
(
deviceIsCpu
)
pmeBsplineDTheta
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
PmeOrder
*
numParticles
,
"pmeBsplineDTheta"
);
pmeBsplineDTheta
=
new
OpenCLArray
(
cl
,
PmeOrder
*
numParticles
,
4
*
elementSize
,
"pmeBsplineDTheta"
);
pmeAtomRange
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
OpenCLSort
<
SortTrait
>
(
cl
,
cl
.
getNumAtoms
());
...
...
@@ -1487,7 +1505,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
vector
<
cl_
float
>
moduli
(
ndata
);
vector
<
cl_
double
>
moduli
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
sc
=
0.0
;
double
ss
=
0.0
;
...
...
@@ -1503,6 +1521,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if
(
moduli
[
i
]
<
1.0e-7
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
f
;
}
if
(
cl
.
getUseDoublePrecision
())
{
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
moduli
);
else
if
(
dim
==
1
)
...
...
@@ -1510,6 +1529,18 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else
pmeBsplineModuliZ
->
upload
(
moduli
);
}
else
{
vector
<
float
>
modulif
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
modulif
[
i
]
=
(
float
)
moduli
[
i
];
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
modulif
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
modulif
);
else
pmeBsplineModuliZ
->
upload
(
modulif
);
}
}
}
else
ewaldSelfEnergy
=
0.0
;
...
...
@@ -1568,9 +1599,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"gridSpreadCharge"
);
pmeConvolutionKernel
=
cl
::
Kernel
(
program
,
"reciprocalConvolution"
);
pmeInterpolateForceKernel
=
cl
::
Kernel
(
program
,
"gridInterpolateForce"
);
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeBsplineTheta
->
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
(
2
,
OpenCLContext
::
ThreadBlockSize
*
PmeOrder
*
sizeof
(
mm_float4
)
,
NULL
);
pmeUpdateBsplinesKernel
.
setArg
(
2
,
OpenCLContext
::
ThreadBlockSize
*
PmeOrder
*
elementSize
,
NULL
);
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeAtomGridIndex
->
getDeviceBuffer
());
if
(
deviceIsCpu
)
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
pmeBsplineDTheta
->
getDeviceBuffer
());
...
...
@@ -1591,7 +1623,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineModuliX
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineModuliY
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
pmeBsplineModuliZ
->
getDeviceBuffer
());
interpolateForceThreads
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_LOCAL_MEM_SIZE
>
()
>
2
*
128
*
PmeOrder
*
sizeof
(
mm_float4
)
?
128
:
64
);
interpolateForceThreads
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_LOCAL_MEM_SIZE
>
()
>
2
*
128
*
PmeOrder
*
elementSize
?
128
:
64
);
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeGrid
->
getDeviceBuffer
());
...
...
@@ -1600,7 +1632,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
pmeBsplineDTheta
->
getDeviceBuffer
());
}
else
pmeInterpolateForceKernel
.
setArg
(
5
,
2
*
interpolateForceThreads
*
PmeOrder
*
sizeof
(
mm_float4
)
,
NULL
);
pmeInterpolateForceKernel
.
setArg
(
5
,
2
*
interpolateForceThreads
*
PmeOrder
*
elementSize
,
NULL
);
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
pmeFinishSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"finishSpreadCharge"
);
pmeFinishSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid
->
getDeviceBuffer
());
...
...
@@ -1608,57 +1640,68 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
}
if
(
cosSinSums
!=
NULL
&&
cl
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
mm_float4
boxSize
=
cl
.
getPeriodicBoxSize
();
mm_float4
recipBoxSize
=
mm_float4
((
float
)
(
2
*
M_PI
/
boxSize
.
x
),
(
float
)
(
2
*
M_PI
/
boxSize
.
y
),
(
float
)
(
2
*
M_PI
/
boxSize
.
z
),
0
);
float
recipCoefficient
=
(
float
)
(
ONE_4PI_EPS0
*
4
*
M_PI
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
));
ewaldSumsKernel
.
setArg
<
mm_float4
>
(
3
,
recipBoxSize
);
ewaldSumsKernel
.
setArg
<
cl_float
>
(
4
,
recipCoefficient
);
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
mm_double4
recipBoxSize
=
mm_double4
(
2
*
M_PI
/
boxSize
.
x
,
2
*
M_PI
/
boxSize
.
y
,
2
*
M_PI
/
boxSize
.
z
,
0.0
);
double
recipCoefficient
=
ONE_4PI_EPS0
*
4
*
M_PI
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
if
(
cl
.
getUseDoublePrecision
())
{
ewaldSumsKernel
.
setArg
<
mm_double4
>
(
3
,
recipBoxSize
);
ewaldSumsKernel
.
setArg
<
cl_double
>
(
4
,
recipCoefficient
);
ewaldForcesKernel
.
setArg
<
mm_double4
>
(
3
,
recipBoxSize
);
ewaldForcesKernel
.
setArg
<
cl_double
>
(
4
,
recipCoefficient
);
}
else
{
ewaldSumsKernel
.
setArg
<
mm_float4
>
(
3
,
mm_float4
((
float
)
recipBoxSize
.
x
,
(
float
)
recipBoxSize
.
y
,
(
float
)
recipBoxSize
.
z
,
0
));
ewaldSumsKernel
.
setArg
<
cl_float
>
(
4
,
(
cl_float
)
recipCoefficient
);
ewaldForcesKernel
.
setArg
<
mm_float4
>
(
3
,
mm_float4
((
float
)
recipBoxSize
.
x
,
(
float
)
recipBoxSize
.
y
,
(
float
)
recipBoxSize
.
z
,
0
));
ewaldForcesKernel
.
setArg
<
cl_float
>
(
4
,
(
cl_float
)
recipCoefficient
);
}
cl
.
executeKernel
(
ewaldSumsKernel
,
cosSinSums
->
getSize
());
ewaldForcesKernel
.
setArg
<
mm_float4
>
(
3
,
recipBoxSize
);
ewaldForcesKernel
.
setArg
<
cl_float
>
(
4
,
recipCoefficient
);
cl
.
executeKernel
(
ewaldForcesKernel
,
cl
.
getNumAtoms
());
}
if
(
pmeGrid
!=
NULL
&&
cl
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
mm_float4
boxSize
=
cl
.
getPeriodicBoxSize
();
mm_float4
invBoxSize
=
cl
.
getInvPeriodicBoxSize
();
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
4
,
boxSize
);
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
5
,
invBoxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
4
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
5
);
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
if
(
deviceIsCpu
)
{
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
5
,
boxSize
);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
invBoxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
5
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
6
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
}
else
{
sort
->
sort
(
*
pmeAtomGridIndex
);
pmeAtomRangeKernel
.
setArg
<
mm_float4
>
(
3
,
boxSize
);
pmeAtomRangeKernel
.
setArg
<
mm_float4
>
(
4
,
invBoxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeAtomRangeKernel
,
3
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeAtomRangeKernel
,
4
);
cl
.
executeKernel
(
pmeAtomRangeKernel
,
cl
.
getNumAtoms
());
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
5
,
boxSize
);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
invBoxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
5
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
6
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
(),
PmeOrder
*
PmeOrder
*
PmeOrder
);
cl
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
pmeGrid
->
getSize
());
}
else
{
pmeZIndexKernel
.
setArg
<
mm_float4
>
(
2
,
boxSize
);
pmeZIndexKernel
.
setArg
<
mm_float4
>
(
3
,
invBoxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeZIndexKernel
,
2
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeZIndexKernel
,
3
);
cl
.
executeKernel
(
pmeZIndexKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
}
}
fft
->
execFFT
(
*
pmeGrid
,
*
pmeGrid2
,
true
);
pmeConvolutionKernel
.
setArg
<
mm_float4
>
(
5
,
invBoxSize
);
pmeConvolutionKernel
.
setArg
<
cl_float
>
(
6
,
(
float
)
(
1.0
/
(
M_PI
*
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
)));
setInvPeriodicBoxSizeArg
(
cl
,
pmeConvolutionKernel
,
5
);
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
double
scaleFactor
=
1.0
/
(
M_PI
*
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
if
(
cl
.
getUseDoublePrecision
())
pmeConvolutionKernel
.
setArg
<
cl_double
>
(
6
,
scaleFactor
);
else
pmeConvolutionKernel
.
setArg
<
cl_float
>
(
6
,
(
float
)
scaleFactor
);
cl
.
executeKernel
(
pmeConvolutionKernel
,
cl
.
getNumAtoms
());
fft
->
execFFT
(
*
pmeGrid2
,
*
pmeGrid
,
false
);
pmeInterpolateForceKernel
.
setArg
<
mm_float4
>
(
3
,
boxSize
);
pmeInterpolateForceKernel
.
setArg
<
mm_float4
>
(
4
,
invBoxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeInterpolateForceKernel
,
3
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeInterpolateForceKernel
,
4
);
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
cl
.
getNumAtoms
(),
interpolateForceThreads
);
}
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
mm_
float
4
boxSize
=
cl
.
getPeriodicBoxSize
();
mm_
double
4
boxSize
=
cl
.
getPeriodicBoxSize
Double
();
energy
+=
dispersionCoefficient
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
}
return
energy
;
...
...
@@ -1697,8 +1740,9 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
// Record the per-particle parameters.
OpenCLArray
&
posq
=
cl
.
getPosq
();
posq
.
download
(
(
mm_float4
*
)
cl
.
getPinnedBuffer
());
posq
.
download
(
cl
.
getPinnedBuffer
());
mm_float4
*
posqf
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
mm_double4
*
posqd
=
(
mm_double4
*
)
cl
.
getPinnedBuffer
();
vector
<
mm_float2
>
sigmaEpsilonVector
(
cl
.
getPaddedNumAtoms
());
double
sumSquaredCharges
=
0.0
;
const
vector
<
cl_int
>&
order
=
cl
.
getAtomIndex
();
...
...
@@ -1706,6 +1750,9 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
int
index
=
order
[
i
];
double
charge
,
sigma
,
epsilon
;
force
.
getParticleParameters
(
index
,
charge
,
sigma
,
epsilon
);
if
(
cl
.
getUseDoublePrecision
())
posqd
[
i
].
w
=
charge
;
else
posqf
[
i
].
w
=
(
float
)
charge
;
sigmaEpsilonVector
[
index
]
=
mm_float2
((
float
)
(
0.5
*
sigma
),
(
float
)
(
2.0
*
sqrt
(
epsilon
)));
sumSquaredCharges
+=
charge
*
charge
;
...
...
@@ -1782,7 +1829,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
int
forceIndex
;
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
;
string
prefix
=
"custom"
+
intToString
(
forceIndex
)
+
"_"
;
string
prefix
=
"custom"
+
cl
.
intToString
(
forceIndex
)
+
"_"
;
// Record parameters and exclusions.
...
...
@@ -1819,11 +1866,11 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
vector
<
double
>
values
;
double
min
,
max
;
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
string
arrayName
=
prefix
+
"table"
+
intToString
(
i
);
string
arrayName
=
prefix
+
"table"
+
cl
.
intToString
(
i
);
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
mm_float4
>
f
=
OpenCL
ExpressionUtilities
::
computeFunctionCoefficients
(
values
,
min
,
max
);
vector
<
mm_float4
>
f
=
cl
.
get
ExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
tabulatedFunctions
.
push_back
(
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
...
...
@@ -1866,18 +1913,18 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
"globals["
+
intToString
(
i
)
+
"]"
;
string
value
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
.
push_back
(
makeVariable
(
name
,
prefix
+
value
));
}
stringstream
compute
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
prefix
+
"temp"
,
prefix
+
"functionParams"
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
prefix
+
"temp"
,
prefix
+
"functionParams"
);
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
string
source
=
cl
.
replaceStrings
(
OpenCLKernelSources
::
customNonbonded
,
replacements
);
cl
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
intToString
(
i
+
1
),
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
cl
.
intToString
(
i
+
1
),
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
if
(
globals
!=
NULL
)
{
globals
->
upload
(
globalParamValues
);
...
...
@@ -1965,14 +2012,14 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
longBornSum
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"longBornSum"
);
longBornForce
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"longBornForce"
);
bornForce
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"bornForce"
);
cl
.
addAutoclearBuffer
(
longBornSum
->
getDeviceBuffer
(),
2
*
longBornSum
->
getSize
()
);
cl
.
addAutoclearBuffer
(
longBornForce
->
getDeviceBuffer
(),
2
*
longBornForce
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
longBornSum
);
cl
.
addAutoclearBuffer
(
*
longBornForce
);
}
else
{
bornSum
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
nb
.
getNumForceBuffers
(),
"bornSum"
);
bornForce
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
nb
.
getNumForceBuffers
(),
"bornForce"
);
cl
.
addAutoclearBuffer
(
bornSum
->
getDeviceBuffer
(),
bornSum
->
getSize
()
);
cl
.
addAutoclearBuffer
(
bornForce
->
getDeviceBuffer
(),
bornForce
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
bornSum
);
cl
.
addAutoclearBuffer
(
*
bornForce
);
}
vector
<
mm_float4
>
posq
(
cl
.
getPaddedNumAtoms
(),
mm_float4
(
0
,
0
,
0
,
0
));
int
numParticles
=
force
.
getNumParticles
();
...
...
@@ -2012,12 +2059,12 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
nb
.
getUsePeriodic
())
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"CUTOFF_SQUARED"
]
=
doubleToString
(
nb
.
getCutoffDistance
()
*
nb
.
getCutoffDistance
());
defines
[
"PREFACTOR"
]
=
doubleToString
(
prefactor
);
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
nb
.
getForceThreadBlockSize
());
defines
[
"CUTOFF_SQUARED"
]
=
cl
.
doubleToString
(
nb
.
getCutoffDistance
()
*
nb
.
getCutoffDistance
());
defines
[
"PREFACTOR"
]
=
cl
.
doubleToString
(
prefactor
);
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
cl
.
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
cl
.
intToString
(
nb
.
getForceThreadBlockSize
());
string
platformVendor
=
cl
::
Platform
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_PLATFORM
>
()).
getInfo
<
CL_PLATFORM_VENDOR
>
();
if
(
platformVendor
==
"Apple"
)
defines
[
"USE_APPLE_WORKAROUND"
]
=
"1"
;
...
...
@@ -2220,7 +2267,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
int
forceIndex
;
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
;
string
prefix
=
"custom"
+
intToString
(
forceIndex
)
+
"_"
;
string
prefix
=
"custom"
+
cl
.
intToString
(
forceIndex
)
+
"_"
;
// Record parameters and exclusions.
...
...
@@ -2259,11 +2306,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector
<
double
>
values
;
double
min
,
max
;
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
string
arrayName
=
prefix
+
"table"
+
intToString
(
i
);
string
arrayName
=
prefix
+
"table"
+
cl
.
intToString
(
i
);
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
mm_float4
>
f
=
OpenCL
ExpressionUtilities
::
computeFunctionCoefficients
(
values
,
min
,
max
);
vector
<
mm_float4
>
f
=
cl
.
get
ExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
tabulatedFunctions
.
push_back
(
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
...
...
@@ -2356,7 +2403,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
"globals["
+
intToString
(
i
)
+
"]"
;
string
value
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
.
push_back
(
makeVariable
(
name
,
value
));
}
map
<
string
,
Lepton
::
ParsedExpression
>
n2ValueExpressions
;
...
...
@@ -2364,7 +2411,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
Lepton
::
ParsedExpression
ex
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
0
],
functions
).
optimize
();
n2ValueExpressions
[
"tempValue1 = "
]
=
ex
;
n2ValueExpressions
[
"tempValue2 = "
]
=
ex
.
renameVariables
(
rename
);
n2ValueSource
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
n2ValueExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
n2ValueSource
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
n2ValueExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
map
<
string
,
string
>
replacements
;
string
n2ValueStr
=
n2ValueSource
.
str
();
replacements
[
"COMPUTE_VALUE"
]
=
n2ValueStr
;
...
...
@@ -2374,7 +2421,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
pairValueUsesParam
.
resize
(
params
->
getBuffers
().
size
(),
false
);
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
cl
.
intToString
(
i
+
1
);
if
(
n2ValueStr
.
find
(
paramName
+
"1"
)
!=
n2ValueStr
.
npos
||
n2ValueStr
.
find
(
paramName
+
"2"
)
!=
n2ValueStr
.
npos
)
{
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_"
<<
paramName
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[localAtomIndex] = "
<<
paramName
<<
"1;
\n
"
;
...
...
@@ -2399,11 +2446,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if
(
useExclusionsForValue
)
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
cl
.
getSIMDWidth
()
==
32
)
defines
[
"WARPS_PER_GROUP"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
OpenCLContext
::
TileSize
);
defines
[
"CUTOFF_SQUARED"
]
=
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"WARPS_PER_GROUP"
]
=
cl
.
intToString
(
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
OpenCLContext
::
TileSize
);
defines
[
"CUTOFF_SQUARED"
]
=
cl
.
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
cl
.
intToString
(
cl
.
getNumAtomBlocks
());
string
file
;
if
(
deviceIsCpu
)
file
=
OpenCLKernelSources
::
customGBValueN2_cpu
;
...
...
@@ -2424,12 +2471,12 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs
<<
", __global const float* globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
paramName
;
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
valueName
;
reductionSource
<<
buffer
.
getType
()
<<
" local_"
<<
valueName
<<
";
\n
"
;
}
...
...
@@ -2441,22 +2488,22 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
variables
[
force
.
getPerParticleParameterName
(
i
)]
=
"params"
+
params
->
getParameterSuffix
(
i
,
"[index]"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
variables
[
computedValueNames
[
i
-
1
]]
=
"local_values"
+
computedValues
->
getParameterSuffix
(
i
-
1
);
map
<
string
,
Lepton
::
ParsedExpression
>
valueExpressions
;
valueExpressions
[
"local_values"
+
computedValues
->
getParameterSuffix
(
i
)
+
" = "
]
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
i
],
functions
).
optimize
();
reductionSource
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
valueExpressions
,
variables
,
functionDefinitions
,
"value"
+
intToString
(
i
)
+
"_temp"
,
prefix
+
"functionParams"
);
reductionSource
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
valueExpressions
,
variables
,
functionDefinitions
,
"value"
+
cl
.
intToString
(
i
)
+
"_temp"
,
prefix
+
"functionParams"
);
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
cl
.
intToString
(
i
+
1
);
reductionSource
<<
"global_"
<<
valueName
<<
"[index] = local_"
<<
valueName
<<
";
\n
"
;
}
map
<
string
,
string
>
replacements
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"COMPUTE_VALUES"
]
=
reductionSource
.
str
();
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customGBValuePerParticle
,
replacements
),
defines
);
perParticleValueKernel
=
cl
::
Kernel
(
program
,
"computePerParticleValues"
);
}
...
...
@@ -2478,7 +2525,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
variables
.
push_back
(
makeVariable
(
computedValueNames
[
i
]
+
"2"
,
"values"
+
computedValues
->
getParameterSuffix
(
i
,
"2"
)));
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
variables
.
push_back
(
makeVariable
(
force
.
getGlobalParameterName
(
i
),
"globals["
+
intToString
(
i
)
+
"]"
));
variables
.
push_back
(
makeVariable
(
force
.
getGlobalParameterName
(
i
),
"globals["
+
cl
.
intToString
(
i
)
+
"]"
));
stringstream
n2EnergySource
;
bool
anyExclusions
=
(
force
.
getNumExclusions
()
>
0
);
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyTerms
();
i
++
)
{
...
...
@@ -2494,23 +2541,23 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if
(
useLong
)
{
for
(
int
j
=
0
;
j
<
force
.
getNumComputedValues
();
j
++
)
{
if
(
needChainForValue
[
j
])
{
string
index
=
intToString
(
j
+
1
);
n2EnergyExpressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_1 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
];
n2EnergyExpressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_2 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
+
1
];
string
index
=
cl
.
intToString
(
j
+
1
);
n2EnergyExpressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_1 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
];
n2EnergyExpressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_2 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
+
1
];
}
}
}
else
{
for
(
int
j
=
0
;
j
<
force
.
getNumComputedValues
();
j
++
)
{
if
(
needChainForValue
[
j
])
{
n2EnergyExpressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
,
"_1"
)
+
" += "
]
=
energyDerivExpressions
[
i
][
2
*
j
];
n2EnergyExpressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
,
"_2"
)
+
" += "
]
=
energyDerivExpressions
[
i
][
2
*
j
+
1
];
n2EnergyExpressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
,
"_1"
)
+
" += "
]
=
energyDerivExpressions
[
i
][
2
*
j
];
n2EnergyExpressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
,
"_2"
)
+
" += "
]
=
energyDerivExpressions
[
i
][
2
*
j
+
1
];
}
}
}
if
(
exclude
)
n2EnergySource
<<
"if (!isExcluded) {
\n
"
;
n2EnergySource
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
n2EnergyExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
n2EnergySource
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
n2EnergyExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
if
(
exclude
)
n2EnergySource
<<
"}
\n
"
;
}
...
...
@@ -2523,7 +2570,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
pairEnergyUsesParam
.
resize
(
params
->
getBuffers
().
size
(),
false
);
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
cl
.
intToString
(
i
+
1
);
if
(
n2EnergyStr
.
find
(
paramName
+
"1"
)
!=
n2EnergyStr
.
npos
||
n2EnergyStr
.
find
(
paramName
+
"2"
)
!=
n2EnergyStr
.
npos
)
{
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_"
<<
paramName
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[localAtomIndex] = "
<<
paramName
<<
"1;
\n
"
;
...
...
@@ -2536,7 +2583,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
pairEnergyUsesValue
.
resize
(
computedValues
->
getBuffers
().
size
(),
false
);
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
cl
.
intToString
(
i
+
1
);
if
(
n2EnergyStr
.
find
(
valueName
+
"1"
)
!=
n2EnergyStr
.
npos
||
n2EnergyStr
.
find
(
valueName
+
"2"
)
!=
n2EnergyStr
.
npos
)
{
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
valueName
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_"
<<
valueName
;
loadLocal1
<<
"local_"
<<
valueName
<<
"[localAtomIndex] = "
<<
valueName
<<
"1;
\n
"
;
...
...
@@ -2549,7 +2596,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if
(
useLong
)
{
extraArgs
<<
", __global long* restrict derivBuffers"
;
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __local float* restrict local_deriv"
<<
index
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[localAtomIndex] = 0.0f;
\n
"
;
declare1
<<
"float deriv"
<<
index
<<
"_1 = 0.0f;
\n
"
;
...
...
@@ -2564,7 +2611,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
else
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict derivBuffers"
<<
index
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_deriv"
<<
index
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[localAtomIndex] = 0.0f;
\n
"
;
declare1
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_1 = 0.0f;
\n
"
;
...
...
@@ -2598,11 +2645,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if
(
anyExclusions
)
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
cl
.
getSIMDWidth
()
==
32
)
defines
[
"WARPS_PER_GROUP"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
OpenCLContext
::
TileSize
);
defines
[
"CUTOFF_SQUARED"
]
=
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"WARPS_PER_GROUP"
]
=
cl
.
intToString
(
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
OpenCLContext
::
TileSize
);
defines
[
"CUTOFF_SQUARED"
]
=
cl
.
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
cl
.
intToString
(
cl
.
getNumAtomBlocks
());
string
file
;
if
(
deviceIsCpu
)
file
=
OpenCLKernelSources
::
customGBEnergyN2_cpu
;
...
...
@@ -2621,17 +2668,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs
<<
", __global const float* globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
paramName
;
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
valueName
;
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict derivBuffers"
<<
index
;
compute
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
" = derivBuffers"
<<
index
<<
"[index];
\n
"
;
}
...
...
@@ -2639,11 +2686,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs
<<
", __global const long* restrict derivBuffersIn"
;
for
(
int
i
=
0
;
i
<
energyDerivs
->
getNumParameters
();
++
i
)
reduce
<<
"derivBuffers"
<<
energyDerivs
->
getParameterSuffix
(
i
,
"[index]"
)
<<
" = (1.0f/0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*"
<<
intToString
(
i
)
<<
"];
\n
"
;
" = (1.0f/0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*"
<<
cl
.
intToString
(
i
)
<<
"];
\n
"
;
}
else
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
reduce
<<
"REDUCE_VALUE(derivBuffers"
<<
intToString
(
i
+
1
)
<<
", "
<<
energyDerivs
->
getBuffers
()[
i
].
getType
()
<<
")
\n
"
;
reduce
<<
"REDUCE_VALUE(derivBuffers"
<<
cl
.
intToString
(
i
+
1
)
<<
", "
<<
energyDerivs
->
getBuffers
()[
i
].
getType
()
<<
")
\n
"
;
}
// Compute the various expressions.
...
...
@@ -2655,7 +2702,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
variables
[
force
.
getPerParticleParameterName
(
i
)]
=
"params"
+
params
->
getParameterSuffix
(
i
,
"[index]"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
variables
[
computedValueNames
[
i
]]
=
"values"
+
computedValues
->
getParameterSuffix
(
i
,
"[index]"
);
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
...
...
@@ -2666,23 +2713,23 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if
(
type
!=
CustomGBForce
::
SingleParticle
)
continue
;
Lepton
::
ParsedExpression
parsed
=
Lepton
::
Parser
::
parse
(
expression
,
functions
).
optimize
();
expressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ energy += "
]
=
parsed
;
expressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ energy += "
]
=
parsed
;
for
(
int
j
=
0
;
j
<
force
.
getNumComputedValues
();
j
++
)
expressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
)
+
" += "
]
=
energyDerivExpressions
[
i
][
j
];
expressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
)
+
" += "
]
=
energyDerivExpressions
[
i
][
j
];
Lepton
::
ParsedExpression
gradx
=
parsed
.
differentiate
(
"x"
).
optimize
();
Lepton
::
ParsedExpression
grady
=
parsed
.
differentiate
(
"y"
).
optimize
();
Lepton
::
ParsedExpression
gradz
=
parsed
.
differentiate
(
"z"
).
optimize
();
if
(
!
isZeroExpression
(
gradx
))
expressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ force.x -= "
]
=
gradx
;
expressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ force.x -= "
]
=
gradx
;
if
(
!
isZeroExpression
(
grady
))
expressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ force.y -= "
]
=
grady
;
expressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ force.y -= "
]
=
grady
;
if
(
!
isZeroExpression
(
gradz
))
expressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ force.z -= "
]
=
gradz
;
expressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ force.z -= "
]
=
gradz
;
}
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
for
(
int
j
=
0
;
j
<
i
;
j
++
)
expressions
[
"float dV"
+
intToString
(
i
)
+
"dV"
+
intToString
(
j
)
+
" = "
]
=
valueDerivExpressions
[
i
][
j
];
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
expressions
[
"float dV"
+
cl
.
intToString
(
i
)
+
"dV"
+
cl
.
intToString
(
j
)
+
" = "
]
=
valueDerivExpressions
[
i
][
j
];
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
// Record values.
...
...
@@ -2695,7 +2742,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
compute
<<
"deriv"
<<
(
i
+
1
)
<<
" *= totalDeriv"
<<
i
<<
";
\n
"
;
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
compute
<<
"derivBuffers"
<<
index
<<
"[index] = deriv"
<<
index
<<
";
\n
"
;
}
map
<
string
,
string
>
replacements
;
...
...
@@ -2703,8 +2750,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
replacements
[
"REDUCE_DERIVATIVES"
]
=
reduce
.
str
();
replacements
[
"COMPUTE_ENERGY"
]
=
compute
.
str
();
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customGBEnergyPerParticle
,
replacements
),
defines
);
perParticleEnergyKernel
=
cl
::
Kernel
(
program
,
"computePerParticleEnergy"
);
}
...
...
@@ -2716,17 +2763,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs
<<
", __global const float* globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
paramName
;
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
valueName
;
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict derivBuffers"
<<
index
;
compute
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
" = derivBuffers"
<<
index
<<
"[index];
\n
"
;
}
...
...
@@ -2737,18 +2784,18 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
variables
[
force
.
getPerParticleParameterName
(
i
)]
=
"params"
+
params
->
getParameterSuffix
(
i
,
"[index]"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
variables
[
computedValueNames
[
i
]]
=
"values"
+
computedValues
->
getParameterSuffix
(
i
,
"[index]"
);
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
string
is
=
intToString
(
i
);
string
is
=
cl
.
intToString
(
i
);
compute
<<
"float4 dV"
<<
is
<<
"dR = (float4) 0;
\n
"
;
for
(
int
j
=
1
;
j
<
i
;
j
++
)
{
if
(
!
isZeroExpression
(
valueDerivExpressions
[
i
][
j
]))
{
map
<
string
,
Lepton
::
ParsedExpression
>
derivExpressions
;
string
js
=
intToString
(
j
);
string
js
=
cl
.
intToString
(
j
);
derivExpressions
[
"float dV"
+
is
+
"dV"
+
js
+
" = "
]
=
valueDerivExpressions
[
i
][
j
];
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
derivExpressions
,
variables
,
functionDefinitions
,
"temp_"
+
is
+
"_"
+
js
,
prefix
+
"functionParams"
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
derivExpressions
,
variables
,
functionDefinitions
,
"temp_"
+
is
+
"_"
+
js
,
prefix
+
"functionParams"
);
compute
<<
"dV"
<<
is
<<
"dR += dV"
<<
is
<<
"dV"
<<
js
<<
"*dV"
<<
js
<<
"dR;
\n
"
;
}
}
...
...
@@ -2759,17 +2806,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
gradientExpressions
[
"dV"
+
is
+
"dR.y += "
]
=
valueGradientExpressions
[
i
][
1
];
if
(
!
isZeroExpression
(
valueGradientExpressions
[
i
][
2
]))
gradientExpressions
[
"dV"
+
is
+
"dR.z += "
]
=
valueGradientExpressions
[
i
][
2
];
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
gradientExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
gradientExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
}
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
string
is
=
intToString
(
i
);
string
is
=
cl
.
intToString
(
i
);
compute
<<
"force -= deriv"
<<
energyDerivs
->
getParameterSuffix
(
i
)
<<
"*dV"
<<
is
<<
"dR;
\n
"
;
}
map
<
string
,
string
>
replacements
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"COMPUTE_FORCES"
]
=
compute
.
str
();
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customGBGradientChainRule
,
replacements
),
defines
);
gradientChainRuleKernel
=
cl
::
Kernel
(
program
,
"computeGradientChainRuleTerms"
);
}
...
...
@@ -2779,7 +2826,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
globalVariables
;
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
"globals["
+
intToString
(
i
)
+
"]"
;
string
value
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
globalVariables
.
push_back
(
makeVariable
(
name
,
prefix
+
value
));
}
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variables
=
globalVariables
;
...
...
@@ -2800,7 +2847,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
Lepton
::
ParsedExpression
dVdR
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
0
],
functions
).
differentiate
(
"r"
).
optimize
();
derivExpressions
[
"float dV0dR1 = "
]
=
dVdR
;
derivExpressions
[
"float dV0dR2 = "
]
=
dVdR
.
renameVariables
(
rename
);
chainSource
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
derivExpressions
,
variables
,
functionDefinitions
,
prefix
+
"temp0_"
,
prefix
+
"functionParams"
);
chainSource
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
derivExpressions
,
variables
,
functionDefinitions
,
prefix
+
"temp0_"
,
prefix
+
"functionParams"
);
if
(
needChainForValue
[
0
])
{
if
(
useExclusionsForValue
)
chainSource
<<
"if (!isExcluded) {
\n
"
;
...
...
@@ -2823,20 +2870,20 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector
<
OpenCLNonbondedUtilities
::
ParameterInfo
>
arguments
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
prefix
+
"params"
+
intToString
(
i
+
1
);
string
paramName
=
prefix
+
"params"
+
cl
.
intToString
(
i
+
1
);
if
(
chainStr
.
find
(
paramName
+
"1"
)
!=
chainStr
.
npos
||
chainStr
.
find
(
paramName
+
"2"
)
!=
chainStr
.
npos
)
parameters
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
paramName
=
prefix
+
"values"
+
intToString
(
i
+
1
);
string
paramName
=
prefix
+
"values"
+
cl
.
intToString
(
i
+
1
);
if
(
chainStr
.
find
(
paramName
+
"1"
)
!=
chainStr
.
npos
||
chainStr
.
find
(
paramName
+
"2"
)
!=
chainStr
.
npos
)
parameters
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
if
(
needChainForValue
[
i
])
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
paramName
=
prefix
+
"dEdV"
+
intToString
(
i
+
1
);
string
paramName
=
prefix
+
"dEdV"
+
cl
.
intToString
(
i
+
1
);
parameters
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
}
...
...
@@ -2852,11 +2899,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
cl
.
addForce
(
new
OpenCLCustomGBForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
if
(
useLong
)
cl
.
addAutoclearBuffer
(
longEnergyDerivs
->
getDeviceBuffer
(),
2
*
longEnergyDerivs
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
longEnergyDerivs
);
else
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
cl
.
addAutoclearBuffer
(
buffer
.
getMemory
(),
buffer
.
getSize
()
*
energyDerivs
->
getNumObjects
()
/
sizeof
(
cl_float
)
);
cl
.
addAutoclearBuffer
(
buffer
.
getMemory
(),
buffer
.
getSize
()
*
energyDerivs
->
getNumObjects
());
}
}
}
...
...
@@ -2870,12 +2917,12 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
bool
useLong
=
(
cl
.
getSupports64BitGlobalAtomics
()
&&
!
deviceIsCpu
);
if
(
useLong
)
{
longValueBuffers
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"customGBLongValueBuffers"
);
cl
.
addAutoclearBuffer
(
longValueBuffers
->
getDeviceBuffer
(),
2
*
longValueBuffers
->
getSize
()
);
cl
.
clearBuffer
(
longValueBuffers
->
getDeviceBuffer
(),
2
*
longValueBuffers
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
longValueBuffers
);
cl
.
clearBuffer
(
*
longValueBuffers
);
}
else
{
valueBuffers
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
nb
.
getNumForceBuffers
(),
"customGBValueBuffers"
);
cl
.
addAutoclearBuffer
(
valueBuffers
->
getDeviceBuffer
(),
valueBuffers
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
valueBuffers
);
cl
.
clearBuffer
(
*
valueBuffers
);
}
int
index
=
0
;
...
...
@@ -3151,9 +3198,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
Lepton
::
ParsedExpression
forceExpressionZ
=
energyExpression
.
differentiate
(
"z"
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"
float
dEdX = "
]
=
forceExpressionX
;
expressions
[
"
float
dEdY = "
]
=
forceExpressionY
;
expressions
[
"
float
dEdZ = "
]
=
forceExpressionZ
;
expressions
[
"
real
dEdX = "
]
=
forceExpressionX
;
expressions
[
"
real
dEdY = "
]
=
forceExpressionY
;
expressions
[
"
real
dEdZ = "
]
=
forceExpressionZ
;
// Create the kernels.
...
...
@@ -3171,7 +3218,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
argName
+
"["
+
intToString
(
i
)
+
"]"
;
string
value
=
argName
+
"["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
}
}
...
...
@@ -3182,7 +3229,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
compute
<<
buffer
.
getType
()
<<
" particleParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
}
vector
<
pair
<
string
,
string
>
>
functions
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
customExternalForce
,
replacements
),
force
.
getForceGroup
());
...
...
@@ -3455,11 +3502,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
vector
<
double
>
values
;
double
min
,
max
;
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
string
arrayName
=
"table"
+
intToString
(
i
);
string
arrayName
=
"table"
+
cl
.
intToString
(
i
);
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
mm_float4
>
f
=
OpenCL
ExpressionUtilities
::
computeFunctionCoefficients
(
values
,
min
,
max
);
vector
<
mm_float4
>
f
=
cl
.
get
ExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
tabulatedFunctions
.
push_back
(
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
tableArgs
<<
", __global const float4* restrict "
<<
arrayName
;
...
...
@@ -3491,7 +3538,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
variables
[
name
]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
...
...
@@ -3512,12 +3559,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName
);
}
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
r_"
+
deltaName
+
" =
sqrt
(delta"
+
deltaName
+
".w);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
r_"
+
deltaName
+
" =
SQRT
(delta"
+
deltaName
+
".w);
\n
"
);
variables
[
iter
->
first
]
=
"r_"
+
deltaName
;
forceExpressions
[
"
float
dEdDistance"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdDistance"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
...
...
@@ -3526,16 +3573,16 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
0
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
0
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName1
);
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
2
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
2
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName2
);
}
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
"
+
angleName
+
" = computeAngle(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
"
+
angleName
+
" = computeAngle(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
variables
[
iter
->
first
]
=
angleName
;
forceExpressions
[
"
float
dEdAngle"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdAngle"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
...
...
@@ -3547,23 +3594,23 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
dihedralName
=
"dihedral_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName1
);
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName2
);
}
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName3
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
3
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName3
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
3
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName3
);
}
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 "
+
crossName1
+
" = computeCross(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 "
+
crossName2
+
" = computeCross(delta"
+
deltaName2
+
", delta"
+
deltaName3
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
"
+
dihedralName
+
" = computeAngle("
+
crossName1
+
", "
+
crossName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 "
+
crossName1
+
" = computeCross(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 "
+
crossName2
+
" = computeCross(delta"
+
deltaName2
+
", delta"
+
deltaName3
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
"
+
dihedralName
+
" = computeAngle("
+
crossName1
+
", "
+
crossName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
dihedralName
+
" *= (delta"
+
deltaName1
+
".x*"
+
crossName2
+
".x + delta"
+
deltaName1
+
".y*"
+
crossName2
+
".y + delta"
+
deltaName1
+
".z*"
+
crossName2
+
".z < 0 ? -1 : 1);
\n
"
);
variables
[
iter
->
first
]
=
dihedralName
;
forceExpressions
[
"
float
dEdDihedral"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdDihedral"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
// Next it needs to load parameters from global memory.
...
...
@@ -3573,19 +3620,19 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for
(
int
i
=
0
;
i
<
(
int
)
donorParams
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
donorParams
->
getBuffers
()[
i
];
extraArgs
<<
", __global const "
+
buffer
.
getType
()
+
"* restrict donor"
+
buffer
.
getName
();
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" donorParams"
+
intToString
(
i
+
1
)
+
" = donor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" donorParams"
+
cl
.
intToString
(
i
+
1
)
+
" = donor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
}
for
(
int
i
=
0
;
i
<
(
int
)
acceptorParams
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
acceptorParams
->
getBuffers
()[
i
];
extraArgs
<<
", __global const "
+
buffer
.
getType
()
+
"* restrict acceptor"
+
buffer
.
getName
();
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" acceptorParams"
+
intToString
(
i
+
1
)
+
" = acceptor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" acceptorParams"
+
cl
.
intToString
(
i
+
1
)
+
" = acceptor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
}
// Now evaluate the expressions.
computeAcceptor
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
computeAcceptor
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
forceExpressions
[
"energy += "
]
=
energyExpression
;
computeDonor
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
computeDonor
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
// Finally, apply forces to atoms.
...
...
@@ -3593,7 +3640,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
value
=
"(dEdDistance"
+
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
+
".xyz"
;
string
value
=
"(dEdDistance"
+
cl
.
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
+
".xyz"
;
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"-"
+
value
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
value
);
}
...
...
@@ -3603,11 +3650,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string
deltaName1
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
0
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"{
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 crossProd = cross(delta"
+
deltaName2
+
", delta"
+
deltaName1
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
lengthCross = max(length(crossProd), 1e-6f);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 deltaCross0 = -cross(delta"
+
deltaName1
+
", crossProd)*dEdAngle"
+
intToString
(
index
)
+
"/(delta"
+
deltaName1
+
".w*lengthCross);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 deltaCross2 = cross(delta"
+
deltaName2
+
", crossProd)*dEdAngle"
+
intToString
(
index
)
+
"/(delta"
+
deltaName2
+
".w*lengthCross);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 crossProd = cross(delta"
+
deltaName2
+
", delta"
+
deltaName1
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
lengthCross = max(length(crossProd),
(real)
1e-6f);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 deltaCross0 = -cross(delta"
+
deltaName1
+
", crossProd)*dEdAngle"
+
cl
.
intToString
(
index
)
+
"/(delta"
+
deltaName1
+
".w*lengthCross);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 deltaCross2 = cross(delta"
+
deltaName2
+
", crossProd)*dEdAngle"
+
cl
.
intToString
(
index
)
+
"/(delta"
+
deltaName2
+
".w*lengthCross);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"deltaCross0.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
"deltaCross1.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
2
],
"deltaCross2.xyz"
);
...
...
@@ -3622,15 +3669,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"{
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
r =
sqrt
(delta"
+
deltaName2
+
".w);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 ff;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.x = (-dEdDihedral"
+
intToString
(
index
)
+
"*r)/"
+
crossName1
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
r =
SQRT
(delta"
+
deltaName2
+
".w);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 ff;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.x = (-dEdDihedral"
+
cl
.
intToString
(
index
)
+
"*r)/"
+
crossName1
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.y = (delta"
+
deltaName1
+
".x*delta"
+
deltaName2
+
".x + delta"
+
deltaName1
+
".y*delta"
+
deltaName2
+
".y + delta"
+
deltaName1
+
".z*delta"
+
deltaName2
+
".z)/delta"
+
deltaName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.z = (delta"
+
deltaName3
+
".x*delta"
+
deltaName2
+
".x + delta"
+
deltaName3
+
".y*delta"
+
deltaName2
+
".y + delta"
+
deltaName3
+
".z*delta"
+
deltaName2
+
".z)/delta"
+
deltaName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.w = (dEdDihedral"
+
intToString
(
index
)
+
"*r)/"
+
crossName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 internalF0 = ff.x*"
+
crossName1
+
";
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 internalF3 = ff.w*"
+
crossName2
+
";
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.w = (dEdDihedral"
+
cl
.
intToString
(
index
)
+
"*r)/"
+
crossName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 internalF0 = ff.x*"
+
crossName1
+
";
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 internalF3 = ff.w*"
+
crossName2
+
";
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"internalF0.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
"s.xyz-internalF0.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
2
],
"-s.xyz-internalF3.xyz"
);
...
...
@@ -3645,13 +3692,13 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
replacements
[
"COMPUTE_ACCEPTOR_FORCE"
]
=
computeAcceptor
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_DONORS"
]
=
intToString
(
numDonors
);
defines
[
"NUM_ACCEPTORS"
]
=
intToString
(
numAcceptors
);
defines
[
"PI"
]
=
doubleToString
(
M_PI
);
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_DONORS"
]
=
cl
.
intToString
(
numDonors
);
defines
[
"NUM_ACCEPTORS"
]
=
cl
.
intToString
(
numAcceptors
);
defines
[
"PI"
]
=
cl
.
doubleToString
(
M_PI
);
if
(
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
NoCutoff
)
{
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"CUTOFF_SQUARED"
]
=
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
defines
[
"CUTOFF_SQUARED"
]
=
cl
.
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
}
if
(
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
CutoffNonPeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
...
...
@@ -3729,11 +3776,11 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
acceptorKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
tabulatedFunctionParams
->
getDeviceBuffer
());
}
}
donorKernel
.
setArg
<
mm_float4
>
(
8
,
cl
.
getPeriodicBoxSize
()
);
donorKernel
.
setArg
<
mm_float4
>
(
9
,
cl
.
getInvPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
cl
,
donorKernel
,
8
);
setInvPeriodicBoxSizeArg
(
cl
,
donorKernel
,
9
);
cl
.
executeKernel
(
donorKernel
,
max
(
numDonors
,
numAcceptors
));
acceptorKernel
.
setArg
<
mm_float4
>
(
8
,
cl
.
getPeriodicBoxSize
()
);
acceptorKernel
.
setArg
<
mm_float4
>
(
9
,
cl
.
getInvPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
cl
,
acceptorKernel
,
8
);
setInvPeriodicBoxSizeArg
(
cl
,
acceptorKernel
,
9
);
cl
.
executeKernel
(
acceptorKernel
,
max
(
numDonors
,
numAcceptors
));
return
0.0
;
}
...
...
@@ -3848,7 +3895,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
mm_float4
>
f
=
OpenCL
ExpressionUtilities
::
computeFunctionCoefficients
(
values
,
min
,
max
);
vector
<
mm_float4
>
f
=
cl
.
get
ExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
OpenCLArray
*
array
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
);
tabulatedFunctions
.
push_back
(
array
);
array
->
upload
(
f
);
...
...
@@ -3872,7 +3919,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
}
map
<
string
,
string
>
variables
;
for
(
int
i
=
0
;
i
<
particlesPerBond
;
i
++
)
{
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
variables
[
"x"
+
index
]
=
"pos"
+
index
+
".x"
;
variables
[
"y"
+
index
]
=
"pos"
+
index
+
".y"
;
variables
[
"z"
+
index
]
=
"pos"
+
index
+
".z"
;
...
...
@@ -3887,7 +3934,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
argName
+
"["
+
intToString
(
i
)
+
"]"
;
string
value
=
argName
+
"["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
}
}
...
...
@@ -3903,7 +3950,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
set
<
string
>
computedDeltas
;
vector
<
string
>
atomNames
,
posNames
;
for
(
int
i
=
0
;
i
<
particlesPerBond
;
i
++
)
{
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
atomNames
.
push_back
(
"P"
+
index
);
posNames
.
push_back
(
"pos"
+
index
);
}
...
...
@@ -3913,12 +3960,12 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName
<<
" = ccb_delta("
<<
posNames
[
atoms
[
0
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName
<<
" = ccb_delta("
<<
posNames
[
atoms
[
0
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName
);
}
compute
<<
"
float
r_"
<<
deltaName
<<
" = sqrt(delta"
<<
deltaName
<<
".w);
\n
"
;
compute
<<
"
real
r_"
<<
deltaName
<<
" = sqrt(delta"
<<
deltaName
<<
".w);
\n
"
;
variables
[
iter
->
first
]
=
"r_"
+
deltaName
;
forceExpressions
[
"
float
dEdDistance"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdDistance"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
...
...
@@ -3927,16 +3974,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName1
<<
" = ccb_delta("
<<
posNames
[
atoms
[
1
]]
<<
", "
<<
posNames
[
atoms
[
0
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName1
<<
" = ccb_delta("
<<
posNames
[
atoms
[
1
]]
<<
", "
<<
posNames
[
atoms
[
0
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName1
);
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName2
<<
" = ccb_delta("
<<
posNames
[
atoms
[
1
]]
<<
", "
<<
posNames
[
atoms
[
2
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName2
<<
" = ccb_delta("
<<
posNames
[
atoms
[
1
]]
<<
", "
<<
posNames
[
atoms
[
2
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName2
);
}
compute
<<
"
float
"
<<
angleName
<<
" = ccb_computeAngle(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"
real
"
<<
angleName
<<
" = ccb_computeAngle(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
variables
[
iter
->
first
]
=
angleName
;
forceExpressions
[
"
float
dEdAngle"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdAngle"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
...
...
@@ -3948,23 +3995,23 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
dihedralName
=
"dihedral_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName1
<<
" = ccb_delta("
<<
posNames
[
atoms
[
0
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName1
<<
" = ccb_delta("
<<
posNames
[
atoms
[
0
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName1
);
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName2
<<
" = ccb_delta("
<<
posNames
[
atoms
[
2
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName2
<<
" = ccb_delta("
<<
posNames
[
atoms
[
2
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName2
);
}
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName3
<<
" = ccb_delta("
<<
posNames
[
atoms
[
2
]]
<<
", "
<<
posNames
[
atoms
[
3
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName3
<<
" = ccb_delta("
<<
posNames
[
atoms
[
2
]]
<<
", "
<<
posNames
[
atoms
[
3
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName3
);
}
compute
<<
"
float
4 "
<<
crossName1
<<
" = ccb_computeCross(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"
float
4 "
<<
crossName2
<<
" = ccb_computeCross(delta"
<<
deltaName2
<<
", delta"
<<
deltaName3
<<
");
\n
"
;
compute
<<
"
float
"
<<
dihedralName
<<
" = ccb_computeAngle("
<<
crossName1
<<
", "
<<
crossName2
<<
");
\n
"
;
compute
<<
"
real
4 "
<<
crossName1
<<
" = ccb_computeCross(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"
real
4 "
<<
crossName2
<<
" = ccb_computeCross(delta"
<<
deltaName2
<<
", delta"
<<
deltaName3
<<
");
\n
"
;
compute
<<
"
real
"
<<
dihedralName
<<
" = ccb_computeAngle("
<<
crossName1
<<
", "
<<
crossName2
<<
");
\n
"
;
compute
<<
dihedralName
<<
" *= (delta"
<<
deltaName1
<<
".x*"
<<
crossName2
<<
".x + delta"
<<
deltaName1
<<
".y*"
<<
crossName2
<<
".y + delta"
<<
deltaName1
<<
".z*"
<<
crossName2
<<
".z < 0 ? -1 : 1);
\n
"
;
variables
[
iter
->
first
]
=
dihedralName
;
forceExpressions
[
"
float
dEdDihedral"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdDihedral"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
// Now evaluate the expressions.
...
...
@@ -3975,16 +4022,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
compute
<<
buffer
.
getType
()
<<
" bondParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
}
forceExpressions
[
"energy += "
]
=
energyExpression
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
functionParamsName
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
functionParamsName
);
// Finally, apply forces to atoms.
vector
<
string
>
forceNames
;
for
(
int
i
=
0
;
i
<
particlesPerBond
;
i
++
)
{
string
istr
=
intToString
(
i
+
1
);
string
istr
=
cl
.
intToString
(
i
+
1
);
string
forceName
=
"force"
+
istr
;
forceNames
.
push_back
(
forceName
);
compute
<<
"
float
4 "
<<
forceName
<<
" = (
float4) (0.0f, 0.0f, 0.0f, 0.0f)
;
\n
"
;
compute
<<
"
real
4 "
<<
forceName
<<
" = (
real4) 0
;
\n
"
;
compute
<<
"{
\n
"
;
Lepton
::
ParsedExpression
forceExpressionX
=
energyExpression
.
differentiate
(
"x"
+
istr
).
optimize
();
Lepton
::
ParsedExpression
forceExpressionY
=
energyExpression
.
differentiate
(
"y"
+
istr
).
optimize
();
...
...
@@ -3997,14 +4044,14 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
if
(
!
isZeroExpression
(
forceExpressionZ
))
expressions
[
forceName
+
".z -= "
]
=
forceExpressionZ
;
if
(
expressions
.
size
()
>
0
)
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functionDefinitions
,
"coordtemp"
,
functionParamsName
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functionDefinitions
,
"coordtemp"
,
functionParamsName
);
compute
<<
"}
\n
"
;
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
value
=
"(dEdDistance"
+
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
+
".xyz"
;
string
value
=
"(dEdDistance"
+
cl
.
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
+
".xyz"
;
compute
<<
forceNames
[
atoms
[
0
]]
<<
".xyz += "
<<
"-"
<<
value
<<
";
\n
"
;
compute
<<
forceNames
[
atoms
[
1
]]
<<
".xyz += "
<<
value
<<
";
\n
"
;
}
...
...
@@ -4014,11 +4061,11 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string
deltaName1
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
0
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
compute
<<
"{
\n
"
;
compute
<<
"
float
4 crossProd = cross(delta"
<<
deltaName2
<<
", delta"
<<
deltaName1
<<
");
\n
"
;
compute
<<
"
float
lengthCross = max(length(crossProd), 1e-6f);
\n
"
;
compute
<<
"
float
4 deltaCross0 = -cross(delta"
<<
deltaName1
<<
", crossProd)*dEdAngle"
<<
intToString
(
index
)
<<
"/(delta"
<<
deltaName1
<<
".w*lengthCross);
\n
"
;
compute
<<
"
float
4 deltaCross2 = cross(delta"
<<
deltaName2
<<
", crossProd)*dEdAngle"
<<
intToString
(
index
)
<<
"/(delta"
<<
deltaName2
<<
".w*lengthCross);
\n
"
;
compute
<<
"
float
4 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
;
compute
<<
"
real
4 crossProd = cross(delta"
<<
deltaName2
<<
", delta"
<<
deltaName1
<<
");
\n
"
;
compute
<<
"
real
lengthCross = max(length(crossProd),
(real)
1e-6f);
\n
"
;
compute
<<
"
real
4 deltaCross0 = -cross(delta"
<<
deltaName1
<<
", crossProd)*dEdAngle"
<<
cl
.
intToString
(
index
)
<<
"/(delta"
<<
deltaName1
<<
".w*lengthCross);
\n
"
;
compute
<<
"
real
4 deltaCross2 = cross(delta"
<<
deltaName2
<<
", crossProd)*dEdAngle"
<<
cl
.
intToString
(
index
)
<<
"/(delta"
<<
deltaName2
<<
".w*lengthCross);
\n
"
;
compute
<<
"
real
4 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
;
compute
<<
forceNames
[
atoms
[
0
]]
<<
".xyz += deltaCross0.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
1
]]
<<
".xyz += deltaCross1.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
2
]]
<<
".xyz += deltaCross2.xyz;
\n
"
;
...
...
@@ -4033,15 +4080,15 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
compute
<<
"{
\n
"
;
compute
<<
"
float
r =
sqrt
(delta"
<<
deltaName2
<<
".w);
\n
"
;
compute
<<
"
float
4 ff;
\n
"
;
compute
<<
"ff.x = (-dEdDihedral"
<<
intToString
(
index
)
<<
"*r)/"
<<
crossName1
<<
".w;
\n
"
;
compute
<<
"
real
r =
SQRT
(delta"
<<
deltaName2
<<
".w);
\n
"
;
compute
<<
"
real
4 ff;
\n
"
;
compute
<<
"ff.x = (-dEdDihedral"
<<
cl
.
intToString
(
index
)
<<
"*r)/"
<<
crossName1
<<
".w;
\n
"
;
compute
<<
"ff.y = (delta"
<<
deltaName1
<<
".x*delta"
<<
deltaName2
<<
".x + delta"
<<
deltaName1
<<
".y*delta"
<<
deltaName2
<<
".y + delta"
<<
deltaName1
<<
".z*delta"
<<
deltaName2
<<
".z)/delta"
<<
deltaName2
<<
".w;
\n
"
;
compute
<<
"ff.z = (delta"
<<
deltaName3
<<
".x*delta"
<<
deltaName2
<<
".x + delta"
<<
deltaName3
<<
".y*delta"
<<
deltaName2
<<
".y + delta"
<<
deltaName3
<<
".z*delta"
<<
deltaName2
<<
".z)/delta"
<<
deltaName2
<<
".w;
\n
"
;
compute
<<
"ff.w = (dEdDihedral"
<<
intToString
(
index
)
<<
"*r)/"
<<
crossName2
<<
".w;
\n
"
;
compute
<<
"
float
4 internalF0 = ff.x*"
<<
crossName1
<<
";
\n
"
;
compute
<<
"
float
4 internalF3 = ff.w*"
<<
crossName2
<<
";
\n
"
;
compute
<<
"
float
4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
;
compute
<<
"ff.w = (dEdDihedral"
<<
cl
.
intToString
(
index
)
<<
"*r)/"
<<
crossName2
<<
".w;
\n
"
;
compute
<<
"
real
4 internalF0 = ff.x*"
<<
crossName1
<<
";
\n
"
;
compute
<<
"
real
4 internalF3 = ff.w*"
<<
crossName2
<<
";
\n
"
;
compute
<<
"
real
4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
;
compute
<<
forceNames
[
atoms
[
0
]]
<<
".xyz += internalF0.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
1
]]
<<
".xyz += s.xyz-internalF0.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
2
]]
<<
".xyz += -s.xyz-internalF3.xyz;
\n
"
;
...
...
@@ -4050,7 +4097,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
}
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
compute
.
str
(),
force
.
getForceGroup
());
map
<
string
,
string
>
replacements
;
replacements
[
"M_PI"
]
=
doubleToString
(
M_PI
);
replacements
[
"M_PI"
]
=
cl
.
doubleToString
(
M_PI
);
cl
.
getBondedUtilities
().
addPrefixCode
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customCompoundBond
,
replacements
));;
}
...
...
@@ -4173,8 +4220,8 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
langevin
,
defines
,
""
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateLangevinPart1"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateLangevinPart2"
);
...
...
@@ -4266,7 +4313,7 @@ void OpenCLIntegrateBrownianStepKernel::initialize(const System& system, const B
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
brownian
,
defines
,
""
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateBrownianPart1"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateBrownianPart2"
);
...
...
@@ -4437,8 +4484,8 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
langevin
,
defines
,
""
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateLangevinPart1"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateLangevinPart2"
);
...
...
@@ -4635,10 +4682,10 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va
else
{
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
if
(
variable
==
integrator
.
getGlobalVariableName
(
i
))
expressions
[
"globals["
+
intToString
(
i
)
+
"] = "
]
=
expr
;
expressions
[
"globals["
+
cl
.
intToString
(
i
)
+
"] = "
]
=
expr
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
if
(
variable
==
parameterNames
[
i
])
{
expressions
[
"params["
+
intToString
(
i
)
+
"] = "
]
=
expr
;
expressions
[
"params["
+
cl
.
intToString
(
i
)
+
"] = "
]
=
expr
;
modifiesParameters
=
true
;
}
}
...
...
@@ -4650,11 +4697,11 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va
variables
[
"gaussian"
]
=
"gaussian"
;
variables
[
energyName
]
=
"energy[0]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
variables
[
parameterNames
[
i
]]
=
"params["
+
intToString
(
i
)
+
"]"
;
variables
[
parameterNames
[
i
]]
=
"params["
+
cl
.
intToString
(
i
)
+
"]"
;
vector
<
pair
<
string
,
string
>
>
functions
;
return
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
return
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
}
string
OpenCLIntegrateCustomStepKernel
::
createPerDofComputation
(
const
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
string
&
forceName
,
const
string
&
energyName
)
{
...
...
@@ -4666,7 +4713,7 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
else
if
(
variable
==
"v"
)
expressions
[
"velocity"
+
suffix
+
" = "
]
=
expr
;
else
if
(
variable
==
""
)
expressions
[
"sum[3*index+"
+
intToString
(
component
)
+
"] = "
]
=
expr
;
expressions
[
"sum[3*index+"
+
cl
.
intToString
(
component
)
+
"] = "
]
=
expr
;
else
{
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
if
(
variable
==
integrator
.
getPerDofVariableName
(
i
))
...
...
@@ -4684,14 +4731,14 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
variables
[
"dt"
]
=
"stepSize"
;
variables
[
energyName
]
=
"energy[0]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
variables
[
integrator
.
getPerDofVariableName
(
i
)]
=
"perDof"
+
suffix
.
substr
(
1
)
+
perDofValues
->
getParameterSuffix
(
i
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
variables
[
parameterNames
[
i
]]
=
"params["
+
intToString
(
i
)
+
"]"
;
variables
[
parameterNames
[
i
]]
=
"params["
+
cl
.
intToString
(
i
)
+
"]"
;
vector
<
pair
<
string
,
string
>
>
functions
;
string
tempType
=
(
cl
.
getSupportsDoublePrecision
()
?
"double"
:
"float"
);
return
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
+
intToString
(
component
)
+
"_"
,
""
,
tempType
);
return
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
+
cl
.
intToString
(
component
)
+
"_"
,
""
,
tempType
);
}
void
OpenCLIntegrateCustomStepKernel
::
execute
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
...
...
@@ -4733,8 +4780,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
merged
.
resize
(
numSteps
,
false
);
modifiesParameters
=
false
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"WORK_GROUP_SIZE"
]
=
intToString
(
OpenCLContext
::
ThreadBlockSize
);
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"WORK_GROUP_SIZE"
]
=
cl
.
intToString
(
OpenCLContext
::
ThreadBlockSize
);
// Initialize the random number generator.
...
...
@@ -4858,9 +4905,9 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
stringstream
compute
;
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
compute
<<
buffer
.
getType
()
<<
" perDofx"
<<
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofy"
<<
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index+1];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofz"
<<
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index+2];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofx"
<<
cl
.
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofy"
<<
cl
.
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+1];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofz"
<<
cl
.
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+2];
\n
"
;
}
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
...
...
@@ -4882,9 +4929,9 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
else
{
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
compute
<<
"perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index] = perDofx"
<<
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index+1] = perDofy"
<<
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index] = perDofx"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+1] = perDofy"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
}
}
compute
<<
"}
\n
"
;
...
...
@@ -4896,7 +4943,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
stringstream
args
;
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
string
valueName
=
"perDofValues"
+
intToString
(
i
+
1
);
string
valueName
=
"perDofValues"
+
cl
.
intToString
(
i
+
1
);
args
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict "
<<
valueName
;
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
...
...
@@ -5216,7 +5263,7 @@ OpenCLApplyAndersenThermostatKernel::~OpenCLApplyAndersenThermostatKernel() {
void
OpenCLApplyAndersenThermostatKernel
::
initialize
(
const
System
&
system
,
const
AndersenThermostat
&
thermostat
)
{
randomSeed
=
thermostat
.
getRandomNumberSeed
();
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
andersenThermostat
,
defines
);
kernel
=
cl
::
Kernel
(
program
,
"applyAndersenThermostat"
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
randomSeed
);
...
...
@@ -5349,7 +5396,7 @@ void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotion
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
totalMass
+=
system
.
getParticleMass
(
i
);
map
<
string
,
string
>
defines
;
defines
[
"INVERSE_TOTAL_MASS"
]
=
doubleToString
(
1.0
/
totalMass
);
defines
[
"INVERSE_TOTAL_MASS"
]
=
cl
.
doubleToString
(
1.0
/
totalMass
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
removeCM
,
defines
);
kernel1
=
cl
::
Kernel
(
program
,
"calcCenterOfMassMomentum"
);
kernel1
.
setArg
<
cl_int
>
(
0
,
numAtoms
);
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
c8dac206
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
1
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -267,7 +267,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
forceKernel
=
createInteractionKernel
(
kernelSource
,
parameters
,
arguments
,
true
,
true
);
if
(
useCutoff
)
{
map
<
string
,
string
>
defines
;
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
if
(
forceBufferPerAtomBlock
)
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
if
(
usePeriodic
)
...
...
@@ -281,6 +281,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
if
(
context
.
getUseDoublePrecision
())
findInteractingBlocksKernel
.
setArg
<
cl_double
>
(
0
,
cutoff
*
cutoff
);
else
findInteractingBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
blockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockBoundingBox
->
getDeviceBuffer
());
...
...
@@ -293,6 +296,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
startTileIndex
+
numTiles
);
if
(
context
.
getSIMDWidth
()
==
32
&&
!
deviceIsCpu
)
{
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
if
(
context
.
getUseDoublePrecision
())
findInteractionsWithinBlocksKernel
.
setArg
<
cl_double
>
(
0
,
cutoff
*
cutoff
);
else
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingTiles
->
getDeviceBuffer
());
...
...
@@ -315,6 +321,20 @@ int OpenCLNonbondedUtilities::findExclusionIndex(int x, int y, const vector<cl_u
throw
OpenMMException
(
"Internal error: exclusion in unexpected tile"
);
}
/**
 * Set argument number `index` of `kernel` to the context's periodic box size.
 *
 * The value is passed as an mm_double4 (from getPeriodicBoxSizeDouble()) when
 * the context reports that it uses double precision, and as an mm_float4
 * (from getPeriodicBoxSize()) otherwise.
 *
 * @param cl      the context that supplies the box size and precision mode
 * @param kernel  the kernel whose argument is being set
 * @param index   the position of the argument within the kernel's argument list
 */
static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    // Single precision case first: hand over the float4 box and stop.
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize());
        return;
    }
    kernel.setArg<mm_double4>(index, cl.getPeriodicBoxSizeDouble());
}
/**
 * Set argument number `index` of `kernel` to the context's inverse periodic
 * box size.
 *
 * The value is passed as an mm_double4 (from getInvPeriodicBoxSizeDouble())
 * when the context reports that it uses double precision, and as an mm_float4
 * (from getInvPeriodicBoxSize()) otherwise.
 *
 * @param cl      the context that supplies the inverse box size and precision mode
 * @param kernel  the kernel whose argument is being set
 * @param index   the position of the argument within the kernel's argument list
 */
static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    // Single precision case first: hand over the float4 value and stop.
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
        return;
    }
    kernel.setArg<mm_double4>(index, cl.getInvPeriodicBoxSizeDouble());
}
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
if
(
!
useCutoff
)
return
;
...
...
@@ -327,15 +347,15 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
// Compute the neighbor list.
findBlockBoundsKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
()
);
findBlockBoundsKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
1
);
setInvPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
2
);
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
()
);
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
1
);
setInvPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
2
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
deviceIsCpu
?
1
:
-
1
);
if
(
context
.
getSIMDWidth
()
==
32
&&
!
deviceIsCpu
)
{
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
()
);
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
findInteractionsWithinBlocksKernel
,
1
);
setInvPeriodicBoxSizeArg
(
context
,
findInteractionsWithinBlocksKernel
,
2
);
context
.
executeKernel
(
findInteractionsWithinBlocksKernel
,
context
.
getNumAtoms
(),
128
);
}
}
...
...
@@ -343,8 +363,8 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
if
(
cutoff
!=
-
1.0
)
{
if
(
useCutoff
)
{
forceKernel
.
setArg
<
mm_float4
>
(
10
,
context
.
getPeriodicBoxSize
()
);
forceKernel
.
setArg
<
mm_float4
>
(
11
,
context
.
getInvPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
forceKernel
,
10
);
setInvPeriodicBoxSizeArg
(
context
,
forceKernel
,
11
);
}
context
.
executeKernel
(
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
...
...
@@ -498,11 +518,11 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
isSymmetric
)
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
forceThreadBlockSize
);
defines
[
"CUTOFF_SQUARED"
]
=
OpenCLExpressionUtilities
::
doubleToString
(
cutoff
*
cutoff
);
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
if
((
localDataSize
/
4
)
%
2
==
0
)
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
string
file
;
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.h
View file @
c8dac206
...
...
@@ -30,6 +30,7 @@
#include "OpenCLContext.h"
#include "openmm/System.h"
#include "OpenCLExpressionUtilities.h"
#include <sstream>
#include <string>
#include <vector>
...
...
@@ -287,8 +288,11 @@ public:
name
(
name
),
componentType
(
componentType
),
numComponents
(
numComponents
),
size
(
size
),
memory
(
&
memory
)
{
if
(
numComponents
==
1
)
type
=
componentType
;
else
type
=
componentType
+
OpenCLExpressionUtilities
::
intToString
(
numComponents
);
else
{
std
::
stringstream
s
;
s
<<
componentType
<<
numComponents
;
type
=
s
.
str
();
}
}
const
std
::
string
&
getName
()
const
{
return
name
;
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
c8dac206
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors.
*
* Portions copyright (c) 2011
-2012
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -54,14 +54,14 @@ using namespace std;
class
OpenCLParallelCalcForcesAndEnergyKernel
::
BeginComputationTask
:
public
OpenCLContext
::
WorkTask
{
public:
BeginComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
mm_float4
*
pinnedMemory
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
void
*
pinnedMemory
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
pinnedMemory
(
pinnedMemory
)
{
}
void
execute
()
{
// Copy coordinates over to this device and execute the kernel.
if
(
cl
.
getContextIndex
()
>
0
)
cl
.
getQueue
().
enqueueWriteBuffer
(
cl
.
getPosq
().
getDeviceBuffer
(),
CL_FALSE
,
0
,
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
pinnedMemory
);
cl
.
getQueue
().
enqueueWriteBuffer
(
cl
.
getPosq
().
getDeviceBuffer
(),
CL_FALSE
,
0
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getPosq
().
getElementSize
(
),
pinnedMemory
);
kernel
.
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
}
private:
...
...
@@ -70,13 +70,13 @@ private:
OpenCLCalcForcesAndEnergyKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
int
groups
;
mm_float4
*
pinnedMemory
;
void
*
pinnedMemory
;
};
class
OpenCLParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
OpenCLContext
::
WorkTask
{
public:
FinishComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
mm_float4
*
pinnedMemory
)
:
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
void
*
pinnedMemory
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
)
{
}
...
...
@@ -87,8 +87,9 @@ public:
if
(
includeForce
)
{
if
(
cl
.
getContextIndex
()
>
0
)
{
int
numAtoms
=
cl
.
getPaddedNumAtoms
();
void
*
dest
=
(
cl
.
getUseDoublePrecision
()
?
(
void
*
)
&
((
mm_double4
*
)
pinnedMemory
)[(
cl
.
getContextIndex
()
-
1
)
*
numAtoms
]
:
(
void
*
)
&
((
mm_float4
*
)
pinnedMemory
)[(
cl
.
getContextIndex
()
-
1
)
*
numAtoms
]);
cl
.
getQueue
().
enqueueReadBuffer
(
cl
.
getForce
().
getDeviceBuffer
(),
CL_TRUE
,
0
,
numAtoms
*
sizeof
(
mm_float4
),
&
pinnedMemory
[(
cl
.
getContextIndex
()
-
1
)
*
numAtoms
]
);
numAtoms
*
cl
.
getForce
().
getElementSize
(),
dest
);
}
else
cl
.
getQueue
().
finish
();
...
...
@@ -103,7 +104,7 @@ private:
int
groups
;
double
&
energy
;
long
long
&
completionTime
;
mm_float4
*
pinnedMemory
;
void
*
pinnedMemory
;
};
OpenCLParallelCalcForcesAndEnergyKernel
::
OpenCLParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
OpenCLPlatform
::
PlatformData
&
data
)
:
...
...
@@ -129,19 +130,20 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void
OpenCLParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
OpenCLContext
&
cl0
=
*
data
.
contexts
[
0
];
int
elementSize
=
(
cl0
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
if
(
contextForces
==
NULL
)
{
contextForces
=
OpenCLArray
::
create
<
mm_float4
>
(
cl0
,
&
cl0
.
getForceBuffers
().
getDeviceBuffer
(),
data
.
contexts
.
size
()
*
cl0
.
getPaddedNumAtoms
(),
"contextForces"
);
int
bufferBytes
=
(
data
.
contexts
.
size
()
-
1
)
*
cl0
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
)
;
int
bufferBytes
=
(
data
.
contexts
.
size
()
-
1
)
*
cl0
.
getPaddedNumAtoms
()
*
elementSize
;
pinnedPositionBuffer
=
new
cl
::
Buffer
(
cl0
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedPositionMemory
=
(
mm_float4
*
)
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedPositionBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedPositionMemory
=
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedPositionBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedForceBuffer
=
new
cl
::
Buffer
(
cl0
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedForceMemory
=
(
mm_float4
*
)
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedForceBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedForceMemory
=
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedForceBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
}
// Copy coordinates over to each device and execute the kernel.
cl0
.
getQueue
().
enqueueReadBuffer
(
cl0
.
getPosq
().
getDeviceBuffer
(),
CL_TRUE
,
0
,
cl0
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
)
,
pinnedPositionMemory
);
cl0
.
getQueue
().
enqueueReadBuffer
(
cl0
.
getPosq
().
getDeviceBuffer
(),
CL_TRUE
,
0
,
cl0
.
getPaddedNumAtoms
()
*
elementSize
,
pinnedPositionMemory
);
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
data
.
contextEnergy
[
i
]
=
0.0
;
OpenCLContext
&
cl
=
*
data
.
contexts
[
i
];
...
...
@@ -165,8 +167,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
OpenCLContext
&
cl
=
*
data
.
contexts
[
0
];
int
numAtoms
=
cl
.
getPaddedNumAtoms
();
cl
.
getQueue
().
enqueueWriteBuffer
(
contextForces
->
getDeviceBuffer
(),
CL_FALSE
,
numAtoms
*
sizeof
(
mm_float4
),
numAtoms
*
(
data
.
contexts
.
size
()
-
1
)
*
sizeof
(
mm_float4
),
pinnedForceMemory
);
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
cl
.
getQueue
().
enqueueWriteBuffer
(
contextForces
->
getDeviceBuffer
(),
CL_FALSE
,
numAtoms
*
elementSize
,
numAtoms
*
(
data
.
contexts
.
size
()
-
1
)
*
elementSize
,
pinnedForceMemory
);
cl
.
reduceBuffer
(
*
contextForces
,
data
.
contexts
.
size
());
// Balance work between the contexts by transferring a few nonbonded tiles from the context that
...
...
platforms/opencl/src/OpenCLParallelKernels.h
View file @
c8dac206
...
...
@@ -84,8 +84,8 @@ private:
OpenCLArray
*
contextForces
;
cl
::
Buffer
*
pinnedPositionBuffer
;
cl
::
Buffer
*
pinnedForceBuffer
;
mm_float4
*
pinnedPositionMemory
;
mm_float4
*
pinnedForceMemory
;
void
*
pinnedPositionMemory
;
void
*
pinnedForceMemory
;
};
/**
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
c8dac206
...
...
@@ -141,7 +141,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
device
<<
contexts
[
i
]
->
getDeviceIndex
();
}
propertyValues
[
OpenCLPlatform
::
OpenCLDeviceIndex
()]
=
device
.
str
();
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformIndex
()]
=
OpenCLExpressionUtilities
::
intToString
(
platformIndex
);
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformIndex
()]
=
contexts
[
0
]
->
intToString
(
platformIndex
);
propertyValues
[
OpenCLPlatform
::
OpenCLPrecision
()]
=
precisionProperty
;
contextEnergy
.
resize
(
contexts
.
size
());
}
...
...
platforms/opencl/src/OpenCLSort.h
View file @
c8dac206
...
...
@@ -162,7 +162,7 @@ public:
// Assign array elements to buckets.
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
context
.
clearBuffer
(
bucketOffset
->
getDeviceBuffer
(),
numBuckets
);
context
.
clearBuffer
(
*
bucketOffset
);
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl_int
>
(
1
,
data
.
getSize
());
assignElementsKernel
.
setArg
<
cl_int
>
(
2
,
numBuckets
);
...
...
platforms/opencl/src/kernels/angleForce.cl
View file @
c8dac206
float
4
v0
=
pos2-pos1
;
float
4
v1
=
pos2-pos3
;
float
4
cp
=
cross
(
v0,
v1
)
;
float
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
rp
=
max
(
SQRT
(
rp
)
,
1.0e-06f
)
;
float
r21
=
v0.x*v0.x
+
v0.y*v0.y
+
v0.z*v0.z
;
float
r23
=
v1.x*v1.x
+
v1.y*v1.y
+
v1.z*v1.z
;
float
dot
=
v0.x*v1.x
+
v0.y*v1.y
+
v0.z*v1.z
;
float
cosine
=
clamp
(
dot*RSQRT
(
r21*r23
)
,
-1.0f,
1.0f
)
;
float
theta
=
acos
(
cosine
)
;
real
4
v0
=
pos2-pos1
;
real
4
v1
=
pos2-pos3
;
real
4
cp
=
cross
(
v0,
v1
)
;
real
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
rp
=
max
(
SQRT
(
rp
)
,
(
real
)
1.0e-06f
)
;
real
r21
=
v0.x*v0.x
+
v0.y*v0.y
+
v0.z*v0.z
;
real
r23
=
v1.x*v1.x
+
v1.y*v1.y
+
v1.z*v1.z
;
real
dot
=
v0.x*v1.x
+
v0.y*v1.y
+
v0.z*v1.z
;
real
cosine
=
clamp
(
dot*RSQRT
(
r21*r23
)
,
(
real
)
-1
,
(
real
)
1
)
;
real
theta
=
acos
(
cosine
)
;
COMPUTE_FORCE
float
4
force1
=
cross
(
v0,
cp
)
*
(
dEdAngle/
(
r21*rp
))
;
float
4
force3
=
cross
(
cp,
v1
)
*
(
dEdAngle/
(
r23*rp
))
;
float
4
force2
=
-force1-force3
;
real
4
force1
=
cross
(
v0,
cp
)
*
(
dEdAngle/
(
r21*rp
))
;
real
4
force3
=
cross
(
cp,
v1
)
*
(
dEdAngle/
(
r23*rp
))
;
real
4
force2
=
-force1-force3
;
platforms/opencl/src/kernels/bondForce.cl
View file @
c8dac206
float
4
delta
=
pos2-pos1
;
float
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
real
4
delta
=
pos2-pos1
;
real
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
COMPUTE_FORCE
dEdR
=
(
r
>
0.0f
)
?
(
dEdR
/
r
)
:
0.0f
;
delta.xyz
*=
dEdR
;
float4
force1
=
delta
;
float4
force2
=
-delta
;
\ No newline at end of file
real4
force1
=
delta
;
real4
force2
=
-delta
;
\ No newline at end of file
platforms/opencl/src/kernels/cmapTorsionForce.cl
View file @
c8dac206
const
float
PI
=
3.14159265358979323846f
;
const
real
PI
=
3.14159265358979323846f
;
//
Compute
the
first
angle.
float
4
v0a
=
(
float
4
)
(
pos1.xyz-pos2.xyz,
0.0f
)
;
float
4
v1a
=
(
float
4
)
(
pos3.xyz-pos2.xyz,
0.0f
)
;
float
4
v2a
=
(
float
4
)
(
pos3.xyz-pos4.xyz,
0.0f
)
;
float
4
cp0a
=
cross
(
v0a,
v1a
)
;
float
4
cp1a
=
cross
(
v1a,
v2a
)
;
float
cosangle
=
dot
(
normalize
(
cp0a
)
,
normalize
(
cp1a
))
;
float
angleA
;
real
4
v0a
=
(
real
4
)
(
pos1.xyz-pos2.xyz,
0.0f
)
;
real
4
v1a
=
(
real
4
)
(
pos3.xyz-pos2.xyz,
0.0f
)
;
real
4
v2a
=
(
real
4
)
(
pos3.xyz-pos4.xyz,
0.0f
)
;
real
4
cp0a
=
cross
(
v0a,
v1a
)
;
real
4
cp1a
=
cross
(
v1a,
v2a
)
;
real
cosangle
=
dot
(
normalize
(
cp0a
)
,
normalize
(
cp1a
))
;
real
angleA
;
if
(
cosangle
>
0.99f
|
| cosangle < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
float
4 cross_prod = cross(cp0a, cp1a);
float
scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
real
4 cross_prod = cross(cp0a, cp1a);
real
scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
angleA = asin(SQRT(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0.0f)
angleA = PI-angleA;
...
...
@@ -25,18 +25,18 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
// Compute the second angle.
float
4 v0b = (
float
4) (pos5.xyz-pos6.xyz, 0.0f);
float
4 v1b = (
float
4) (pos7.xyz-pos6.xyz, 0.0f);
float
4 v2b = (
float
4) (pos7.xyz-pos8.xyz, 0.0f);
float
4 cp0b = cross(v0b, v1b);
float
4 cp1b = cross(v1b, v2b);
real
4 v0b = (
real
4) (pos5.xyz-pos6.xyz, 0.0f);
real
4 v1b = (
real
4) (pos7.xyz-pos6.xyz, 0.0f);
real
4 v2b = (
real
4) (pos7.xyz-pos8.xyz, 0.0f);
real
4 cp0b = cross(v0b, v1b);
real
4 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b));
float
angleB;
real
angleB;
if (cosangle > 0.99f |
|
cosangle
<
-0.99f
)
{
//
We
're
close
to
the
singularity
in
acos
()
,
so
take
the
cross
product
and
use
asin
()
instead.
float
4
cross_prod
=
cross
(
cp0b,
cp1b
)
;
float
scale
=
dot
(
cp0b,
cp0b
)
*dot
(
cp1b,
cp1b
)
;
real
4
cross_prod
=
cross
(
cp0b,
cp1b
)
;
real
scale
=
dot
(
cp0b,
cp0b
)
*dot
(
cp1b,
cp1b
)
;
angleB
=
asin
(
SQRT
(
dot
(
cross_prod,
cross_prod
)
/scale
))
;
if
(
cosangle
<
0.0f
)
angleB
=
PI-angleB
;
...
...
@@ -50,7 +50,7 @@ angleB = fmod(angleB+2.0f*PI, 2.0f*PI);
int2
pos
=
MAP_POS[MAPS[index]]
;
int
size
=
pos.y
;
float
delta
=
2*PI/size
;
real
delta
=
2*PI/size
;
int
s
=
(
int
)
(
angleA/delta
)
;
int
t
=
(
int
)
(
angleB/delta
)
;
float4
c[4]
;
...
...
@@ -59,14 +59,14 @@ c[0] = COEFF[coeffIndex];
c[1]
=
COEFF[coeffIndex+1]
;
c[2]
=
COEFF[coeffIndex+2]
;
c[3]
=
COEFF[coeffIndex+3]
;
float
da
=
angleA/delta-s
;
float
db
=
angleB/delta-t
;
real
da
=
angleA/delta-s
;
real
db
=
angleB/delta-t
;
//
Evaluate
the
spline
to
determine
the
energy
and
gradients.
float
torsionEnergy
=
0.0f
;
float
dEdA
=
0.0f
;
float
dEdB
=
0.0f
;
real
torsionEnergy
=
0.0f
;
real
dEdA
=
0.0f
;
real
dEdB
=
0.0f
;
torsionEnergy
=
da*torsionEnergy
+
((
c[3].w*db
+
c[3].z
)
*db
+
c[3].y
)
*db
+
c[3].x
;
dEdA
=
db*dEdA
+
(
3.0f*c[3].w*da
+
2.0f*c[2].w
)
*da
+
c[1].w
;
dEdB
=
da*dEdB
+
(
3.0f*c[3].w*db
+
2.0f*c[3].z
)
*db
+
c[3].y
;
...
...
@@ -85,17 +85,17 @@ energy += torsionEnergy;
//
Apply
the
force
to
the
first
torsion.
float
normCross1
=
dot
(
cp0a,
cp0a
)
;
float
normSqrBC
=
dot
(
v1a,
v1a
)
;
float
normBC
=
SQRT
(
normSqrBC
)
;
float
normCross2
=
dot
(
cp1a,
cp1a
)
;
float
dp
=
1.0f/normSqrBC
;
float
4
ff
=
(
float
4
)
((
-dEdA*normBC
)
/normCross1,
dot
(
v0a,
v1a
)
*dp,
dot
(
v2a,
v1a
)
*dp,
(
dEdA*normBC
)
/normCross2
)
;
float
4
force1
=
ff.x*cp0a
;
float
4
force4
=
ff.w*cp1a
;
float
4
d
=
ff.y*force1
-
ff.z*force4
;
float
4
force2
=
d-force1
;
float
4
force3
=
-d-force4
;
real
normCross1
=
dot
(
cp0a,
cp0a
)
;
real
normSqrBC
=
dot
(
v1a,
v1a
)
;
real
normBC
=
SQRT
(
normSqrBC
)
;
real
normCross2
=
dot
(
cp1a,
cp1a
)
;
real
dp
=
1.0f/normSqrBC
;
real
4
ff
=
(
real
4
)
((
-dEdA*normBC
)
/normCross1,
dot
(
v0a,
v1a
)
*dp,
dot
(
v2a,
v1a
)
*dp,
(
dEdA*normBC
)
/normCross2
)
;
real
4
force1
=
ff.x*cp0a
;
real
4
force4
=
ff.w*cp1a
;
real
4
d
=
ff.y*force1
-
ff.z*force4
;
real
4
force2
=
d-force1
;
real
4
force3
=
-d-force4
;
//
Apply
the
force
to
the
second
torsion.
...
...
@@ -104,9 +104,9 @@ normSqrBC = dot(v1b, v1b);
normBC
=
SQRT
(
normSqrBC
)
;
normCross2
=
dot
(
cp1b,
cp1b
)
;
dp
=
1.0f/normSqrBC
;
ff
=
(
float
4
)
((
-dEdB*normBC
)
/normCross1,
dot
(
v0b,
v1b
)
*dp,
dot
(
v2b,
v1b
)
*dp,
(
dEdB*normBC
)
/normCross2
)
;
float
4
force5
=
ff.x*cp0b
;
float
4
force8
=
ff.w*cp1b
;
ff
=
(
real
4
)
((
-dEdB*normBC
)
/normCross1,
dot
(
v0b,
v1b
)
*dp,
dot
(
v2b,
v1b
)
*dp,
(
dEdB*normBC
)
/normCross2
)
;
real
4
force5
=
ff.x*cp0b
;
real
4
force8
=
ff.w*cp1b
;
d
=
ff.y*force5
-
ff.z*force8
;
float
4
force6
=
d-force5
;
float
4
force7
=
-d-force8
;
real
4
force6
=
d-force5
;
real
4
force7
=
-d-force8
;
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
c8dac206
#
if
USE_EWALD
bool
needCorrection
=
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
if
(
!isExcluded
|
| needCorrection) {
float
tempForce = 0
.0f
;
real
tempForce = 0;
if (r2 < CUTOFF_SQUARED |
|
needCorrection
)
{
const
float
alphaR
=
EWALD_ALPHA*r
;
const
float
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
float
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
const
real
alphaR
=
EWALD_ALPHA*r
;
const
real
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
//
This
approximation
for
erfc
is
from
Abramowitz
and
Stegun
(
1964
)
p.
299.
They
cite
the
following
as
//
the
original
source:
C.
Hastings,
Jr.,
Approximations
for
Digital
Computers
(
1955
)
.
It
has
a
maximum
//
error
of
3e-7.
float
t
=
1.0f+
(
0.0705230784f+
(
0.0422820123f+
(
0.0092705272f+
(
0.0001520143f+
(
0.0002765672f+0.0000430638f*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
;
real
t
=
1.0f+
(
0.0705230784f+
(
0.0422820123f+
(
0.0092705272f+
(
0.0001520143f+
(
0.0002765672f+0.0000430638f*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
const
float
erfcAlphaR
=
RECIP
(
t*t
)
;
const
real
erfcAlphaR
=
RECIP
(
t*t
)
;
if
(
needCorrection
)
{
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
...
...
@@ -24,11 +24,11 @@ if (!isExcluded || needCorrection) {
}
else
{
#
if
HAS_LENNARD_JONES
float
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
float
sig2
=
invR*sig
;
real
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
real
sig6
=
sig2*sig2*sig2
;
real
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
+
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
epssig6*
(
sig6
-
1.0f
)
+
prefactor*erfcAlphaR
;
#
else
...
...
@@ -41,32 +41,37 @@ if (!isExcluded || needCorrection) {
}
#
else
{
#
ifdef
USE_DOUBLE_PRECISION
unsigned
long
includeInteraction
;
#
else
unsigned
int
includeInteraction
;
#
endif
#
ifdef
USE_CUTOFF
unsigned
int
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
#
else
unsigned
int
includeInteraction
=
(
!isExcluded
)
;
includeInteraction
=
(
!isExcluded
)
;
#
endif
float
tempForce
=
0
.0f
;
real
tempForce
=
0
;
#
if
HAS_LENNARD_JONES
float
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
float
sig2
=
invR*sig
;
real
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
real
sig6
=
sig2*sig2*sig2
;
real
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
tempEnergy
+=
select
(
0.0f
,
epssig6*
(
sig6
-
1.0f
)
,
includeInteraction
)
;
tempEnergy
+=
select
(
(
real
)
0
,
epssig6*
(
sig6
-1
)
,
includeInteraction
)
;
#
endif
#
if
HAS_COULOMB
#
ifdef
USE_CUTOFF
const
float
prefactor
=
138.935456f*posq1.w*posq2.w
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w
;
tempForce
+=
prefactor*
(
invR
-
2.0f*REACTION_FIELD_K*r2
)
;
tempEnergy
+=
select
(
0.0f
,
prefactor*
(
invR
+
REACTION_FIELD_K*r2
-
REACTION_FIELD_C
)
,
includeInteraction
)
;
tempEnergy
+=
select
(
(
real
)
0
,
prefactor*
(
invR
+
REACTION_FIELD_K*r2
-
REACTION_FIELD_C
)
,
includeInteraction
)
;
#
else
const
float
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
tempForce
+=
prefactor
;
tempEnergy
+=
select
(
0.0f
,
prefactor,
includeInteraction
)
;
tempEnergy
+=
select
(
(
real
)
0
,
prefactor,
includeInteraction
)
;
#
endif
#
endif
dEdR
+=
select
(
0.0f
,
tempForce*invR*invR,
includeInteraction
)
;
dEdR
+=
select
(
(
real
)
0
,
tempForce*invR*invR,
includeInteraction
)
;
}
#
endif
\ No newline at end of file
platforms/opencl/src/kernels/customCompoundBond.cl
View file @
c8dac206
/**
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
float
4
ccb_delta
(
float
4
vec1,
float
4
vec2
)
{
float
4
result
=
(
float
4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
.0f
)
;
real
4
ccb_delta
(
real
4
vec1,
real
4
vec2
)
{
real
4
result
=
(
real
4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
...
...
@@ -10,17 +10,17 @@ float4 ccb_delta(float4 vec1, float4 vec2) {
/**
*
Compute
the
angle
between
two
vectors.
The
w
component
of
each
vector
should
contain
the
squared
magnitude.
*/
float
ccb_computeAngle
(
float
4
vec1,
float
4
vec2
)
{
float
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
float
cosine
=
dotProduct*RSQRT
(
vec1.w*vec2.w
)
;
float
angle
;
real
ccb_computeAngle
(
real
4
vec1,
real
4
vec2
)
{
real
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
real
cosine
=
dotProduct*RSQRT
(
vec1.w*vec2.w
)
;
real
angle
;
if
(
cosine
>
0.99f
||
cosine
<
-0.99f
)
{
//
We
're
close
to
the
singularity
in
acos
()
,
so
take
the
cross
product
and
use
asin
()
instead.
float
4
crossProduct
=
cross
(
vec1,
vec2
)
;
float
scale
=
vec1.w*vec2.w
;
real
4
crossProduct
=
cross
(
vec1,
vec2
)
;
real
scale
=
vec1.w*vec2.w
;
angle
=
asin
(
SQRT
(
dot
(
crossProduct,
crossProduct
)
/scale
))
;
if
(
cosine
<
0
.0f
)
if
(
cosine
<
0
)
angle
=
M_PI-angle
;
}
else
...
...
@@ -31,8 +31,8 @@ float ccb_computeAngle(float4 vec1, float4 vec2) {
/**
*
Compute
the
cross
product
of
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
float
4
ccb_computeCross
(
float
4
vec1,
float
4
vec2
)
{
float
4
result
=
cross
(
vec1,
vec2
)
;
real
4
ccb_computeCross
(
real
4
vec1,
real
4
vec2
)
{
real
4
result
=
cross
(
vec1,
vec2
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
platforms/opencl/src/kernels/customExternalForce.cl
View file @
c8dac206
COMPUTE_FORCE
float
4
force1
=
(
float
4
)
(
-dEdX,
-dEdY,
-dEdZ,
0
.0f
)
;
real
4
force1
=
(
real
4
)
(
-dEdX,
-dEdY,
-dEdZ,
0
)
;
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment