Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c8dac206
Commit
c8dac206
authored
Oct 17, 2012
by
Peter Eastman
Browse files
Continuing to implement double precision in OpenCL
parent
34938e2c
Changes
36
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
635 additions
and
529 deletions
+635
-529
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+8
-8
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+38
-15
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+25
-4
platforms/opencl/src/OpenCLExpressionUtilities.cpp
platforms/opencl/src/OpenCLExpressionUtilities.cpp
+7
-20
platforms/opencl/src/OpenCLExpressionUtilities.h
platforms/opencl/src/OpenCLExpressionUtilities.h
+12
-17
platforms/opencl/src/OpenCLFFT3D.cpp
platforms/opencl/src/OpenCLFFT3D.cpp
+46
-45
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+5
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+334
-287
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+37
-17
platforms/opencl/src/OpenCLNonbondedUtilities.h
platforms/opencl/src/OpenCLNonbondedUtilities.h
+6
-2
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+16
-13
platforms/opencl/src/OpenCLParallelKernels.h
platforms/opencl/src/OpenCLParallelKernels.h
+2
-2
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+1
-1
platforms/opencl/src/OpenCLSort.h
platforms/opencl/src/OpenCLSort.h
+1
-1
platforms/opencl/src/kernels/angleForce.cl
platforms/opencl/src/kernels/angleForce.cl
+13
-13
platforms/opencl/src/kernels/bondForce.cl
platforms/opencl/src/kernels/bondForce.cl
+4
-4
platforms/opencl/src/kernels/cmapTorsionForce.cl
platforms/opencl/src/kernels/cmapTorsionForce.cl
+40
-40
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+28
-23
platforms/opencl/src/kernels/customCompoundBond.cl
platforms/opencl/src/kernels/customCompoundBond.cl
+11
-11
platforms/opencl/src/kernels/customExternalForce.cl
platforms/opencl/src/kernels/customExternalForce.cl
+1
-1
No files found.
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
c8dac206
...
...
@@ -58,7 +58,7 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
std
::
string
OpenCLBondedUtilities
::
addArgument
(
cl
::
Memory
&
data
,
const
string
&
type
)
{
arguments
.
push_back
(
&
data
);
argTypes
.
push_back
(
type
);
return
"customArg"
+
OpenCLExpressionUtilities
::
intToString
(
arguments
.
size
());
return
"customArg"
+
context
.
intToString
(
arguments
.
size
());
}
void
OpenCLBondedUtilities
::
addPrefixCode
(
const
string
&
source
)
{
...
...
@@ -164,17 +164,17 @@ void OpenCLBondedUtilities::initialize(const System& system) {
stringstream
s
;
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
s
<<
prefixCode
[
i
];
s
<<
"__kernel void computeBondedForces(__global
float
4* restrict forceBuffers, __global
float
* restrict energyBuffer, __global const
float
4* restrict posq, int groups"
;
s
<<
"__kernel void computeBondedForces(__global
real
4* restrict forceBuffers, __global
real
* restrict energyBuffer, __global const
real
4* restrict posq, int groups"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
OpenCLExpressionUtilities
::
intToString
(
indexWidth
[
force
]));
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
context
.
intToString
(
indexWidth
[
force
]));
s
<<
", __global const "
<<
indexType
<<
"* restrict atomIndices"
<<
i
;
s
<<
", __global const "
<<
indexType
<<
"* restrict bufferIndices"
<<
i
;
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
") {
\n
"
;
s
<<
"
float
energy = 0.0f;
\n
"
;
s
<<
"
real
energy = 0.0f;
\n
"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
...
...
@@ -182,7 +182,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
s
<<
"}
\n
"
;
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
cl
::
Program
program
=
context
.
createProgram
(
s
.
str
(),
defines
);
kernels
.
push_back
(
cl
::
Kernel
(
program
,
"computeBondedForces"
));
}
...
...
@@ -206,7 +206,7 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
suffix
=
suffix4
;
else
suffix
=
suffix16
;
string
indexType
=
"uint"
+
(
width
==
1
?
""
:
OpenCLExpressionUtilities
::
intToString
(
width
));
string
indexType
=
"uint"
+
(
width
==
1
?
""
:
context
.
intToString
(
width
));
stringstream
s
;
s
<<
"if ((groups&"
<<
(
1
<<
group
)
<<
") != 0)
\n
"
;
s
<<
"for (unsigned int index = get_global_id(0); index < "
<<
numBonds
<<
"; index += get_global_size(0)) {
\n
"
;
...
...
@@ -214,13 +214,13 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
s
<<
" "
<<
indexType
<<
" buffers = bufferIndices"
<<
forceIndex
<<
"[index];
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" unsigned int atom"
<<
(
i
+
1
)
<<
" = atoms"
<<
suffix
[
i
]
<<
";
\n
"
;
s
<<
"
float
4 pos"
<<
(
i
+
1
)
<<
" = posq[atom"
<<
(
i
+
1
)
<<
"];
\n
"
;
s
<<
"
real
4 pos"
<<
(
i
+
1
)
<<
" = posq[atom"
<<
(
i
+
1
)
<<
"];
\n
"
;
}
s
<<
computeForce
<<
"
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" {
\n
"
;
s
<<
" unsigned int offset = atom"
<<
(
i
+
1
)
<<
"+buffers"
<<
suffix
[
i
]
<<
"*PADDED_NUM_ATOMS;
\n
"
;
s
<<
"
float
4 force = forceBuffers[offset];
\n
"
;
s
<<
"
real
4 force = forceBuffers[offset];
\n
"
;
s
<<
" force.xyz += force"
<<
(
i
+
1
)
<<
".xyz;
\n
"
;
s
<<
" forceBuffers[offset] = force;
\n
"
;
s
<<
" }
\n
"
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
c8dac206
...
...
@@ -68,7 +68,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
atomsWereReordered
(
false
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
useMixedPrecision
=
false
;
...
...
@@ -145,7 +145,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
this
->
deviceIndex
=
deviceIndex
;
if
(
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
()
<
minThreadBlockSize
)
throw
OpenMMException
(
"The specified OpenCL device is not compatible with OpenMM"
);
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
ThreadBlockSize
);
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
intToString
(
ThreadBlockSize
);
if
(
platformVendor
.
size
()
>=
5
&&
platformVendor
.
substr
(
0
,
5
)
==
"Intel"
)
defaultOptimizationOptions
=
""
;
else
...
...
@@ -269,7 +269,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
clearFourBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFourBuffers"
);
clearFiveBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFiveBuffers"
);
clearSixBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearSixBuffers"
);
reduce
Float
4Kernel
=
cl
::
Kernel
(
utilities
,
"reduce
Float
4Buffer"
);
reduce
Real
4Kernel
=
cl
::
Kernel
(
utilities
,
"reduce
Real
4Buffer"
);
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
...
...
@@ -316,9 +316,10 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
thread
=
new
WorkThread
();
// Create
the integration
utilities object.
// Create utilities object
s
.
integration
=
new
OpenCLIntegrationUtilities
(
*
this
,
system
);
expression
=
new
OpenCLExpressionUtilities
(
*
this
);
}
OpenCLContext
::~
OpenCLContext
()
{
...
...
@@ -346,6 +347,8 @@ OpenCLContext::~OpenCLContext() {
delete
atomIndexDevice
;
if
(
integration
!=
NULL
)
delete
integration
;
if
(
expression
!=
NULL
)
delete
expression
;
if
(
bonded
!=
NULL
)
delete
bonded
;
if
(
nonbonded
!=
NULL
)
...
...
@@ -376,10 +379,10 @@ void OpenCLContext::initialize() {
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forceBuffers
->
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl_int
>
(
2
,
paddedNumAtoms
);
reduceForcesKernel
.
setArg
<
cl_int
>
(
3
,
numForceBuffers
);
addAutoclearBuffer
(
longForceBuffer
->
getDeviceBuffer
(),
longForceBuffer
->
getSize
()
*
2
);
addAutoclearBuffer
(
*
longForceBuffer
);
}
addAutoclearBuffer
(
forceBuffers
->
getDeviceBuffer
(),
forceBuffers
->
getSize
()
*
4
);
addAutoclearBuffer
(
energyBuffer
->
getDeviceBuffer
(),
energyBuffer
->
getSize
()
);
addAutoclearBuffer
(
*
forceBuffers
);
addAutoclearBuffer
(
*
energyBuffer
);
int
bufferBytes
=
max
(
posq
->
getSize
()
*
posq
->
getElementSize
(),
energyBuffer
->
getSize
()
*
energyBuffer
->
getElementSize
());
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedMemory
=
queue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
...
...
@@ -479,6 +482,21 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
return
program
;
}
/**
 * Format a floating point value as text that can be inserted into kernel source.
 *
 * The value is written in scientific notation. When the context uses double
 * precision the stream precision is 16 and no suffix is added; otherwise the
 * stream precision is 8 and an "f" suffix is appended.
 *
 * @param value    the number to format
 * @return the formatted text
 */
string OpenCLContext::doubleToString(double value) {
    const int digits = (useDoublePrecision ? 16 : 8);
    stringstream literal;
    literal.precision(digits);
    literal << scientific << value;
    string text = literal.str();
    if (!useDoublePrecision)
        text += "f";
    return text;
}
/**
 * Format an integer as text that can be inserted into kernel source.
 *
 * The value is streamed into a freshly constructed string stream, so the
 * stream's default formatting applies.
 *
 * @param value    the number to format
 * @return the formatted text
 */
string OpenCLContext::intToString(int value) {
    ostringstream text;
    text << value;
    return text.str();
}
void
OpenCLContext
::
executeKernel
(
cl
::
Kernel
&
kernel
,
int
workUnits
,
int
blockSize
)
{
if
(
blockSize
==
-
1
)
blockSize
=
ThreadBlockSize
;
...
...
@@ -494,18 +512,23 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
}
void
OpenCLContext
::
clearBuffer
(
OpenCLArray
&
array
)
{
clearBuffer
(
array
.
getDeviceBuffer
(),
array
.
getSize
()
*
array
.
getElementSize
()
/
sizeof
(
cl_float
)
);
clearBuffer
(
array
.
getDeviceBuffer
(),
array
.
getSize
()
*
array
.
getElementSize
());
}
void
OpenCLContext
::
clearBuffer
(
cl
::
Memory
&
memory
,
int
size
)
{
int
words
=
size
/
4
;
clearBufferKernel
.
setArg
<
cl
::
Memory
>
(
0
,
memory
);
clearBufferKernel
.
setArg
<
cl_int
>
(
1
,
size
);
executeKernel
(
clearBufferKernel
,
size
,
128
);
clearBufferKernel
.
setArg
<
cl_int
>
(
1
,
words
);
executeKernel
(
clearBufferKernel
,
words
,
128
);
}
/**
 * Register an array as a buffer that should be cleared automatically.
 *
 * This forwards to the (memory, size) overload, passing the array's device
 * buffer and its element count multiplied by its element size.
 *
 * @param array    the array whose device buffer should be registered
 */
void OpenCLContext::addAutoclearBuffer(OpenCLArray& array) {
    const int sizeInBytes = array.getSize()*array.getElementSize();
    addAutoclearBuffer(array.getDeviceBuffer(), sizeInBytes);
}
void
OpenCLContext
::
addAutoclearBuffer
(
cl
::
Memory
&
memory
,
int
size
)
{
autoclearBuffers
.
push_back
(
&
memory
);
autoclearBufferSizes
.
push_back
(
size
);
autoclearBufferSizes
.
push_back
(
size
/
4
);
}
void
OpenCLContext
::
clearAutoclearBuffers
()
{
...
...
@@ -581,10 +604,10 @@ void OpenCLContext::reduceForces() {
void
OpenCLContext
::
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
)
{
int
bufferSize
=
array
.
getSize
()
/
numBuffers
;
reduce
Float
4Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
array
.
getDeviceBuffer
());
reduce
Float
4Kernel
.
setArg
<
cl_int
>
(
1
,
bufferSize
);
reduce
Float
4Kernel
.
setArg
<
cl_int
>
(
2
,
numBuffers
);
executeKernel
(
reduce
Float
4Kernel
,
bufferSize
,
128
);
reduce
Real
4Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
array
.
getDeviceBuffer
());
reduce
Real
4Kernel
.
setArg
<
cl_int
>
(
1
,
bufferSize
);
reduce
Real
4Kernel
.
setArg
<
cl_int
>
(
2
,
numBuffers
);
executeKernel
(
reduce
Real
4Kernel
,
bufferSize
,
128
);
}
void
OpenCLContext
::
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
...
...
platforms/opencl/src/OpenCLContext.h
View file @
c8dac206
...
...
@@ -45,6 +45,7 @@ namespace OpenMM {
class
OpenCLArray
;
class
OpenCLForceInfo
;
class
OpenCLIntegrationUtilities
;
class
OpenCLExpressionUtilities
;
class
OpenCLBondedUtilities
;
class
OpenCLNonbondedUtilities
;
class
System
;
...
...
@@ -314,14 +315,18 @@ public:
* Set all elements of an array to 0.
*
* @param memory the Memory to clear
* @param size the
number of float elements in
the buffer
* @param size the
size of
the buffer
in bytes
*/
void
clearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*/
void
addAutoclearBuffer
(
OpenCLArray
&
array
);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*
* @param memory the Memory to clear
* @param size the
number of float elements in
the buffer
* @param size the
size of
the buffer
in bytes
*/
void
addAutoclearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
/**
...
...
@@ -329,7 +334,7 @@ public:
*/
void
clearAutoclearBuffers
();
/**
* Given a collection of buffers packed into an array, sum them and store
* Given a collection of
floating point
buffers packed into an array, sum them and store
* the sum in the first buffer.
*
* @param array the array containing the buffers to reduce
...
...
@@ -437,6 +442,15 @@ public:
bool
getUseMixedPrecision
()
{
return
useMixedPrecision
;
}
/**
* Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision.
*/
std
::
string
doubleToString
(
double
value
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
std
::
string
intToString
(
int
value
);
/**
* Get the size of the periodic box.
*/
...
...
@@ -476,6 +490,12 @@ public:
OpenCLIntegrationUtilities
&
getIntegrationUtilities
()
{
return
*
integration
;
}
/**
 * Return a reference to the OpenCLExpressionUtilities object held by this context.
 */
OpenCLExpressionUtilities& getExpressionUtilities() {
    OpenCLExpressionUtilities& utilities = *expression;
    return utilities;
}
/**
* Get the OpenCLBondedUtilities for this context.
*/
...
...
@@ -580,7 +600,7 @@ private:
cl
::
Kernel
clearFourBuffersKernel
;
cl
::
Kernel
clearFiveBuffersKernel
;
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
reduce
Float
4Kernel
;
cl
::
Kernel
reduce
Real
4Kernel
;
cl
::
Kernel
reduceForcesKernel
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
Molecule
>
molecules
;
...
...
@@ -601,6 +621,7 @@ private:
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
OpenCLIntegrationUtilities
*
integration
;
OpenCLExpressionUtilities
*
expression
;
OpenCLBondedUtilities
*
bonded
;
OpenCLNonbondedUtilities
*
nonbonded
;
WorkThread
*
thread
;
...
...
platforms/opencl/src/OpenCLExpressionUtilities.cpp
View file @
c8dac206
...
...
@@ -33,19 +33,6 @@ using namespace OpenMM;
using
namespace
Lepton
;
using
namespace
std
;
/**
 * Format a floating point value as text that can be inserted into kernel source.
 *
 * The value is written in scientific notation with a stream precision of 8,
 * and an "f" suffix is always appended.
 *
 * @param value    the number to format
 * @return the formatted text
 */
string OpenCLExpressionUtilities::doubleToString(double value) {
    stringstream literal;
    literal.precision(8);
    literal << scientific;
    literal << value;
    literal << "f";
    return literal.str();
}
/**
 * Format an integer as text that can be inserted into kernel source.
 *
 * @param value    the number to format
 * @return the text produced by streaming the value into a string stream
 */
string OpenCLExpressionUtilities::intToString(int value) {
    ostringstream text;
    text << value;
    return text.str();
}
string
OpenCLExpressionUtilities
::
createExpressions
(
const
map
<
string
,
ParsedExpression
>&
expressions
,
const
map
<
string
,
string
>&
variables
,
const
vector
<
pair
<
string
,
string
>
>&
functions
,
const
string
&
prefix
,
const
string
&
functionParams
,
const
string
&
tempType
)
{
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variableNodes
;
...
...
@@ -75,13 +62,13 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
return
;
for
(
int
i
=
0
;
i
<
(
int
)
node
.
getChildren
().
size
();
i
++
)
processExpression
(
out
,
node
.
getChildren
()[
i
],
temps
,
functions
,
prefix
,
functionParams
,
allExpressions
,
tempType
);
string
name
=
prefix
+
intToString
(
temps
.
size
());
string
name
=
prefix
+
context
.
intToString
(
temps
.
size
());
bool
hasRecordedNode
=
false
;
out
<<
tempType
<<
" "
<<
name
<<
" = "
;
switch
(
node
.
getOperation
().
getId
())
{
case
Operation
::
CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
Constant
*>
(
&
node
.
getOperation
())
->
getValue
());
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
Constant
*>
(
&
node
.
getOperation
())
->
getValue
());
break
;
case
Operation
::
VARIABLE
:
throw
OpenMMException
(
"Unknown variable in expression: "
+
node
.
getOperation
().
getName
());
...
...
@@ -107,7 +94,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
string
valueName
=
name
;
string
derivName
=
name
;
if
(
valueNode
!=
NULL
&&
derivNode
!=
NULL
)
{
string
name2
=
prefix
+
intToString
(
temps
.
size
());
string
name2
=
prefix
+
context
.
intToString
(
temps
.
size
());
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
if
(
isDeriv
)
{
valueName
=
name2
;
...
...
@@ -236,10 +223,10 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"RECIP("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
ADD_CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
AddConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"+"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
AddConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"+"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
case
Operation
::
MULTIPLY_CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
MultiplyConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"*"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
MultiplyConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"*"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
case
Operation
::
POWER_CONSTANT
:
{
...
...
@@ -266,7 +253,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
for
(
map
<
int
,
const
ExpressionTreeNode
*>::
const_iterator
iter
=
powers
.
begin
();
iter
!=
powers
.
end
();
++
iter
)
{
if
(
iter
->
first
!=
exponent
)
{
exponents
.
push_back
(
iter
->
first
>=
0
?
iter
->
first
:
-
iter
->
first
);
string
name2
=
prefix
+
intToString
(
temps
.
size
());
string
name2
=
prefix
+
context
.
intToString
(
temps
.
size
());
names
.
push_back
(
name2
);
temps
.
push_back
(
make_pair
(
*
iter
->
second
,
name2
));
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
...
...
@@ -295,7 +282,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"}"
;
}
else
out
<<
"pow("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
doubleToString
(
exponent
)
<<
")"
;
out
<<
"pow("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
context
.
doubleToString
(
exponent
)
<<
")"
;
break
;
}
case
Operation
::
MIN
:
...
...
platforms/opencl/src/OpenCLExpressionUtilities.h
View file @
c8dac206
...
...
@@ -45,6 +45,8 @@ namespace OpenMM {
class
OPENMM_EXPORT
OpenCLExpressionUtilities
{
public:
/**
 * Create an OpenCLExpressionUtilities bound to a context.
 *
 * @param context    the context this object keeps a reference to
 */
OpenCLExpressionUtilities(OpenCLContext& context)
        : context(context) {
}
/**
* Generate the source code for calculating a set of expressions.
*
...
...
@@ -54,10 +56,10 @@ public:
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "
float
")
* @param tempType the type of value to use for temporary variables (defaults to "
real
")
*/
static
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"
float
"
);
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"
real
"
);
/**
* Generate the source code for calculating a set of expressions.
*
...
...
@@ -69,7 +71,7 @@ public:
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "float")
*/
static
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variables
,
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"float"
);
/**
* Calculate the spline coefficients for a tabulated function that appears in expressions.
...
...
@@ -79,26 +81,19 @@ public:
* @param max the value of the independent variable corresponding to the last element of values
* @return the spline coefficients
*/
static
std
::
vector
<
mm_float4
>
computeFunctionCoefficients
(
const
std
::
vector
<
double
>&
values
,
double
min
,
double
max
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static
std
::
string
doubleToString
(
double
value
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static
std
::
string
intToString
(
int
value
);
std
::
vector
<
mm_float4
>
computeFunctionCoefficients
(
const
std
::
vector
<
double
>&
values
,
double
min
,
double
max
);
class
FunctionPlaceholder
;
private:
static
void
processExpression
(
std
::
stringstream
&
out
,
const
Lepton
::
ExpressionTreeNode
&
node
,
void
processExpression
(
std
::
stringstream
&
out
,
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
vector
<
Lepton
::
ParsedExpression
>&
allExpressions
,
const
std
::
string
&
tempType
);
static
std
::
string
getTempName
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
);
static
void
findRelatedTabulatedFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
std
::
string
getTempName
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
);
void
findRelatedTabulatedFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
const
Lepton
::
ExpressionTreeNode
*&
valueNode
,
const
Lepton
::
ExpressionTreeNode
*&
derivNode
);
static
void
findRelatedPowers
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
void
findRelatedPowers
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
std
::
map
<
int
,
const
Lepton
::
ExpressionTreeNode
*>&
powers
);
OpenCLContext
&
context
;
};
/**
...
...
platforms/opencl/src/OpenCLFFT3D.cpp
View file @
c8dac206
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
1
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -47,15 +47,15 @@ void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) {
maxSize
=
1
;
zkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
in
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl
::
Buffer
>
(
1
,
out
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl_
floa
t
>
(
2
,
forward
?
1
.0
f
:
-
1
.0
f
);
zkernel
.
setArg
<
cl_
in
t
>
(
2
,
forward
?
1
:
-
1
);
context
.
executeKernel
(
zkernel
,
xsize
*
ysize
*
zsize
,
min
(
zsize
,
(
int
)
maxSize
));
xkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
out
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl
::
Buffer
>
(
1
,
in
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl_
floa
t
>
(
2
,
forward
?
1
.0
f
:
-
1
.0
f
);
xkernel
.
setArg
<
cl_
in
t
>
(
2
,
forward
?
1
:
-
1
);
context
.
executeKernel
(
xkernel
,
xsize
*
ysize
*
zsize
,
min
(
xsize
,
(
int
)
maxSize
));
ykernel
.
setArg
<
cl
::
Buffer
>
(
0
,
in
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl
::
Buffer
>
(
1
,
out
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl_
floa
t
>
(
2
,
forward
?
1
.0
f
:
-
1
.0
f
);
ykernel
.
setArg
<
cl_
in
t
>
(
2
,
forward
?
1
:
-
1
);
context
.
executeKernel
(
ykernel
,
xsize
*
ysize
*
zsize
,
min
(
ysize
,
(
int
)
maxSize
));
}
...
...
@@ -99,23 +99,23 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c4 = data"
<<
input
<<
"[i+"
<<
(
4
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 d0 = c1+c4;
\n
"
;
source
<<
"
float
2 d1 = c2+c3;
\n
"
;
source
<<
"
float
2 d2 = "
<<
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c1-c4);
\n
"
;
source
<<
"
float
2 d3 = "
<<
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c2-c3);
\n
"
;
source
<<
"
float
2 d4 = d0+d1;
\n
"
;
source
<<
"
float
2 d5 = "
<<
OpenCLExpressionUtilities
::
doubleToString
(
0.25
*
sqrt
(
5.0
))
<<
"*(d0-d1);
\n
"
;
source
<<
"
float
2 d6 = c0-0.25f*d4;
\n
"
;
source
<<
"
float
2 d7 = d6+d5;
\n
"
;
source
<<
"
float
2 d8 = d6-d5;
\n
"
;
string
coeff
=
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
0.2
*
M_PI
)
/
sin
(
0.4
*
M_PI
));
source
<<
"
float
2 d9 = sign*(
float
2) (d2.y+"
<<
coeff
<<
"*d3.y, -d2.x-"
<<
coeff
<<
"*d3.x);
\n
"
;
source
<<
"
float
2 d10 = sign*(
float
2) ("
<<
coeff
<<
"*d2.y-d3.y, d3.x-"
<<
coeff
<<
"*d2.x);
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c4 = data"
<<
input
<<
"[i+"
<<
(
4
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 d0 = c1+c4;
\n
"
;
source
<<
"
real
2 d1 = c2+c3;
\n
"
;
source
<<
"
real
2 d2 = "
<<
context
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c1-c4);
\n
"
;
source
<<
"
real
2 d3 = "
<<
context
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c2-c3);
\n
"
;
source
<<
"
real
2 d4 = d0+d1;
\n
"
;
source
<<
"
real
2 d5 = "
<<
context
.
doubleToString
(
0.25
*
sqrt
(
5.0
))
<<
"*(d0-d1);
\n
"
;
source
<<
"
real
2 d6 = c0-0.25f*d4;
\n
"
;
source
<<
"
real
2 d7 = d6+d5;
\n
"
;
source
<<
"
real
2 d8 = d6-d5;
\n
"
;
string
coeff
=
context
.
doubleToString
(
sin
(
0.2
*
M_PI
)
/
sin
(
0.4
*
M_PI
));
source
<<
"
real
2 d9 = sign*(
real
2) (d2.y+"
<<
coeff
<<
"*d3.y, -d2.x-"
<<
coeff
<<
"*d3.x);
\n
"
;
source
<<
"
real
2 d10 = sign*(
real
2) ("
<<
coeff
<<
"*d2.y-d3.y, d3.x-"
<<
coeff
<<
"*d2.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+4*j*"
<<
m
<<
"] = c0+d4;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
5
*
L
)
<<
"], d7+d9);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8+d10);
\n
"
;
...
...
@@ -134,14 +134,14 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 d0 = c0+c2;
\n
"
;
source
<<
"
float
2 d1 = c0-c2;
\n
"
;
source
<<
"
float
2 d2 = c1+c3;
\n
"
;
source
<<
"
float
2 d3 = sign*(
float
2) (c1.y-c3.y, c3.x-c1.x);
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 d0 = c0+c2;
\n
"
;
source
<<
"
real
2 d1 = c0-c2;
\n
"
;
source
<<
"
real
2 d2 = c1+c3;
\n
"
;
source
<<
"
real
2 d3 = sign*(
real
2) (c1.y-c3.y, c3.x-c1.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+3*j*"
<<
m
<<
"] = d0+d2;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
4
*
L
)
<<
"], d1+d3);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0-d2);
\n
"
;
...
...
@@ -159,12 +159,12 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 d0 = c1+c2;
\n
"
;
source
<<
"
float
2 d1 = c0-0.5f*d0;
\n
"
;
source
<<
"
float
2 d2 = sign*"
<<
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
M_PI
/
3.0
))
<<
"*(
float
2) (c1.y-c2.y, c2.x-c1.x);
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 d0 = c1+c2;
\n
"
;
source
<<
"
real
2 d1 = c0-0.5f*d0;
\n
"
;
source
<<
"
real
2 d2 = sign*"
<<
context
.
doubleToString
(
sin
(
M_PI
/
3.0
))
<<
"*(
real
2) (c1.y-c2.y, c2.x-c1.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+2*j*"
<<
m
<<
"] = c0+d0;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
3
*
L
)
<<
"], d1+d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1-d2);
\n
"
;
...
...
@@ -181,15 +181,15 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"data"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0+c1;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
2
*
L
)
<<
"], c0-c1);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
2
;
}
else
throw
OpenMMException
(
"Illegal size for FFT: "
+
OpenCLExpressionUtilities
::
intToString
(
zsize
));
throw
OpenMMException
(
"Illegal size for FFT: "
+
context
.
intToString
(
zsize
));
source
<<
"barrier(CLK_LOCAL_MEM_FENCE);
\n
"
;
source
<<
"}
\n
"
;
++
stage
;
...
...
@@ -205,16 +205,17 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"out[y*(ZSIZE*XSIZE)+get_local_id(0)*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[get_local_id(0)];
\n
"
;
source
<<
"barrier(CLK_GLOBAL_MEM_FENCE);"
;
map
<
string
,
string
>
replacements
;
replacements
[
"XSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
xsize
);
replacements
[
"YSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
ysize
);
replacements
[
"ZSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
zsize
);
replacements
[
"M_PI"
]
=
OpenCLExpressionUtilities
::
doubleToString
(
M_PI
);
replacements
[
"XSIZE"
]
=
context
.
intToString
(
xsize
);
replacements
[
"YSIZE"
]
=
context
.
intToString
(
ysize
);
replacements
[
"ZSIZE"
]
=
context
.
intToString
(
zsize
);
replacements
[
"M_PI"
]
=
context
.
doubleToString
(
M_PI
);
replacements
[
"COMPUTE_FFT"
]
=
source
.
str
();
replacements
[
"LOOP_REQUIRED"
]
=
(
loopRequired
?
"1"
:
"0"
);
cl
::
Program
program
=
context
.
createProgram
(
context
.
replaceStrings
(
OpenCLKernelSources
::
fft
,
replacements
));
cl
::
Kernel
kernel
(
program
,
"execFFT"
);
kernel
.
setArg
(
3
,
zsize
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
4
,
zsize
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
5
,
zsize
*
sizeof
(
mm_float2
),
NULL
);
int
bufferSize
=
zsize
*
(
context
.
getUseDoublePrecision
()
?
sizeof
(
mm_double2
)
:
sizeof
(
mm_float2
));
kernel
.
setArg
(
3
,
bufferSize
,
NULL
);
kernel
.
setArg
(
4
,
bufferSize
,
NULL
);
kernel
.
setArg
(
5
,
bufferSize
,
NULL
);
return
kernel
;
}
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
c8dac206
...
...
@@ -559,8 +559,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Create the CCMA kernels.
map
<
string
,
string
>
defines
;
defines
[
"NUM_CONSTRAINTS"
]
=
OpenCLExpressionUtilities
::
intToString
(
numCCMA
);
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
numAtoms
);
defines
[
"NUM_CONSTRAINTS"
]
=
context
.
intToString
(
numCCMA
);
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
numAtoms
);
cl
::
Program
ccmaProgram
=
context
.
createProgram
(
OpenCLKernelSources
::
ccma
,
defines
);
ccmaDirectionsKernel
=
cl
::
Kernel
(
ccmaProgram
,
"computeConstraintDirections"
);
ccmaPosForceKernel
=
cl
::
Kernel
(
ccmaProgram
,
"computeConstraintForce"
);
...
...
@@ -630,9 +630,9 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Create the kernels for virtual sites.
map
<
string
,
string
>
defines
;
defines
[
"NUM_2_AVERAGE"
]
=
OpenCLExpressionUtilities
::
intToString
(
num2Avg
);
defines
[
"NUM_3_AVERAGE"
]
=
OpenCLExpressionUtilities
::
intToString
(
num3Avg
);
defines
[
"NUM_OUT_OF_PLANE"
]
=
OpenCLExpressionUtilities
::
intToString
(
numOutOfPlane
);
defines
[
"NUM_2_AVERAGE"
]
=
context
.
intToString
(
num2Avg
);
defines
[
"NUM_3_AVERAGE"
]
=
context
.
intToString
(
num3Avg
);
defines
[
"NUM_OUT_OF_PLANE"
]
=
context
.
intToString
(
numOutOfPlane
);
cl
::
Program
vsiteProgram
=
context
.
createProgram
(
OpenCLKernelSources
::
virtualSites
,
defines
);
vsitePositionKernel
=
cl
::
Kernel
(
vsiteProgram
,
"computeVirtualSites"
);
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
context
.
getPosq
().
getDeviceBuffer
());
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
c8dac206
This diff is collapsed.
Click to expand it.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
c8dac206
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
1
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -267,7 +267,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
forceKernel
=
createInteractionKernel
(
kernelSource
,
parameters
,
arguments
,
true
,
true
);
if
(
useCutoff
)
{
map
<
string
,
string
>
defines
;
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
if
(
forceBufferPerAtomBlock
)
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
if
(
usePeriodic
)
...
...
@@ -281,6 +281,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
if
(
context
.
getUseDoublePrecision
())
findInteractingBlocksKernel
.
setArg
<
cl_double
>
(
0
,
cutoff
*
cutoff
);
else
findInteractingBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
blockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockBoundingBox
->
getDeviceBuffer
());
...
...
@@ -293,6 +296,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
startTileIndex
+
numTiles
);
if
(
context
.
getSIMDWidth
()
==
32
&&
!
deviceIsCpu
)
{
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
if
(
context
.
getUseDoublePrecision
())
findInteractionsWithinBlocksKernel
.
setArg
<
cl_double
>
(
0
,
cutoff
*
cutoff
);
else
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingTiles
->
getDeviceBuffer
());
...
...
@@ -315,6 +321,20 @@ int OpenCLNonbondedUtilities::findExclusionIndex(int x, int y, const vector<cl_u
throw
OpenMMException
(
"Internal error: exclusion in unexpected tile"
);
}
/**
 * Set the periodic box size as an argument of a kernel, choosing the vector
 * type that matches the precision mode reported by the context.
 *
 * @param cl      the context queried for the precision mode and the box size
 * @param kernel  the kernel whose argument is being set
 * @param index   the index of the kernel argument to set
 */
static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    // Single precision: pass the box size as an mm_float4 and stop here.
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize());
        return;
    }
    // Double precision: the kernel argument is set as an mm_double4 instead.
    kernel.setArg<mm_double4>(index, cl.getPeriodicBoxSizeDouble());
}
/**
 * Set the inverse periodic box size as an argument of a kernel, choosing the
 * vector type that matches the precision mode reported by the context.
 *
 * @param cl      the context queried for the precision mode and the inverse box size
 * @param kernel  the kernel whose argument is being set
 * @param index   the index of the kernel argument to set
 */
static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    // Single precision: pass the inverse box size as an mm_float4 and stop here.
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
        return;
    }
    // Double precision: the kernel argument is set as an mm_double4 instead.
    kernel.setArg<mm_double4>(index, cl.getInvPeriodicBoxSizeDouble());
}
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
if
(
!
useCutoff
)
return
;
...
...
@@ -327,15 +347,15 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
// Compute the neighbor list.
findBlockBoundsKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
()
);
findBlockBoundsKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
1
);
setInvPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
2
);
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
()
);
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
1
);
setInvPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
2
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
deviceIsCpu
?
1
:
-
1
);
if
(
context
.
getSIMDWidth
()
==
32
&&
!
deviceIsCpu
)
{
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
()
);
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
findInteractionsWithinBlocksKernel
,
1
);
setInvPeriodicBoxSizeArg
(
context
,
findInteractionsWithinBlocksKernel
,
2
);
context
.
executeKernel
(
findInteractionsWithinBlocksKernel
,
context
.
getNumAtoms
(),
128
);
}
}
...
...
@@ -343,8 +363,8 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
if
(
cutoff
!=
-
1.0
)
{
if
(
useCutoff
)
{
forceKernel
.
setArg
<
mm_float4
>
(
10
,
context
.
getPeriodicBoxSize
()
);
forceKernel
.
setArg
<
mm_float4
>
(
11
,
context
.
getInvPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
forceKernel
,
10
);
setInvPeriodicBoxSizeArg
(
context
,
forceKernel
,
11
);
}
context
.
executeKernel
(
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
...
...
@@ -498,11 +518,11 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
isSymmetric
)
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
forceThreadBlockSize
);
defines
[
"CUTOFF_SQUARED"
]
=
OpenCLExpressionUtilities
::
doubleToString
(
cutoff
*
cutoff
);
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
if
((
localDataSize
/
4
)
%
2
==
0
)
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
string
file
;
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.h
View file @
c8dac206
...
...
@@ -30,6 +30,7 @@
#include "OpenCLContext.h"
#include "openmm/System.h"
#include "OpenCLExpressionUtilities.h"
#include <sstream>
#include <string>
#include <vector>
...
...
@@ -287,8 +288,11 @@ public:
name
(
name
),
componentType
(
componentType
),
numComponents
(
numComponents
),
size
(
size
),
memory
(
&
memory
)
{
if
(
numComponents
==
1
)
type
=
componentType
;
else
type
=
componentType
+
OpenCLExpressionUtilities
::
intToString
(
numComponents
);
else
{
std
::
stringstream
s
;
s
<<
componentType
<<
numComponents
;
type
=
s
.
str
();
}
}
const
std
::
string
&
getName
()
const
{
return
name
;
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
c8dac206
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors.
*
* Portions copyright (c) 2011
-2012
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -54,14 +54,14 @@ using namespace std;
class
OpenCLParallelCalcForcesAndEnergyKernel
::
BeginComputationTask
:
public
OpenCLContext
::
WorkTask
{
public:
BeginComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
mm_float4
*
pinnedMemory
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
void
*
pinnedMemory
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
pinnedMemory
(
pinnedMemory
)
{
}
void
execute
()
{
// Copy coordinates over to this device and execute the kernel.
if
(
cl
.
getContextIndex
()
>
0
)
cl
.
getQueue
().
enqueueWriteBuffer
(
cl
.
getPosq
().
getDeviceBuffer
(),
CL_FALSE
,
0
,
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
pinnedMemory
);
cl
.
getQueue
().
enqueueWriteBuffer
(
cl
.
getPosq
().
getDeviceBuffer
(),
CL_FALSE
,
0
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getPosq
().
getElementSize
(
),
pinnedMemory
);
kernel
.
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
}
private:
...
...
@@ -70,13 +70,13 @@ private:
OpenCLCalcForcesAndEnergyKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
int
groups
;
mm_float4
*
pinnedMemory
;
void
*
pinnedMemory
;
};
class
OpenCLParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
OpenCLContext
::
WorkTask
{
public:
FinishComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
mm_float4
*
pinnedMemory
)
:
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
void
*
pinnedMemory
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
)
{
}
...
...
@@ -87,8 +87,9 @@ public:
if
(
includeForce
)
{
if
(
cl
.
getContextIndex
()
>
0
)
{
int
numAtoms
=
cl
.
getPaddedNumAtoms
();
void
*
dest
=
(
cl
.
getUseDoublePrecision
()
?
(
void
*
)
&
((
mm_double4
*
)
pinnedMemory
)[(
cl
.
getContextIndex
()
-
1
)
*
numAtoms
]
:
(
void
*
)
&
((
mm_float4
*
)
pinnedMemory
)[(
cl
.
getContextIndex
()
-
1
)
*
numAtoms
]);
cl
.
getQueue
().
enqueueReadBuffer
(
cl
.
getForce
().
getDeviceBuffer
(),
CL_TRUE
,
0
,
numAtoms
*
sizeof
(
mm_float4
),
&
pinnedMemory
[(
cl
.
getContextIndex
()
-
1
)
*
numAtoms
]
);
numAtoms
*
cl
.
getForce
().
getElementSize
(),
dest
);
}
else
cl
.
getQueue
().
finish
();
...
...
@@ -103,7 +104,7 @@ private:
int
groups
;
double
&
energy
;
long
long
&
completionTime
;
mm_float4
*
pinnedMemory
;
void
*
pinnedMemory
;
};
OpenCLParallelCalcForcesAndEnergyKernel
::
OpenCLParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
OpenCLPlatform
::
PlatformData
&
data
)
:
...
...
@@ -129,19 +130,20 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void
OpenCLParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
OpenCLContext
&
cl0
=
*
data
.
contexts
[
0
];
int
elementSize
=
(
cl0
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
if
(
contextForces
==
NULL
)
{
contextForces
=
OpenCLArray
::
create
<
mm_float4
>
(
cl0
,
&
cl0
.
getForceBuffers
().
getDeviceBuffer
(),
data
.
contexts
.
size
()
*
cl0
.
getPaddedNumAtoms
(),
"contextForces"
);
int
bufferBytes
=
(
data
.
contexts
.
size
()
-
1
)
*
cl0
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
)
;
int
bufferBytes
=
(
data
.
contexts
.
size
()
-
1
)
*
cl0
.
getPaddedNumAtoms
()
*
elementSize
;
pinnedPositionBuffer
=
new
cl
::
Buffer
(
cl0
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedPositionMemory
=
(
mm_float4
*
)
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedPositionBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedPositionMemory
=
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedPositionBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedForceBuffer
=
new
cl
::
Buffer
(
cl0
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedForceMemory
=
(
mm_float4
*
)
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedForceBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedForceMemory
=
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedForceBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
}
// Copy coordinates over to each device and execute the kernel.
cl0
.
getQueue
().
enqueueReadBuffer
(
cl0
.
getPosq
().
getDeviceBuffer
(),
CL_TRUE
,
0
,
cl0
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
)
,
pinnedPositionMemory
);
cl0
.
getQueue
().
enqueueReadBuffer
(
cl0
.
getPosq
().
getDeviceBuffer
(),
CL_TRUE
,
0
,
cl0
.
getPaddedNumAtoms
()
*
elementSize
,
pinnedPositionMemory
);
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
data
.
contextEnergy
[
i
]
=
0.0
;
OpenCLContext
&
cl
=
*
data
.
contexts
[
i
];
...
...
@@ -165,8 +167,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
OpenCLContext
&
cl
=
*
data
.
contexts
[
0
];
int
numAtoms
=
cl
.
getPaddedNumAtoms
();
cl
.
getQueue
().
enqueueWriteBuffer
(
contextForces
->
getDeviceBuffer
(),
CL_FALSE
,
numAtoms
*
sizeof
(
mm_float4
),
numAtoms
*
(
data
.
contexts
.
size
()
-
1
)
*
sizeof
(
mm_float4
),
pinnedForceMemory
);
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
cl
.
getQueue
().
enqueueWriteBuffer
(
contextForces
->
getDeviceBuffer
(),
CL_FALSE
,
numAtoms
*
elementSize
,
numAtoms
*
(
data
.
contexts
.
size
()
-
1
)
*
elementSize
,
pinnedForceMemory
);
cl
.
reduceBuffer
(
*
contextForces
,
data
.
contexts
.
size
());
// Balance work between the contexts by transferring a few nonbonded tiles from the context that
...
...
platforms/opencl/src/OpenCLParallelKernels.h
View file @
c8dac206
...
...
@@ -84,8 +84,8 @@ private:
OpenCLArray
*
contextForces
;
cl
::
Buffer
*
pinnedPositionBuffer
;
cl
::
Buffer
*
pinnedForceBuffer
;
mm_float4
*
pinnedPositionMemory
;
mm_float4
*
pinnedForceMemory
;
void
*
pinnedPositionMemory
;
void
*
pinnedForceMemory
;
};
/**
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
c8dac206
...
...
@@ -141,7 +141,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
device
<<
contexts
[
i
]
->
getDeviceIndex
();
}
propertyValues
[
OpenCLPlatform
::
OpenCLDeviceIndex
()]
=
device
.
str
();
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformIndex
()]
=
OpenCLExpressionUtilities
::
intToString
(
platformIndex
);
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformIndex
()]
=
contexts
[
0
]
->
intToString
(
platformIndex
);
propertyValues
[
OpenCLPlatform
::
OpenCLPrecision
()]
=
precisionProperty
;
contextEnergy
.
resize
(
contexts
.
size
());
}
...
...
platforms/opencl/src/OpenCLSort.h
View file @
c8dac206
...
...
@@ -162,7 +162,7 @@ public:
// Assign array elements to buckets.
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
context
.
clearBuffer
(
bucketOffset
->
getDeviceBuffer
(),
numBuckets
);
context
.
clearBuffer
(
*
bucketOffset
);
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl_int
>
(
1
,
data
.
getSize
());
assignElementsKernel
.
setArg
<
cl_int
>
(
2
,
numBuckets
);
...
...
platforms/opencl/src/kernels/angleForce.cl
View file @
c8dac206
float
4
v0
=
pos2-pos1
;
float
4
v1
=
pos2-pos3
;
float
4
cp
=
cross
(
v0,
v1
)
;
float
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
rp
=
max
(
SQRT
(
rp
)
,
1.0e-06f
)
;
float
r21
=
v0.x*v0.x
+
v0.y*v0.y
+
v0.z*v0.z
;
float
r23
=
v1.x*v1.x
+
v1.y*v1.y
+
v1.z*v1.z
;
float
dot
=
v0.x*v1.x
+
v0.y*v1.y
+
v0.z*v1.z
;
float
cosine
=
clamp
(
dot*RSQRT
(
r21*r23
)
,
-1.0f,
1.0f
)
;
float
theta
=
acos
(
cosine
)
;
real
4
v0
=
pos2-pos1
;
real
4
v1
=
pos2-pos3
;
real
4
cp
=
cross
(
v0,
v1
)
;
real
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
rp
=
max
(
SQRT
(
rp
)
,
(
real
)
1.0e-06f
)
;
real
r21
=
v0.x*v0.x
+
v0.y*v0.y
+
v0.z*v0.z
;
real
r23
=
v1.x*v1.x
+
v1.y*v1.y
+
v1.z*v1.z
;
real
dot
=
v0.x*v1.x
+
v0.y*v1.y
+
v0.z*v1.z
;
real
cosine
=
clamp
(
dot*RSQRT
(
r21*r23
)
,
(
real
)
-1
,
(
real
)
1
)
;
real
theta
=
acos
(
cosine
)
;
COMPUTE_FORCE
float
4
force1
=
cross
(
v0,
cp
)
*
(
dEdAngle/
(
r21*rp
))
;
float
4
force3
=
cross
(
cp,
v1
)
*
(
dEdAngle/
(
r23*rp
))
;
float
4
force2
=
-force1-force3
;
real
4
force1
=
cross
(
v0,
cp
)
*
(
dEdAngle/
(
r21*rp
))
;
real
4
force3
=
cross
(
cp,
v1
)
*
(
dEdAngle/
(
r23*rp
))
;
real
4
force2
=
-force1-force3
;
platforms/opencl/src/kernels/bondForce.cl
View file @
c8dac206
float
4
delta
=
pos2-pos1
;
float
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
real
4
delta
=
pos2-pos1
;
real
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
COMPUTE_FORCE
dEdR
=
(
r
>
0.0f
)
?
(
dEdR
/
r
)
:
0.0f
;
delta.xyz
*=
dEdR
;
float4
force1
=
delta
;
float4
force2
=
-delta
;
\ No newline at end of file
real4
force1
=
delta
;
real4
force2
=
-delta
;
\ No newline at end of file
platforms/opencl/src/kernels/cmapTorsionForce.cl
View file @
c8dac206
const
float
PI
=
3.14159265358979323846f
;
const
real
PI
=
3.14159265358979323846f
;
//
Compute
the
first
angle.
float
4
v0a
=
(
float
4
)
(
pos1.xyz-pos2.xyz,
0.0f
)
;
float
4
v1a
=
(
float
4
)
(
pos3.xyz-pos2.xyz,
0.0f
)
;
float
4
v2a
=
(
float
4
)
(
pos3.xyz-pos4.xyz,
0.0f
)
;
float
4
cp0a
=
cross
(
v0a,
v1a
)
;
float
4
cp1a
=
cross
(
v1a,
v2a
)
;
float
cosangle
=
dot
(
normalize
(
cp0a
)
,
normalize
(
cp1a
))
;
float
angleA
;
real
4
v0a
=
(
real
4
)
(
pos1.xyz-pos2.xyz,
0.0f
)
;
real
4
v1a
=
(
real
4
)
(
pos3.xyz-pos2.xyz,
0.0f
)
;
real
4
v2a
=
(
real
4
)
(
pos3.xyz-pos4.xyz,
0.0f
)
;
real
4
cp0a
=
cross
(
v0a,
v1a
)
;
real
4
cp1a
=
cross
(
v1a,
v2a
)
;
real
cosangle
=
dot
(
normalize
(
cp0a
)
,
normalize
(
cp1a
))
;
real
angleA
;
if
(
cosangle
>
0.99f
|
| cosangle < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
float
4 cross_prod = cross(cp0a, cp1a);
float
scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
real
4 cross_prod = cross(cp0a, cp1a);
real
scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
angleA = asin(SQRT(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0.0f)
angleA = PI-angleA;
...
...
@@ -25,18 +25,18 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
// Compute the second angle.
float
4 v0b = (
float
4) (pos5.xyz-pos6.xyz, 0.0f);
float
4 v1b = (
float
4) (pos7.xyz-pos6.xyz, 0.0f);
float
4 v2b = (
float
4) (pos7.xyz-pos8.xyz, 0.0f);
float
4 cp0b = cross(v0b, v1b);
float
4 cp1b = cross(v1b, v2b);
real
4 v0b = (
real
4) (pos5.xyz-pos6.xyz, 0.0f);
real
4 v1b = (
real
4) (pos7.xyz-pos6.xyz, 0.0f);
real
4 v2b = (
real
4) (pos7.xyz-pos8.xyz, 0.0f);
real
4 cp0b = cross(v0b, v1b);
real
4 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b));
float
angleB;
real
angleB;
if (cosangle > 0.99f |
|
cosangle
<
-0.99f
)
{
//
We
're
close
to
the
singularity
in
acos
()
,
so
take
the
cross
product
and
use
asin
()
instead.
float
4
cross_prod
=
cross
(
cp0b,
cp1b
)
;
float
scale
=
dot
(
cp0b,
cp0b
)
*dot
(
cp1b,
cp1b
)
;
real
4
cross_prod
=
cross
(
cp0b,
cp1b
)
;
real
scale
=
dot
(
cp0b,
cp0b
)
*dot
(
cp1b,
cp1b
)
;
angleB
=
asin
(
SQRT
(
dot
(
cross_prod,
cross_prod
)
/scale
))
;
if
(
cosangle
<
0.0f
)
angleB
=
PI-angleB
;
...
...
@@ -50,7 +50,7 @@ angleB = fmod(angleB+2.0f*PI, 2.0f*PI);
int2
pos
=
MAP_POS[MAPS[index]]
;
int
size
=
pos.y
;
float
delta
=
2*PI/size
;
real
delta
=
2*PI/size
;
int
s
=
(
int
)
(
angleA/delta
)
;
int
t
=
(
int
)
(
angleB/delta
)
;
float4
c[4]
;
...
...
@@ -59,14 +59,14 @@ c[0] = COEFF[coeffIndex];
c[1]
=
COEFF[coeffIndex+1]
;
c[2]
=
COEFF[coeffIndex+2]
;
c[3]
=
COEFF[coeffIndex+3]
;
float
da
=
angleA/delta-s
;
float
db
=
angleB/delta-t
;
real
da
=
angleA/delta-s
;
real
db
=
angleB/delta-t
;
//
Evaluate
the
spline
to
determine
the
energy
and
gradients.
float
torsionEnergy
=
0.0f
;
float
dEdA
=
0.0f
;
float
dEdB
=
0.0f
;
real
torsionEnergy
=
0.0f
;
real
dEdA
=
0.0f
;
real
dEdB
=
0.0f
;
torsionEnergy
=
da*torsionEnergy
+
((
c[3].w*db
+
c[3].z
)
*db
+
c[3].y
)
*db
+
c[3].x
;
dEdA
=
db*dEdA
+
(
3.0f*c[3].w*da
+
2.0f*c[2].w
)
*da
+
c[1].w
;
dEdB
=
da*dEdB
+
(
3.0f*c[3].w*db
+
2.0f*c[3].z
)
*db
+
c[3].y
;
...
...
@@ -85,17 +85,17 @@ energy += torsionEnergy;
//
Apply
the
force
to
the
first
torsion.
float
normCross1
=
dot
(
cp0a,
cp0a
)
;
float
normSqrBC
=
dot
(
v1a,
v1a
)
;
float
normBC
=
SQRT
(
normSqrBC
)
;
float
normCross2
=
dot
(
cp1a,
cp1a
)
;
float
dp
=
1.0f/normSqrBC
;
float
4
ff
=
(
float
4
)
((
-dEdA*normBC
)
/normCross1,
dot
(
v0a,
v1a
)
*dp,
dot
(
v2a,
v1a
)
*dp,
(
dEdA*normBC
)
/normCross2
)
;
float
4
force1
=
ff.x*cp0a
;
float
4
force4
=
ff.w*cp1a
;
float
4
d
=
ff.y*force1
-
ff.z*force4
;
float
4
force2
=
d-force1
;
float
4
force3
=
-d-force4
;
real
normCross1
=
dot
(
cp0a,
cp0a
)
;
real
normSqrBC
=
dot
(
v1a,
v1a
)
;
real
normBC
=
SQRT
(
normSqrBC
)
;
real
normCross2
=
dot
(
cp1a,
cp1a
)
;
real
dp
=
1.0f/normSqrBC
;
real
4
ff
=
(
real
4
)
((
-dEdA*normBC
)
/normCross1,
dot
(
v0a,
v1a
)
*dp,
dot
(
v2a,
v1a
)
*dp,
(
dEdA*normBC
)
/normCross2
)
;
real
4
force1
=
ff.x*cp0a
;
real
4
force4
=
ff.w*cp1a
;
real
4
d
=
ff.y*force1
-
ff.z*force4
;
real
4
force2
=
d-force1
;
real
4
force3
=
-d-force4
;
//
Apply
the
force
to
the
second
torsion.
...
...
@@ -104,9 +104,9 @@ normSqrBC = dot(v1b, v1b);
normBC
=
SQRT
(
normSqrBC
)
;
normCross2
=
dot
(
cp1b,
cp1b
)
;
dp
=
1.0f/normSqrBC
;
ff
=
(
float
4
)
((
-dEdB*normBC
)
/normCross1,
dot
(
v0b,
v1b
)
*dp,
dot
(
v2b,
v1b
)
*dp,
(
dEdB*normBC
)
/normCross2
)
;
float
4
force5
=
ff.x*cp0b
;
float
4
force8
=
ff.w*cp1b
;
ff
=
(
real
4
)
((
-dEdB*normBC
)
/normCross1,
dot
(
v0b,
v1b
)
*dp,
dot
(
v2b,
v1b
)
*dp,
(
dEdB*normBC
)
/normCross2
)
;
real
4
force5
=
ff.x*cp0b
;
real
4
force8
=
ff.w*cp1b
;
d
=
ff.y*force5
-
ff.z*force8
;
float
4
force6
=
d-force5
;
float
4
force7
=
-d-force8
;
real
4
force6
=
d-force5
;
real
4
force7
=
-d-force8
;
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
c8dac206
#
if
USE_EWALD
bool
needCorrection
=
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
if
(
!isExcluded
|
| needCorrection) {
float
tempForce = 0
.0f
;
real
tempForce = 0;
if (r2 < CUTOFF_SQUARED |
|
needCorrection
)
{
const
float
alphaR
=
EWALD_ALPHA*r
;
const
float
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
float
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
const
real
alphaR
=
EWALD_ALPHA*r
;
const
real
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
//
This
approximation
for
erfc
is
from
Abramowitz
and
Stegun
(
1964
)
p.
299.
They
cite
the
following
as
//
the
original
source:
C.
Hastings,
Jr.,
Approximations
for
Digital
Computers
(
1955
)
.
It
has
a
maximum
//
error
of
3e-7.
float
t
=
1.0f+
(
0.0705230784f+
(
0.0422820123f+
(
0.0092705272f+
(
0.0001520143f+
(
0.0002765672f+0.0000430638f*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
;
real
t
=
1.0f+
(
0.0705230784f+
(
0.0422820123f+
(
0.0092705272f+
(
0.0001520143f+
(
0.0002765672f+0.0000430638f*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
const
float
erfcAlphaR
=
RECIP
(
t*t
)
;
const
real
erfcAlphaR
=
RECIP
(
t*t
)
;
if
(
needCorrection
)
{
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
...
...
@@ -24,11 +24,11 @@ if (!isExcluded || needCorrection) {
}
else
{
#
if
HAS_LENNARD_JONES
float
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
float
sig2
=
invR*sig
;
real
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
real
sig6
=
sig2*sig2*sig2
;
real
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
+
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
epssig6*
(
sig6
-
1.0f
)
+
prefactor*erfcAlphaR
;
#
else
...
...
@@ -41,32 +41,37 @@ if (!isExcluded || needCorrection) {
}
#
else
{
#
ifdef
USE_DOUBLE_PRECISION
unsigned
long
includeInteraction
;
#
else
unsigned
int
includeInteraction
;
#
endif
#
ifdef
USE_CUTOFF
unsigned
int
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
#
else
unsigned
int
includeInteraction
=
(
!isExcluded
)
;
includeInteraction
=
(
!isExcluded
)
;
#
endif
float
tempForce
=
0
.0f
;
real
tempForce
=
0
;
#
if
HAS_LENNARD_JONES
float
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
float
sig2
=
invR*sig
;
real
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
float
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
real
sig6
=
sig2*sig2*sig2
;
real
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
tempEnergy
+=
select
(
0.0f
,
epssig6*
(
sig6
-
1.0f
)
,
includeInteraction
)
;
tempEnergy
+=
select
(
(
real
)
0
,
epssig6*
(
sig6
-1
)
,
includeInteraction
)
;
#
endif
#
if
HAS_COULOMB
#
ifdef
USE_CUTOFF
const
float
prefactor
=
138.935456f*posq1.w*posq2.w
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w
;
tempForce
+=
prefactor*
(
invR
-
2.0f*REACTION_FIELD_K*r2
)
;
tempEnergy
+=
select
(
0.0f
,
prefactor*
(
invR
+
REACTION_FIELD_K*r2
-
REACTION_FIELD_C
)
,
includeInteraction
)
;
tempEnergy
+=
select
(
(
real
)
0
,
prefactor*
(
invR
+
REACTION_FIELD_K*r2
-
REACTION_FIELD_C
)
,
includeInteraction
)
;
#
else
const
float
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
tempForce
+=
prefactor
;
tempEnergy
+=
select
(
0.0f
,
prefactor,
includeInteraction
)
;
tempEnergy
+=
select
(
(
real
)
0
,
prefactor,
includeInteraction
)
;
#
endif
#
endif
dEdR
+=
select
(
0.0f
,
tempForce*invR*invR,
includeInteraction
)
;
dEdR
+=
select
(
(
real
)
0
,
tempForce*invR*invR,
includeInteraction
)
;
}
#
endif
\ No newline at end of file
platforms/opencl/src/kernels/customCompoundBond.cl
View file @
c8dac206
/**
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
float
4
ccb_delta
(
float
4
vec1,
float
4
vec2
)
{
float
4
result
=
(
float
4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
.0f
)
;
real
4
ccb_delta
(
real
4
vec1,
real
4
vec2
)
{
real
4
result
=
(
real
4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
...
...
@@ -10,17 +10,17 @@ float4 ccb_delta(float4 vec1, float4 vec2) {
/**
*
Compute
the
angle
between
two
vectors.
The
w
component
of
each
vector
should
contain
the
squared
magnitude.
*/
float
ccb_computeAngle
(
float
4
vec1,
float
4
vec2
)
{
float
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
float
cosine
=
dotProduct*RSQRT
(
vec1.w*vec2.w
)
;
float
angle
;
real
ccb_computeAngle
(
real
4
vec1,
real
4
vec2
)
{
real
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
real
cosine
=
dotProduct*RSQRT
(
vec1.w*vec2.w
)
;
real
angle
;
if
(
cosine
>
0.99f
||
cosine
<
-0.99f
)
{
//
We
're
close
to
the
singularity
in
acos
()
,
so
take
the
cross
product
and
use
asin
()
instead.
float
4
crossProduct
=
cross
(
vec1,
vec2
)
;
float
scale
=
vec1.w*vec2.w
;
real
4
crossProduct
=
cross
(
vec1,
vec2
)
;
real
scale
=
vec1.w*vec2.w
;
angle
=
asin
(
SQRT
(
dot
(
crossProduct,
crossProduct
)
/scale
))
;
if
(
cosine
<
0
.0f
)
if
(
cosine
<
0
)
angle
=
M_PI-angle
;
}
else
...
...
@@ -31,8 +31,8 @@ float ccb_computeAngle(float4 vec1, float4 vec2) {
/**
*
Compute
the
cross
product
of
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
float
4
ccb_computeCross
(
float
4
vec1,
float
4
vec2
)
{
float
4
result
=
cross
(
vec1,
vec2
)
;
real
4
ccb_computeCross
(
real
4
vec1,
real
4
vec2
)
{
real
4
result
=
cross
(
vec1,
vec2
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
platforms/opencl/src/kernels/customExternalForce.cl
View file @
c8dac206
COMPUTE_FORCE
float
4
force1
=
(
float
4
)
(
-dEdX,
-dEdY,
-dEdZ,
0
.0f
)
;
real
4
force1
=
(
real
4
)
(
-dEdX,
-dEdY,
-dEdZ,
0
)
;
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment