Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c8dac206
Commit
c8dac206
authored
Oct 17, 2012
by
Peter Eastman
Browse files
Continuing to implement double precision in OpenCL
parent
34938e2c
Changes
36
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
635 additions
and
529 deletions
+635
-529
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+8
-8
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+38
-15
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+25
-4
platforms/opencl/src/OpenCLExpressionUtilities.cpp
platforms/opencl/src/OpenCLExpressionUtilities.cpp
+7
-20
platforms/opencl/src/OpenCLExpressionUtilities.h
platforms/opencl/src/OpenCLExpressionUtilities.h
+12
-17
platforms/opencl/src/OpenCLFFT3D.cpp
platforms/opencl/src/OpenCLFFT3D.cpp
+46
-45
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+5
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+334
-287
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+37
-17
platforms/opencl/src/OpenCLNonbondedUtilities.h
platforms/opencl/src/OpenCLNonbondedUtilities.h
+6
-2
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+16
-13
platforms/opencl/src/OpenCLParallelKernels.h
platforms/opencl/src/OpenCLParallelKernels.h
+2
-2
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+1
-1
platforms/opencl/src/OpenCLSort.h
platforms/opencl/src/OpenCLSort.h
+1
-1
platforms/opencl/src/kernels/angleForce.cl
platforms/opencl/src/kernels/angleForce.cl
+13
-13
platforms/opencl/src/kernels/bondForce.cl
platforms/opencl/src/kernels/bondForce.cl
+4
-4
platforms/opencl/src/kernels/cmapTorsionForce.cl
platforms/opencl/src/kernels/cmapTorsionForce.cl
+40
-40
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+28
-23
platforms/opencl/src/kernels/customCompoundBond.cl
platforms/opencl/src/kernels/customCompoundBond.cl
+11
-11
platforms/opencl/src/kernels/customExternalForce.cl
platforms/opencl/src/kernels/customExternalForce.cl
+1
-1
No files found.
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
c8dac206
...
@@ -58,7 +58,7 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
...
@@ -58,7 +58,7 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
std
::
string
OpenCLBondedUtilities
::
addArgument
(
cl
::
Memory
&
data
,
const
string
&
type
)
{
std
::
string
OpenCLBondedUtilities
::
addArgument
(
cl
::
Memory
&
data
,
const
string
&
type
)
{
arguments
.
push_back
(
&
data
);
arguments
.
push_back
(
&
data
);
argTypes
.
push_back
(
type
);
argTypes
.
push_back
(
type
);
return
"customArg"
+
OpenCLExpressionUtilities
::
intToString
(
arguments
.
size
());
return
"customArg"
+
context
.
intToString
(
arguments
.
size
());
}
}
void
OpenCLBondedUtilities
::
addPrefixCode
(
const
string
&
source
)
{
void
OpenCLBondedUtilities
::
addPrefixCode
(
const
string
&
source
)
{
...
@@ -164,17 +164,17 @@ void OpenCLBondedUtilities::initialize(const System& system) {
...
@@ -164,17 +164,17 @@ void OpenCLBondedUtilities::initialize(const System& system) {
stringstream
s
;
stringstream
s
;
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
s
<<
prefixCode
[
i
];
s
<<
prefixCode
[
i
];
s
<<
"__kernel void computeBondedForces(__global
float
4* restrict forceBuffers, __global
float
* restrict energyBuffer, __global const
float
4* restrict posq, int groups"
;
s
<<
"__kernel void computeBondedForces(__global
real
4* restrict forceBuffers, __global
real
* restrict energyBuffer, __global const
real
4* restrict posq, int groups"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
int
force
=
set
[
i
];
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
OpenCLExpressionUtilities
::
intToString
(
indexWidth
[
force
]));
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
context
.
intToString
(
indexWidth
[
force
]));
s
<<
", __global const "
<<
indexType
<<
"* restrict atomIndices"
<<
i
;
s
<<
", __global const "
<<
indexType
<<
"* restrict atomIndices"
<<
i
;
s
<<
", __global const "
<<
indexType
<<
"* restrict bufferIndices"
<<
i
;
s
<<
", __global const "
<<
indexType
<<
"* restrict bufferIndices"
<<
i
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
") {
\n
"
;
s
<<
") {
\n
"
;
s
<<
"
float
energy = 0.0f;
\n
"
;
s
<<
"
real
energy = 0.0f;
\n
"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
int
force
=
set
[
i
];
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
...
@@ -182,7 +182,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
...
@@ -182,7 +182,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
s
<<
"}
\n
"
;
s
<<
"}
\n
"
;
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
cl
::
Program
program
=
context
.
createProgram
(
s
.
str
(),
defines
);
cl
::
Program
program
=
context
.
createProgram
(
s
.
str
(),
defines
);
kernels
.
push_back
(
cl
::
Kernel
(
program
,
"computeBondedForces"
));
kernels
.
push_back
(
cl
::
Kernel
(
program
,
"computeBondedForces"
));
}
}
...
@@ -206,7 +206,7 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
...
@@ -206,7 +206,7 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
suffix
=
suffix4
;
suffix
=
suffix4
;
else
else
suffix
=
suffix16
;
suffix
=
suffix16
;
string
indexType
=
"uint"
+
(
width
==
1
?
""
:
OpenCLExpressionUtilities
::
intToString
(
width
));
string
indexType
=
"uint"
+
(
width
==
1
?
""
:
context
.
intToString
(
width
));
stringstream
s
;
stringstream
s
;
s
<<
"if ((groups&"
<<
(
1
<<
group
)
<<
") != 0)
\n
"
;
s
<<
"if ((groups&"
<<
(
1
<<
group
)
<<
") != 0)
\n
"
;
s
<<
"for (unsigned int index = get_global_id(0); index < "
<<
numBonds
<<
"; index += get_global_size(0)) {
\n
"
;
s
<<
"for (unsigned int index = get_global_id(0); index < "
<<
numBonds
<<
"; index += get_global_size(0)) {
\n
"
;
...
@@ -214,13 +214,13 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
...
@@ -214,13 +214,13 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
s
<<
" "
<<
indexType
<<
" buffers = bufferIndices"
<<
forceIndex
<<
"[index];
\n
"
;
s
<<
" "
<<
indexType
<<
" buffers = bufferIndices"
<<
forceIndex
<<
"[index];
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" unsigned int atom"
<<
(
i
+
1
)
<<
" = atoms"
<<
suffix
[
i
]
<<
";
\n
"
;
s
<<
" unsigned int atom"
<<
(
i
+
1
)
<<
" = atoms"
<<
suffix
[
i
]
<<
";
\n
"
;
s
<<
"
float
4 pos"
<<
(
i
+
1
)
<<
" = posq[atom"
<<
(
i
+
1
)
<<
"];
\n
"
;
s
<<
"
real
4 pos"
<<
(
i
+
1
)
<<
" = posq[atom"
<<
(
i
+
1
)
<<
"];
\n
"
;
}
}
s
<<
computeForce
<<
"
\n
"
;
s
<<
computeForce
<<
"
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" {
\n
"
;
s
<<
" {
\n
"
;
s
<<
" unsigned int offset = atom"
<<
(
i
+
1
)
<<
"+buffers"
<<
suffix
[
i
]
<<
"*PADDED_NUM_ATOMS;
\n
"
;
s
<<
" unsigned int offset = atom"
<<
(
i
+
1
)
<<
"+buffers"
<<
suffix
[
i
]
<<
"*PADDED_NUM_ATOMS;
\n
"
;
s
<<
"
float
4 force = forceBuffers[offset];
\n
"
;
s
<<
"
real
4 force = forceBuffers[offset];
\n
"
;
s
<<
" force.xyz += force"
<<
(
i
+
1
)
<<
".xyz;
\n
"
;
s
<<
" force.xyz += force"
<<
(
i
+
1
)
<<
".xyz;
\n
"
;
s
<<
" forceBuffers[offset] = force;
\n
"
;
s
<<
" forceBuffers[offset] = force;
\n
"
;
s
<<
" }
\n
"
;
s
<<
" }
\n
"
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
c8dac206
...
@@ -68,7 +68,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
...
@@ -68,7 +68,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
atomsWereReordered
(
false
),
posq
(
NULL
),
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
atomsWereReordered
(
false
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
useDoublePrecision
=
false
;
useMixedPrecision
=
false
;
useMixedPrecision
=
false
;
...
@@ -145,7 +145,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -145,7 +145,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
this
->
deviceIndex
=
deviceIndex
;
this
->
deviceIndex
=
deviceIndex
;
if
(
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
()
<
minThreadBlockSize
)
if
(
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
()
<
minThreadBlockSize
)
throw
OpenMMException
(
"The specified OpenCL device is not compatible with OpenMM"
);
throw
OpenMMException
(
"The specified OpenCL device is not compatible with OpenMM"
);
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
ThreadBlockSize
);
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
intToString
(
ThreadBlockSize
);
if
(
platformVendor
.
size
()
>=
5
&&
platformVendor
.
substr
(
0
,
5
)
==
"Intel"
)
if
(
platformVendor
.
size
()
>=
5
&&
platformVendor
.
substr
(
0
,
5
)
==
"Intel"
)
defaultOptimizationOptions
=
""
;
defaultOptimizationOptions
=
""
;
else
else
...
@@ -269,7 +269,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -269,7 +269,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
clearFourBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFourBuffers"
);
clearFourBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFourBuffers"
);
clearFiveBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFiveBuffers"
);
clearFiveBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFiveBuffers"
);
clearSixBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearSixBuffers"
);
clearSixBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearSixBuffers"
);
reduce
Float
4Kernel
=
cl
::
Kernel
(
utilities
,
"reduce
Float
4Buffer"
);
reduce
Real
4Kernel
=
cl
::
Kernel
(
utilities
,
"reduce
Real
4Buffer"
);
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
...
@@ -316,9 +316,10 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -316,9 +316,10 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
thread
=
new
WorkThread
();
thread
=
new
WorkThread
();
// Create
the integration
utilities object.
// Create utilities object
s
.
integration
=
new
OpenCLIntegrationUtilities
(
*
this
,
system
);
integration
=
new
OpenCLIntegrationUtilities
(
*
this
,
system
);
expression
=
new
OpenCLExpressionUtilities
(
*
this
);
}
}
OpenCLContext
::~
OpenCLContext
()
{
OpenCLContext
::~
OpenCLContext
()
{
...
@@ -346,6 +347,8 @@ OpenCLContext::~OpenCLContext() {
...
@@ -346,6 +347,8 @@ OpenCLContext::~OpenCLContext() {
delete
atomIndexDevice
;
delete
atomIndexDevice
;
if
(
integration
!=
NULL
)
if
(
integration
!=
NULL
)
delete
integration
;
delete
integration
;
if
(
expression
!=
NULL
)
delete
expression
;
if
(
bonded
!=
NULL
)
if
(
bonded
!=
NULL
)
delete
bonded
;
delete
bonded
;
if
(
nonbonded
!=
NULL
)
if
(
nonbonded
!=
NULL
)
...
@@ -376,10 +379,10 @@ void OpenCLContext::initialize() {
...
@@ -376,10 +379,10 @@ void OpenCLContext::initialize() {
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forceBuffers
->
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forceBuffers
->
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl_int
>
(
2
,
paddedNumAtoms
);
reduceForcesKernel
.
setArg
<
cl_int
>
(
2
,
paddedNumAtoms
);
reduceForcesKernel
.
setArg
<
cl_int
>
(
3
,
numForceBuffers
);
reduceForcesKernel
.
setArg
<
cl_int
>
(
3
,
numForceBuffers
);
addAutoclearBuffer
(
longForceBuffer
->
getDeviceBuffer
(),
longForceBuffer
->
getSize
()
*
2
);
addAutoclearBuffer
(
*
longForceBuffer
);
}
}
addAutoclearBuffer
(
forceBuffers
->
getDeviceBuffer
(),
forceBuffers
->
getSize
()
*
4
);
addAutoclearBuffer
(
*
forceBuffers
);
addAutoclearBuffer
(
energyBuffer
->
getDeviceBuffer
(),
energyBuffer
->
getSize
()
);
addAutoclearBuffer
(
*
energyBuffer
);
int
bufferBytes
=
max
(
posq
->
getSize
()
*
posq
->
getElementSize
(),
energyBuffer
->
getSize
()
*
energyBuffer
->
getElementSize
());
int
bufferBytes
=
max
(
posq
->
getSize
()
*
posq
->
getElementSize
(),
energyBuffer
->
getSize
()
*
energyBuffer
->
getElementSize
());
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedMemory
=
queue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedMemory
=
queue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
...
@@ -479,6 +482,21 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
...
@@ -479,6 +482,21 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
return
program
;
return
program
;
}
}
string
OpenCLContext
::
doubleToString
(
double
value
)
{
stringstream
s
;
s
.
precision
(
useDoublePrecision
?
16
:
8
);
s
<<
scientific
<<
value
;
if
(
!
useDoublePrecision
)
s
<<
"f"
;
return
s
.
str
();
}
string
OpenCLContext
::
intToString
(
int
value
)
{
stringstream
s
;
s
<<
value
;
return
s
.
str
();
}
void
OpenCLContext
::
executeKernel
(
cl
::
Kernel
&
kernel
,
int
workUnits
,
int
blockSize
)
{
void
OpenCLContext
::
executeKernel
(
cl
::
Kernel
&
kernel
,
int
workUnits
,
int
blockSize
)
{
if
(
blockSize
==
-
1
)
if
(
blockSize
==
-
1
)
blockSize
=
ThreadBlockSize
;
blockSize
=
ThreadBlockSize
;
...
@@ -494,18 +512,23 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
...
@@ -494,18 +512,23 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
}
}
void
OpenCLContext
::
clearBuffer
(
OpenCLArray
&
array
)
{
void
OpenCLContext
::
clearBuffer
(
OpenCLArray
&
array
)
{
clearBuffer
(
array
.
getDeviceBuffer
(),
array
.
getSize
()
*
array
.
getElementSize
()
/
sizeof
(
cl_float
)
);
clearBuffer
(
array
.
getDeviceBuffer
(),
array
.
getSize
()
*
array
.
getElementSize
());
}
}
void
OpenCLContext
::
clearBuffer
(
cl
::
Memory
&
memory
,
int
size
)
{
void
OpenCLContext
::
clearBuffer
(
cl
::
Memory
&
memory
,
int
size
)
{
int
words
=
size
/
4
;
clearBufferKernel
.
setArg
<
cl
::
Memory
>
(
0
,
memory
);
clearBufferKernel
.
setArg
<
cl
::
Memory
>
(
0
,
memory
);
clearBufferKernel
.
setArg
<
cl_int
>
(
1
,
size
);
clearBufferKernel
.
setArg
<
cl_int
>
(
1
,
words
);
executeKernel
(
clearBufferKernel
,
size
,
128
);
executeKernel
(
clearBufferKernel
,
words
,
128
);
}
void
OpenCLContext
::
addAutoclearBuffer
(
OpenCLArray
&
array
)
{
addAutoclearBuffer
(
array
.
getDeviceBuffer
(),
array
.
getSize
()
*
array
.
getElementSize
());
}
}
void
OpenCLContext
::
addAutoclearBuffer
(
cl
::
Memory
&
memory
,
int
size
)
{
void
OpenCLContext
::
addAutoclearBuffer
(
cl
::
Memory
&
memory
,
int
size
)
{
autoclearBuffers
.
push_back
(
&
memory
);
autoclearBuffers
.
push_back
(
&
memory
);
autoclearBufferSizes
.
push_back
(
size
);
autoclearBufferSizes
.
push_back
(
size
/
4
);
}
}
void
OpenCLContext
::
clearAutoclearBuffers
()
{
void
OpenCLContext
::
clearAutoclearBuffers
()
{
...
@@ -581,10 +604,10 @@ void OpenCLContext::reduceForces() {
...
@@ -581,10 +604,10 @@ void OpenCLContext::reduceForces() {
void
OpenCLContext
::
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
)
{
void
OpenCLContext
::
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
)
{
int
bufferSize
=
array
.
getSize
()
/
numBuffers
;
int
bufferSize
=
array
.
getSize
()
/
numBuffers
;
reduce
Float
4Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
array
.
getDeviceBuffer
());
reduce
Real
4Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
array
.
getDeviceBuffer
());
reduce
Float
4Kernel
.
setArg
<
cl_int
>
(
1
,
bufferSize
);
reduce
Real
4Kernel
.
setArg
<
cl_int
>
(
1
,
bufferSize
);
reduce
Float
4Kernel
.
setArg
<
cl_int
>
(
2
,
numBuffers
);
reduce
Real
4Kernel
.
setArg
<
cl_int
>
(
2
,
numBuffers
);
executeKernel
(
reduce
Float
4Kernel
,
bufferSize
,
128
);
executeKernel
(
reduce
Real
4Kernel
,
bufferSize
,
128
);
}
}
void
OpenCLContext
::
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
void
OpenCLContext
::
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
...
...
platforms/opencl/src/OpenCLContext.h
View file @
c8dac206
...
@@ -45,6 +45,7 @@ namespace OpenMM {
...
@@ -45,6 +45,7 @@ namespace OpenMM {
class
OpenCLArray
;
class
OpenCLArray
;
class
OpenCLForceInfo
;
class
OpenCLForceInfo
;
class
OpenCLIntegrationUtilities
;
class
OpenCLIntegrationUtilities
;
class
OpenCLExpressionUtilities
;
class
OpenCLBondedUtilities
;
class
OpenCLBondedUtilities
;
class
OpenCLNonbondedUtilities
;
class
OpenCLNonbondedUtilities
;
class
System
;
class
System
;
...
@@ -314,14 +315,18 @@ public:
...
@@ -314,14 +315,18 @@ public:
* Set all elements of an array to 0.
* Set all elements of an array to 0.
*
*
* @param memory the Memory to clear
* @param memory the Memory to clear
* @param size the
number of float elements in
the buffer
* @param size the
size of
the buffer
in bytes
*/
*/
void
clearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
void
clearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*/
void
addAutoclearBuffer
(
OpenCLArray
&
array
);
/**
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*
*
* @param memory the Memory to clear
* @param memory the Memory to clear
* @param size the
number of float elements in
the buffer
* @param size the
size of
the buffer
in bytes
*/
*/
void
addAutoclearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
void
addAutoclearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
/**
/**
...
@@ -329,7 +334,7 @@ public:
...
@@ -329,7 +334,7 @@ public:
*/
*/
void
clearAutoclearBuffers
();
void
clearAutoclearBuffers
();
/**
/**
* Given a collection of buffers packed into an array, sum them and store
* Given a collection of
floating point
buffers packed into an array, sum them and store
* the sum in the first buffer.
* the sum in the first buffer.
*
*
* @param array the array containing the buffers to reduce
* @param array the array containing the buffers to reduce
...
@@ -437,6 +442,15 @@ public:
...
@@ -437,6 +442,15 @@ public:
bool
getUseMixedPrecision
()
{
bool
getUseMixedPrecision
()
{
return
useMixedPrecision
;
return
useMixedPrecision
;
}
}
/**
* Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision.
*/
std
::
string
doubleToString
(
double
value
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
std
::
string
intToString
(
int
value
);
/**
/**
* Get the size of the periodic box.
* Get the size of the periodic box.
*/
*/
...
@@ -476,6 +490,12 @@ public:
...
@@ -476,6 +490,12 @@ public:
OpenCLIntegrationUtilities
&
getIntegrationUtilities
()
{
OpenCLIntegrationUtilities
&
getIntegrationUtilities
()
{
return
*
integration
;
return
*
integration
;
}
}
/**
* Get the OpenCLExpressionUtilities for this context.
*/
OpenCLExpressionUtilities
&
getExpressionUtilities
()
{
return
*
expression
;
}
/**
/**
* Get the OpenCLBondedUtilities for this context.
* Get the OpenCLBondedUtilities for this context.
*/
*/
...
@@ -580,7 +600,7 @@ private:
...
@@ -580,7 +600,7 @@ private:
cl
::
Kernel
clearFourBuffersKernel
;
cl
::
Kernel
clearFourBuffersKernel
;
cl
::
Kernel
clearFiveBuffersKernel
;
cl
::
Kernel
clearFiveBuffersKernel
;
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
reduce
Float
4Kernel
;
cl
::
Kernel
reduce
Real
4Kernel
;
cl
::
Kernel
reduceForcesKernel
;
cl
::
Kernel
reduceForcesKernel
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
Molecule
>
molecules
;
...
@@ -601,6 +621,7 @@ private:
...
@@ -601,6 +621,7 @@ private:
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
OpenCLIntegrationUtilities
*
integration
;
OpenCLIntegrationUtilities
*
integration
;
OpenCLExpressionUtilities
*
expression
;
OpenCLBondedUtilities
*
bonded
;
OpenCLBondedUtilities
*
bonded
;
OpenCLNonbondedUtilities
*
nonbonded
;
OpenCLNonbondedUtilities
*
nonbonded
;
WorkThread
*
thread
;
WorkThread
*
thread
;
...
...
platforms/opencl/src/OpenCLExpressionUtilities.cpp
View file @
c8dac206
...
@@ -33,19 +33,6 @@ using namespace OpenMM;
...
@@ -33,19 +33,6 @@ using namespace OpenMM;
using
namespace
Lepton
;
using
namespace
Lepton
;
using
namespace
std
;
using
namespace
std
;
string
OpenCLExpressionUtilities
::
doubleToString
(
double
value
)
{
stringstream
s
;
s
.
precision
(
8
);
s
<<
scientific
<<
value
<<
"f"
;
return
s
.
str
();
}
string
OpenCLExpressionUtilities
::
intToString
(
int
value
)
{
stringstream
s
;
s
<<
value
;
return
s
.
str
();
}
string
OpenCLExpressionUtilities
::
createExpressions
(
const
map
<
string
,
ParsedExpression
>&
expressions
,
const
map
<
string
,
string
>&
variables
,
string
OpenCLExpressionUtilities
::
createExpressions
(
const
map
<
string
,
ParsedExpression
>&
expressions
,
const
map
<
string
,
string
>&
variables
,
const
vector
<
pair
<
string
,
string
>
>&
functions
,
const
string
&
prefix
,
const
string
&
functionParams
,
const
string
&
tempType
)
{
const
vector
<
pair
<
string
,
string
>
>&
functions
,
const
string
&
prefix
,
const
string
&
functionParams
,
const
string
&
tempType
)
{
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variableNodes
;
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variableNodes
;
...
@@ -75,13 +62,13 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -75,13 +62,13 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
return
;
return
;
for
(
int
i
=
0
;
i
<
(
int
)
node
.
getChildren
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
node
.
getChildren
().
size
();
i
++
)
processExpression
(
out
,
node
.
getChildren
()[
i
],
temps
,
functions
,
prefix
,
functionParams
,
allExpressions
,
tempType
);
processExpression
(
out
,
node
.
getChildren
()[
i
],
temps
,
functions
,
prefix
,
functionParams
,
allExpressions
,
tempType
);
string
name
=
prefix
+
intToString
(
temps
.
size
());
string
name
=
prefix
+
context
.
intToString
(
temps
.
size
());
bool
hasRecordedNode
=
false
;
bool
hasRecordedNode
=
false
;
out
<<
tempType
<<
" "
<<
name
<<
" = "
;
out
<<
tempType
<<
" "
<<
name
<<
" = "
;
switch
(
node
.
getOperation
().
getId
())
{
switch
(
node
.
getOperation
().
getId
())
{
case
Operation
::
CONSTANT
:
case
Operation
::
CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
Constant
*>
(
&
node
.
getOperation
())
->
getValue
());
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
Constant
*>
(
&
node
.
getOperation
())
->
getValue
());
break
;
break
;
case
Operation
::
VARIABLE
:
case
Operation
::
VARIABLE
:
throw
OpenMMException
(
"Unknown variable in expression: "
+
node
.
getOperation
().
getName
());
throw
OpenMMException
(
"Unknown variable in expression: "
+
node
.
getOperation
().
getName
());
...
@@ -107,7 +94,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -107,7 +94,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
string
valueName
=
name
;
string
valueName
=
name
;
string
derivName
=
name
;
string
derivName
=
name
;
if
(
valueNode
!=
NULL
&&
derivNode
!=
NULL
)
{
if
(
valueNode
!=
NULL
&&
derivNode
!=
NULL
)
{
string
name2
=
prefix
+
intToString
(
temps
.
size
());
string
name2
=
prefix
+
context
.
intToString
(
temps
.
size
());
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
if
(
isDeriv
)
{
if
(
isDeriv
)
{
valueName
=
name2
;
valueName
=
name2
;
...
@@ -236,10 +223,10 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -236,10 +223,10 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"RECIP("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"RECIP("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
ADD_CONSTANT
:
case
Operation
::
ADD_CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
AddConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"+"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
AddConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"+"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
break
;
case
Operation
::
MULTIPLY_CONSTANT
:
case
Operation
::
MULTIPLY_CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
MultiplyConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"*"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
MultiplyConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"*"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
break
;
case
Operation
::
POWER_CONSTANT
:
case
Operation
::
POWER_CONSTANT
:
{
{
...
@@ -266,7 +253,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -266,7 +253,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
for
(
map
<
int
,
const
ExpressionTreeNode
*>::
const_iterator
iter
=
powers
.
begin
();
iter
!=
powers
.
end
();
++
iter
)
{
for
(
map
<
int
,
const
ExpressionTreeNode
*>::
const_iterator
iter
=
powers
.
begin
();
iter
!=
powers
.
end
();
++
iter
)
{
if
(
iter
->
first
!=
exponent
)
{
if
(
iter
->
first
!=
exponent
)
{
exponents
.
push_back
(
iter
->
first
>=
0
?
iter
->
first
:
-
iter
->
first
);
exponents
.
push_back
(
iter
->
first
>=
0
?
iter
->
first
:
-
iter
->
first
);
string
name2
=
prefix
+
intToString
(
temps
.
size
());
string
name2
=
prefix
+
context
.
intToString
(
temps
.
size
());
names
.
push_back
(
name2
);
names
.
push_back
(
name2
);
temps
.
push_back
(
make_pair
(
*
iter
->
second
,
name2
));
temps
.
push_back
(
make_pair
(
*
iter
->
second
,
name2
));
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
...
@@ -295,7 +282,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -295,7 +282,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"}"
;
out
<<
"}"
;
}
}
else
else
out
<<
"pow("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
doubleToString
(
exponent
)
<<
")"
;
out
<<
"pow("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
context
.
doubleToString
(
exponent
)
<<
")"
;
break
;
break
;
}
}
case
Operation
::
MIN
:
case
Operation
::
MIN
:
...
...
platforms/opencl/src/OpenCLExpressionUtilities.h
View file @
c8dac206
...
@@ -45,6 +45,8 @@ namespace OpenMM {
...
@@ -45,6 +45,8 @@ namespace OpenMM {
class
OPENMM_EXPORT
OpenCLExpressionUtilities
{
class
OPENMM_EXPORT
OpenCLExpressionUtilities
{
public:
public:
OpenCLExpressionUtilities
(
OpenCLContext
&
context
)
:
context
(
context
)
{
}
/**
/**
* Generate the source code for calculating a set of expressions.
* Generate the source code for calculating a set of expressions.
*
*
...
@@ -54,10 +56,10 @@ public:
...
@@ -54,10 +56,10 @@ public:
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables
* @param prefix a prefix to put in front of temporary variables
* @param functionParams the variable name containing the parameters for each tabulated function
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "
float
")
* @param tempType the type of value to use for temporary variables (defaults to "
real
")
*/
*/
static
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
variables
,
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"
float
"
);
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"
real
"
);
/**
/**
* Generate the source code for calculating a set of expressions.
* Generate the source code for calculating a set of expressions.
*
*
...
@@ -69,7 +71,7 @@ public:
...
@@ -69,7 +71,7 @@ public:
* @param functionParams the variable name containing the parameters for each tabulated function
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "float")
* @param tempType the type of value to use for temporary variables (defaults to "float")
*/
*/
static
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variables
,
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"float"
);
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"float"
);
/**
/**
* Calculate the spline coefficients for a tabulated function that appears in expressions.
* Calculate the spline coefficients for a tabulated function that appears in expressions.
...
@@ -79,26 +81,19 @@ public:
...
@@ -79,26 +81,19 @@ public:
* @param max the value of the independent variable corresponding to the last element of values
* @param max the value of the independent variable corresponding to the last element of values
* @return the spline coefficients
* @return the spline coefficients
*/
*/
static
std
::
vector
<
mm_float4
>
computeFunctionCoefficients
(
const
std
::
vector
<
double
>&
values
,
double
min
,
double
max
);
std
::
vector
<
mm_float4
>
computeFunctionCoefficients
(
const
std
::
vector
<
double
>&
values
,
double
min
,
double
max
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static
std
::
string
doubleToString
(
double
value
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static
std
::
string
intToString
(
int
value
);
class
FunctionPlaceholder
;
class
FunctionPlaceholder
;
private:
private:
static
void
processExpression
(
std
::
stringstream
&
out
,
const
Lepton
::
ExpressionTreeNode
&
node
,
void
processExpression
(
std
::
stringstream
&
out
,
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
vector
<
Lepton
::
ParsedExpression
>&
allExpressions
,
const
std
::
string
&
tempType
);
const
std
::
vector
<
Lepton
::
ParsedExpression
>&
allExpressions
,
const
std
::
string
&
tempType
);
static
std
::
string
getTempName
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
);
std
::
string
getTempName
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
);
static
void
findRelatedTabulatedFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
void
findRelatedTabulatedFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
const
Lepton
::
ExpressionTreeNode
*&
valueNode
,
const
Lepton
::
ExpressionTreeNode
*&
derivNode
);
const
Lepton
::
ExpressionTreeNode
*&
valueNode
,
const
Lepton
::
ExpressionTreeNode
*&
derivNode
);
static
void
findRelatedPowers
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
void
findRelatedPowers
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
std
::
map
<
int
,
const
Lepton
::
ExpressionTreeNode
*>&
powers
);
std
::
map
<
int
,
const
Lepton
::
ExpressionTreeNode
*>&
powers
);
OpenCLContext
&
context
;
};
};
/**
/**
...
...
platforms/opencl/src/OpenCLFFT3D.cpp
View file @
c8dac206
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
1
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -47,15 +47,15 @@ void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) {
...
@@ -47,15 +47,15 @@ void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) {
maxSize
=
1
;
maxSize
=
1
;
zkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
in
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
in
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl
::
Buffer
>
(
1
,
out
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl
::
Buffer
>
(
1
,
out
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl_
floa
t
>
(
2
,
forward
?
1
.0
f
:
-
1
.0
f
);
zkernel
.
setArg
<
cl_
in
t
>
(
2
,
forward
?
1
:
-
1
);
context
.
executeKernel
(
zkernel
,
xsize
*
ysize
*
zsize
,
min
(
zsize
,
(
int
)
maxSize
));
context
.
executeKernel
(
zkernel
,
xsize
*
ysize
*
zsize
,
min
(
zsize
,
(
int
)
maxSize
));
xkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
out
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
out
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl
::
Buffer
>
(
1
,
in
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl
::
Buffer
>
(
1
,
in
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl_
floa
t
>
(
2
,
forward
?
1
.0
f
:
-
1
.0
f
);
xkernel
.
setArg
<
cl_
in
t
>
(
2
,
forward
?
1
:
-
1
);
context
.
executeKernel
(
xkernel
,
xsize
*
ysize
*
zsize
,
min
(
xsize
,
(
int
)
maxSize
));
context
.
executeKernel
(
xkernel
,
xsize
*
ysize
*
zsize
,
min
(
xsize
,
(
int
)
maxSize
));
ykernel
.
setArg
<
cl
::
Buffer
>
(
0
,
in
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl
::
Buffer
>
(
0
,
in
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl
::
Buffer
>
(
1
,
out
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl
::
Buffer
>
(
1
,
out
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl_
floa
t
>
(
2
,
forward
?
1
.0
f
:
-
1
.0
f
);
ykernel
.
setArg
<
cl_
in
t
>
(
2
,
forward
?
1
:
-
1
);
context
.
executeKernel
(
ykernel
,
xsize
*
ysize
*
zsize
,
min
(
ysize
,
(
int
)
maxSize
));
context
.
executeKernel
(
ykernel
,
xsize
*
ysize
*
zsize
,
min
(
ysize
,
(
int
)
maxSize
));
}
}
...
@@ -99,23 +99,23 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
...
@@ -99,23 +99,23 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
source
<<
"int i = get_local_id(0);
\n
"
;
}
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c4 = data"
<<
input
<<
"[i+"
<<
(
4
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c4 = data"
<<
input
<<
"[i+"
<<
(
4
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 d0 = c1+c4;
\n
"
;
source
<<
"
real
2 d0 = c1+c4;
\n
"
;
source
<<
"
float
2 d1 = c2+c3;
\n
"
;
source
<<
"
real
2 d1 = c2+c3;
\n
"
;
source
<<
"
float
2 d2 = "
<<
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c1-c4);
\n
"
;
source
<<
"
real
2 d2 = "
<<
context
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c1-c4);
\n
"
;
source
<<
"
float
2 d3 = "
<<
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c2-c3);
\n
"
;
source
<<
"
real
2 d3 = "
<<
context
.
doubleToString
(
sin
(
0.4
*
M_PI
))
<<
"*(c2-c3);
\n
"
;
source
<<
"
float
2 d4 = d0+d1;
\n
"
;
source
<<
"
real
2 d4 = d0+d1;
\n
"
;
source
<<
"
float
2 d5 = "
<<
OpenCLExpressionUtilities
::
doubleToString
(
0.25
*
sqrt
(
5.0
))
<<
"*(d0-d1);
\n
"
;
source
<<
"
real
2 d5 = "
<<
context
.
doubleToString
(
0.25
*
sqrt
(
5.0
))
<<
"*(d0-d1);
\n
"
;
source
<<
"
float
2 d6 = c0-0.25f*d4;
\n
"
;
source
<<
"
real
2 d6 = c0-0.25f*d4;
\n
"
;
source
<<
"
float
2 d7 = d6+d5;
\n
"
;
source
<<
"
real
2 d7 = d6+d5;
\n
"
;
source
<<
"
float
2 d8 = d6-d5;
\n
"
;
source
<<
"
real
2 d8 = d6-d5;
\n
"
;
string
coeff
=
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
0.2
*
M_PI
)
/
sin
(
0.4
*
M_PI
));
string
coeff
=
context
.
doubleToString
(
sin
(
0.2
*
M_PI
)
/
sin
(
0.4
*
M_PI
));
source
<<
"
float
2 d9 = sign*(
float
2) (d2.y+"
<<
coeff
<<
"*d3.y, -d2.x-"
<<
coeff
<<
"*d3.x);
\n
"
;
source
<<
"
real
2 d9 = sign*(
real
2) (d2.y+"
<<
coeff
<<
"*d3.y, -d2.x-"
<<
coeff
<<
"*d3.x);
\n
"
;
source
<<
"
float
2 d10 = sign*(
float
2) ("
<<
coeff
<<
"*d2.y-d3.y, d3.x-"
<<
coeff
<<
"*d2.x);
\n
"
;
source
<<
"
real
2 d10 = sign*(
real
2) ("
<<
coeff
<<
"*d2.y-d3.y, d3.x-"
<<
coeff
<<
"*d2.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+4*j*"
<<
m
<<
"] = c0+d4;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+4*j*"
<<
m
<<
"] = c0+d4;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
5
*
L
)
<<
"], d7+d9);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
5
*
L
)
<<
"], d7+d9);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8+d10);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8+d10);
\n
"
;
...
@@ -134,14 +134,14 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
...
@@ -134,14 +134,14 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
source
<<
"int i = get_local_id(0);
\n
"
;
}
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c3 = data"
<<
input
<<
"[i+"
<<
(
3
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 d0 = c0+c2;
\n
"
;
source
<<
"
real
2 d0 = c0+c2;
\n
"
;
source
<<
"
float
2 d1 = c0-c2;
\n
"
;
source
<<
"
real
2 d1 = c0-c2;
\n
"
;
source
<<
"
float
2 d2 = c1+c3;
\n
"
;
source
<<
"
real
2 d2 = c1+c3;
\n
"
;
source
<<
"
float
2 d3 = sign*(
float
2) (c1.y-c3.y, c3.x-c1.x);
\n
"
;
source
<<
"
real
2 d3 = sign*(
real
2) (c1.y-c3.y, c3.x-c1.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+3*j*"
<<
m
<<
"] = d0+d2;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+3*j*"
<<
m
<<
"] = d0+d2;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
4
*
L
)
<<
"], d1+d3);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
4
*
L
)
<<
"], d1+d3);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0-d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0-d2);
\n
"
;
...
@@ -159,12 +159,12 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
...
@@ -159,12 +159,12 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
source
<<
"int i = get_local_id(0);
\n
"
;
}
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c2 = data"
<<
input
<<
"[i+"
<<
(
2
*
L
*
m
)
<<
"];
\n
"
;
source
<<
"
float
2 d0 = c1+c2;
\n
"
;
source
<<
"
real
2 d0 = c1+c2;
\n
"
;
source
<<
"
float
2 d1 = c0-0.5f*d0;
\n
"
;
source
<<
"
real
2 d1 = c0-0.5f*d0;
\n
"
;
source
<<
"
float
2 d2 = sign*"
<<
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
M_PI
/
3.0
))
<<
"*(
float
2) (c1.y-c2.y, c2.x-c1.x);
\n
"
;
source
<<
"
real
2 d2 = sign*"
<<
context
.
doubleToString
(
sin
(
M_PI
/
3.0
))
<<
"*(
real
2) (c1.y-c2.y, c2.x-c1.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+2*j*"
<<
m
<<
"] = c0+d0;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+2*j*"
<<
m
<<
"] = c0+d0;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
3
*
L
)
<<
"], d1+d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
3
*
L
)
<<
"], d1+d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1-d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
zsize
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1-d2);
\n
"
;
...
@@ -181,15 +181,15 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
...
@@ -181,15 +181,15 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"int i = get_local_id(0);
\n
"
;
source
<<
"int i = get_local_id(0);
\n
"
;
}
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"
float
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
real
2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"
float
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"
real
2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"data"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0+c1;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0+c1;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
2
*
L
)
<<
"], c0-c1);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
zsize
<<
"/"
<<
(
2
*
L
)
<<
"], c0-c1);
\n
"
;
source
<<
"}
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
2
;
m
=
m
*
2
;
}
}
else
else
throw
OpenMMException
(
"Illegal size for FFT: "
+
OpenCLExpressionUtilities
::
intToString
(
zsize
));
throw
OpenMMException
(
"Illegal size for FFT: "
+
context
.
intToString
(
zsize
));
source
<<
"barrier(CLK_LOCAL_MEM_FENCE);
\n
"
;
source
<<
"barrier(CLK_LOCAL_MEM_FENCE);
\n
"
;
source
<<
"}
\n
"
;
source
<<
"}
\n
"
;
++
stage
;
++
stage
;
...
@@ -205,16 +205,17 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
...
@@ -205,16 +205,17 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source
<<
"out[y*(ZSIZE*XSIZE)+get_local_id(0)*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[get_local_id(0)];
\n
"
;
source
<<
"out[y*(ZSIZE*XSIZE)+get_local_id(0)*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[get_local_id(0)];
\n
"
;
source
<<
"barrier(CLK_GLOBAL_MEM_FENCE);"
;
source
<<
"barrier(CLK_GLOBAL_MEM_FENCE);"
;
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"XSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
xsize
);
replacements
[
"XSIZE"
]
=
context
.
intToString
(
xsize
);
replacements
[
"YSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
ysize
);
replacements
[
"YSIZE"
]
=
context
.
intToString
(
ysize
);
replacements
[
"ZSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
zsize
);
replacements
[
"ZSIZE"
]
=
context
.
intToString
(
zsize
);
replacements
[
"M_PI"
]
=
OpenCLExpressionUtilities
::
doubleToString
(
M_PI
);
replacements
[
"M_PI"
]
=
context
.
doubleToString
(
M_PI
);
replacements
[
"COMPUTE_FFT"
]
=
source
.
str
();
replacements
[
"COMPUTE_FFT"
]
=
source
.
str
();
replacements
[
"LOOP_REQUIRED"
]
=
(
loopRequired
?
"1"
:
"0"
);
replacements
[
"LOOP_REQUIRED"
]
=
(
loopRequired
?
"1"
:
"0"
);
cl
::
Program
program
=
context
.
createProgram
(
context
.
replaceStrings
(
OpenCLKernelSources
::
fft
,
replacements
));
cl
::
Program
program
=
context
.
createProgram
(
context
.
replaceStrings
(
OpenCLKernelSources
::
fft
,
replacements
));
cl
::
Kernel
kernel
(
program
,
"execFFT"
);
cl
::
Kernel
kernel
(
program
,
"execFFT"
);
kernel
.
setArg
(
3
,
zsize
*
sizeof
(
mm_float2
),
NULL
);
int
bufferSize
=
zsize
*
(
context
.
getUseDoublePrecision
()
?
sizeof
(
mm_double2
)
:
sizeof
(
mm_float2
));
kernel
.
setArg
(
4
,
zsize
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
3
,
bufferSize
,
NULL
);
kernel
.
setArg
(
5
,
zsize
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
4
,
bufferSize
,
NULL
);
kernel
.
setArg
(
5
,
bufferSize
,
NULL
);
return
kernel
;
return
kernel
;
}
}
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
c8dac206
...
@@ -559,8 +559,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -559,8 +559,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Create the CCMA kernels.
// Create the CCMA kernels.
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_CONSTRAINTS"
]
=
OpenCLExpressionUtilities
::
intToString
(
numCCMA
);
defines
[
"NUM_CONSTRAINTS"
]
=
context
.
intToString
(
numCCMA
);
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
numAtoms
);
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
numAtoms
);
cl
::
Program
ccmaProgram
=
context
.
createProgram
(
OpenCLKernelSources
::
ccma
,
defines
);
cl
::
Program
ccmaProgram
=
context
.
createProgram
(
OpenCLKernelSources
::
ccma
,
defines
);
ccmaDirectionsKernel
=
cl
::
Kernel
(
ccmaProgram
,
"computeConstraintDirections"
);
ccmaDirectionsKernel
=
cl
::
Kernel
(
ccmaProgram
,
"computeConstraintDirections"
);
ccmaPosForceKernel
=
cl
::
Kernel
(
ccmaProgram
,
"computeConstraintForce"
);
ccmaPosForceKernel
=
cl
::
Kernel
(
ccmaProgram
,
"computeConstraintForce"
);
...
@@ -630,9 +630,9 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -630,9 +630,9 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Create the kernels for virtual sites.
// Create the kernels for virtual sites.
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_2_AVERAGE"
]
=
OpenCLExpressionUtilities
::
intToString
(
num2Avg
);
defines
[
"NUM_2_AVERAGE"
]
=
context
.
intToString
(
num2Avg
);
defines
[
"NUM_3_AVERAGE"
]
=
OpenCLExpressionUtilities
::
intToString
(
num3Avg
);
defines
[
"NUM_3_AVERAGE"
]
=
context
.
intToString
(
num3Avg
);
defines
[
"NUM_OUT_OF_PLANE"
]
=
OpenCLExpressionUtilities
::
intToString
(
numOutOfPlane
);
defines
[
"NUM_OUT_OF_PLANE"
]
=
context
.
intToString
(
numOutOfPlane
);
cl
::
Program
vsiteProgram
=
context
.
createProgram
(
OpenCLKernelSources
::
virtualSites
,
defines
);
cl
::
Program
vsiteProgram
=
context
.
createProgram
(
OpenCLKernelSources
::
virtualSites
,
defines
);
vsitePositionKernel
=
cl
::
Kernel
(
vsiteProgram
,
"computeVirtualSites"
);
vsitePositionKernel
=
cl
::
Kernel
(
vsiteProgram
,
"computeVirtualSites"
);
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
context
.
getPosq
().
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
context
.
getPosq
().
getDeviceBuffer
());
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
c8dac206
...
@@ -53,19 +53,6 @@ using namespace std;
...
@@ -53,19 +53,6 @@ using namespace std;
using
Lepton
::
ExpressionTreeNode
;
using
Lepton
::
ExpressionTreeNode
;
using
Lepton
::
Operation
;
using
Lepton
::
Operation
;
static
string
doubleToString
(
double
value
)
{
stringstream
s
;
s
.
precision
(
8
);
s
<<
scientific
<<
value
<<
"f"
;
return
s
.
str
();
}
static
string
intToString
(
int
value
)
{
stringstream
s
;
s
<<
value
;
return
s
.
str
();
}
static
void
setPosqCorrectionArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
static
void
setPosqCorrectionArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseMixedPrecision
())
if
(
cl
.
getUseMixedPrecision
())
kernel
.
setArg
<
cl
::
Buffer
>
(
index
,
cl
.
getPosqCorrection
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
,
cl
.
getPosqCorrection
().
getDeviceBuffer
());
...
@@ -73,6 +60,20 @@ static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int inde
...
@@ -73,6 +60,20 @@ static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int inde
kernel
.
setArg
<
void
*>
(
index
,
NULL
);
kernel
.
setArg
<
void
*>
(
index
,
NULL
);
}
}
static
void
setPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getPeriodicBoxSizeDouble
());
else
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getPeriodicBoxSize
());
}
static
void
setInvPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getInvPeriodicBoxSizeDouble
());
else
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getInvPeriodicBoxSize
());
}
static
bool
isZeroExpression
(
const
Lepton
::
ParsedExpression
&
expression
)
{
static
bool
isZeroExpression
(
const
Lepton
::
ParsedExpression
&
expression
)
{
const
Lepton
::
Operation
&
op
=
expression
.
getRootNode
().
getOperation
();
const
Lepton
::
Operation
&
op
=
expression
.
getRootNode
().
getOperation
();
if
(
op
.
getId
()
!=
Lepton
::
Operation
::
CONSTANT
)
if
(
op
.
getId
()
!=
Lepton
::
Operation
::
CONSTANT
)
...
@@ -124,11 +125,19 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
...
@@ -124,11 +125,19 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
double
sum
=
0.0
f
;
double
sum
=
0.0
f
;
if
(
includeEnergy
)
{
if
(
includeEnergy
)
{
OpenCLArray
&
energyArray
=
cl
.
getEnergyBuffer
();
OpenCLArray
&
energyArray
=
cl
.
getEnergyBuffer
();
cl_float
*
energy
=
(
cl_float
*
)
cl
.
getPinnedBuffer
();
if
(
cl
.
getUseDoublePrecision
())
{
double
*
energy
=
(
double
*
)
cl
.
getPinnedBuffer
();
energyArray
.
download
(
energy
);
energyArray
.
download
(
energy
);
for
(
int
i
=
0
;
i
<
energyArray
.
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
energyArray
.
getSize
();
i
++
)
sum
+=
energy
[
i
];
sum
+=
energy
[
i
];
}
}
else
{
float
*
energy
=
(
float
*
)
cl
.
getPinnedBuffer
();
energyArray
.
download
(
energy
);
for
(
int
i
=
0
;
i
<
energyArray
.
getSize
();
i
++
)
sum
+=
energy
[
i
];
}
}
return
sum
;
return
sum
;
}
}
...
@@ -401,7 +410,7 @@ void OpenCLApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
...
@@ -401,7 +410,7 @@ void OpenCLApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
if
(
!
hasInitializedKernel
)
{
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
hasInitializedKernel
=
true
;
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
constraints
,
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
constraints
,
defines
);
applyDeltasKernel
=
cl
::
Kernel
(
program
,
"applyPositionDeltas"
);
applyDeltasKernel
=
cl
::
Kernel
(
program
,
"applyPositionDeltas"
);
applyDeltasKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
applyDeltasKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
...
@@ -571,7 +580,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
...
@@ -571,7 +580,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"r"
).
optimize
();
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"r"
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"
float
dEdR = "
]
=
forceExpression
;
expressions
[
"
real
dEdR = "
]
=
forceExpression
;
// Create the kernels.
// Create the kernels.
...
@@ -587,7 +596,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
...
@@ -587,7 +596,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
argName
+
"["
+
intToString
(
i
)
+
"]"
;
string
value
=
argName
+
"["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
variables
[
name
]
=
value
;
}
}
}
}
...
@@ -598,7 +607,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
...
@@ -598,7 +607,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
compute
<<
buffer
.
getType
()
<<
" bondParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" bondParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
}
}
vector
<
pair
<
string
,
string
>
>
functions
;
vector
<
pair
<
string
,
string
>
>
functions
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
bondForce
,
replacements
),
force
.
getForceGroup
());
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
bondForce
,
replacements
),
force
.
getForceGroup
());
...
@@ -796,7 +805,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
...
@@ -796,7 +805,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"theta"
).
optimize
();
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"theta"
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"
float
dEdAngle = "
]
=
forceExpression
;
expressions
[
"
real
dEdAngle = "
]
=
forceExpression
;
// Create the kernels.
// Create the kernels.
...
@@ -812,7 +821,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
...
@@ -812,7 +821,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
argName
+
"["
+
intToString
(
i
)
+
"]"
;
string
value
=
argName
+
"["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
variables
[
name
]
=
value
;
}
}
}
}
...
@@ -823,7 +832,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
...
@@ -823,7 +832,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
compute
<<
buffer
.
getType
()
<<
" angleParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" angleParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
}
}
vector
<
pair
<
string
,
string
>
>
functions
;
vector
<
pair
<
string
,
string
>
>
functions
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
angleForce
,
replacements
),
force
.
getForceGroup
());
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
angleForce
,
replacements
),
force
.
getForceGroup
());
...
@@ -1194,7 +1203,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
...
@@ -1194,7 +1203,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"theta"
).
optimize
();
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"theta"
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"
float
dEdAngle = "
]
=
forceExpression
;
expressions
[
"
real
dEdAngle = "
]
=
forceExpression
;
// Create the kernels.
// Create the kernels.
...
@@ -1210,7 +1219,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
...
@@ -1210,7 +1219,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
argName
+
"["
+
intToString
(
i
)
+
"]"
;
string
value
=
argName
+
"["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
variables
[
name
]
=
value
;
}
}
}
}
...
@@ -1221,7 +1230,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
...
@@ -1221,7 +1230,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
compute
<<
buffer
.
getType
()
<<
" torsionParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" torsionParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
}
}
vector
<
pair
<
string
,
string
>
>
functions
;
vector
<
pair
<
string
,
string
>
>
functions
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
torsionForce
,
replacements
),
force
.
getForceGroup
());
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
torsionForce
,
replacements
),
force
.
getForceGroup
());
...
@@ -1349,7 +1358,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1349,7 +1358,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int
numParticles
=
force
.
getNumParticles
();
int
numParticles
=
force
.
getNumParticles
();
sigmaEpsilon
=
OpenCLArray
::
create
<
mm_float2
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"sigmaEpsilon"
);
sigmaEpsilon
=
OpenCLArray
::
create
<
mm_float2
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"sigmaEpsilon"
);
vector
<
mm_float4
>
posq
(
cl
.
getPaddedNumAtoms
(),
mm_float4
(
0
,
0
,
0
,
0
));
vector
<
mm_float4
>
posqf
(
cl
.
getPaddedNumAtoms
());
vector
<
mm_double4
>
posqd
(
cl
.
getPaddedNumAtoms
());
vector
<
mm_float2
>
sigmaEpsilonVector
(
cl
.
getPaddedNumAtoms
());
vector
<
mm_float2
>
sigmaEpsilonVector
(
cl
.
getPaddedNumAtoms
());
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
double
sumSquaredCharges
=
0.0
;
double
sumSquaredCharges
=
0.0
;
...
@@ -1358,7 +1368,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1358,7 +1368,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double
charge
,
sigma
,
epsilon
;
double
charge
,
sigma
,
epsilon
;
force
.
getParticleParameters
(
i
,
charge
,
sigma
,
epsilon
);
force
.
getParticleParameters
(
i
,
charge
,
sigma
,
epsilon
);
posq
[
i
].
w
=
(
float
)
charge
;
if
(
cl
.
getUseDoublePrecision
())
posqd
[
i
]
=
mm_double4
(
0
,
0
,
0
,
charge
);
else
posqf
[
i
]
=
mm_float4
(
0
,
0
,
0
,
(
float
)
charge
);
sigmaEpsilonVector
[
i
]
=
mm_float2
((
float
)
(
0.5
*
sigma
),
(
float
)
(
2.0
*
sqrt
(
epsilon
)));
sigmaEpsilonVector
[
i
]
=
mm_float2
((
float
)
(
0.5
*
sigma
),
(
float
)
(
2.0
*
sqrt
(
epsilon
)));
exclusionList
[
i
].
push_back
(
i
);
exclusionList
[
i
].
push_back
(
i
);
sumSquaredCharges
+=
charge
*
charge
;
sumSquaredCharges
+=
charge
*
charge
;
...
@@ -1371,7 +1384,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1371,7 +1384,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
exclusionList
[
exclusions
[
i
].
first
].
push_back
(
exclusions
[
i
].
second
);
exclusionList
[
exclusions
[
i
].
first
].
push_back
(
exclusions
[
i
].
second
);
exclusionList
[
exclusions
[
i
].
second
].
push_back
(
exclusions
[
i
].
first
);
exclusionList
[
exclusions
[
i
].
second
].
push_back
(
exclusions
[
i
].
first
);
}
}
cl
.
getPosq
().
upload
(
posq
);
if
(
cl
.
getUseDoublePrecision
())
cl
.
getPosq
().
upload
(
posqd
);
else
cl
.
getPosq
().
upload
(
posqf
);
sigmaEpsilon
->
upload
(
sigmaEpsilonVector
);
sigmaEpsilon
->
upload
(
sigmaEpsilonVector
);
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
);
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
CutoffNonPeriodic
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
CutoffNonPeriodic
);
...
@@ -1383,8 +1399,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1383,8 +1399,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
double
reactionFieldK
=
pow
(
force
.
getCutoffDistance
(),
-
3.0
)
*
(
force
.
getReactionFieldDielectric
()
-
1.0
)
/
(
2.0
*
force
.
getReactionFieldDielectric
()
+
1.0
);
double
reactionFieldK
=
pow
(
force
.
getCutoffDistance
(),
-
3.0
)
*
(
force
.
getReactionFieldDielectric
()
-
1.0
)
/
(
2.0
*
force
.
getReactionFieldDielectric
()
+
1.0
);
double
reactionFieldC
=
(
1.0
/
force
.
getCutoffDistance
())
*
(
3.0
*
force
.
getReactionFieldDielectric
())
/
(
2.0
*
force
.
getReactionFieldDielectric
()
+
1.0
);
double
reactionFieldC
=
(
1.0
/
force
.
getCutoffDistance
())
*
(
3.0
*
force
.
getReactionFieldDielectric
())
/
(
2.0
*
force
.
getReactionFieldDielectric
()
+
1.0
);
defines
[
"REACTION_FIELD_K"
]
=
doubleToString
(
reactionFieldK
);
defines
[
"REACTION_FIELD_K"
]
=
cl
.
doubleToString
(
reactionFieldK
);
defines
[
"REACTION_FIELD_C"
]
=
doubleToString
(
reactionFieldC
);
defines
[
"REACTION_FIELD_C"
]
=
cl
.
doubleToString
(
reactionFieldC
);
}
}
if
(
force
.
getUseDispersionCorrection
()
&&
cl
.
getContextIndex
()
==
0
)
if
(
force
.
getUseDispersionCorrection
()
&&
cl
.
getContextIndex
()
==
0
)
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
system
,
force
);
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
system
,
force
);
...
@@ -1396,23 +1412,24 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1396,23 +1412,24 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int
kmaxx
,
kmaxy
,
kmaxz
;
int
kmaxx
,
kmaxy
,
kmaxz
;
NonbondedForceImpl
::
calcEwaldParameters
(
system
,
force
,
alpha
,
kmaxx
,
kmaxy
,
kmaxz
);
NonbondedForceImpl
::
calcEwaldParameters
(
system
,
force
,
alpha
,
kmaxx
,
kmaxy
,
kmaxz
);
defines
[
"EWALD_ALPHA"
]
=
doubleToString
(
alpha
);
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
);
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
);
// Create the reciprocal space kernels.
// Create the reciprocal space kernels.
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"NUM_ATOMS"
]
=
intToString
(
numParticles
);
replacements
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
numParticles
);
replacements
[
"KMAX_X"
]
=
intToString
(
kmaxx
);
replacements
[
"KMAX_X"
]
=
cl
.
intToString
(
kmaxx
);
replacements
[
"KMAX_Y"
]
=
intToString
(
kmaxy
);
replacements
[
"KMAX_Y"
]
=
cl
.
intToString
(
kmaxy
);
replacements
[
"KMAX_Z"
]
=
intToString
(
kmaxz
);
replacements
[
"KMAX_Z"
]
=
cl
.
intToString
(
kmaxz
);
replacements
[
"EXP_COEFFICIENT"
]
=
doubleToString
(
-
1.0
/
(
4.0
*
alpha
*
alpha
));
replacements
[
"EXP_COEFFICIENT"
]
=
cl
.
doubleToString
(
-
1.0
/
(
4.0
*
alpha
*
alpha
));
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
ewald
,
replacements
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
ewald
,
replacements
);
ewaldSumsKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldCosSinSums"
);
ewaldSumsKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldCosSinSums"
);
ewaldForcesKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldForces"
);
ewaldForcesKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldForces"
);
cosSinSums
=
OpenCLArray
::
create
<
mm_float2
>
(
cl
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
"cosSinSums"
);
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double2
)
:
sizeof
(
mm_float2
));
cosSinSums
=
new
OpenCLArray
(
cl
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
}
}
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
// Compute the PME parameters.
// Compute the PME parameters.
...
@@ -1422,30 +1439,31 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1422,30 +1439,31 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
gridSizeX
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeX
);
gridSizeX
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeX
);
gridSizeY
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeY
);
gridSizeY
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeY
);
gridSizeZ
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeZ
);
gridSizeZ
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeZ
);
defines
[
"EWALD_ALPHA"
]
=
doubleToString
(
alpha
);
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
);
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
);
pmeDefines
[
"PME_ORDER"
]
=
intToString
(
PmeOrder
);
pmeDefines
[
"PME_ORDER"
]
=
cl
.
intToString
(
PmeOrder
);
pmeDefines
[
"NUM_ATOMS"
]
=
intToString
(
numParticles
);
pmeDefines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
numParticles
);
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
cl
.
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
pmeDefines
[
"GRID_SIZE_X"
]
=
intToString
(
gridSizeX
);
pmeDefines
[
"GRID_SIZE_X"
]
=
cl
.
intToString
(
gridSizeX
);
pmeDefines
[
"GRID_SIZE_Y"
]
=
intToString
(
gridSizeY
);
pmeDefines
[
"GRID_SIZE_Y"
]
=
cl
.
intToString
(
gridSizeY
);
pmeDefines
[
"GRID_SIZE_Z"
]
=
intToString
(
gridSizeZ
);
pmeDefines
[
"GRID_SIZE_Z"
]
=
cl
.
intToString
(
gridSizeZ
);
pmeDefines
[
"EPSILON_FACTOR"
]
=
doubleToString
(
sqrt
(
ONE_4PI_EPS0
));
pmeDefines
[
"EPSILON_FACTOR"
]
=
cl
.
doubleToString
(
sqrt
(
ONE_4PI_EPS0
));
// Create required data structures.
// Create required data structures.
pmeGrid
=
OpenCLArray
::
create
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid"
);
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
cl
.
addAutoclearBuffer
(
pmeGrid
->
getDeviceBuffer
(),
pmeGrid
->
getSize
()
*
2
);
pmeGrid
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid"
);
pmeGrid2
=
OpenCLArray
::
create
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid2"
);
cl
.
addAutoclearBuffer
(
*
pmeGrid
);
pmeBsplineModuliX
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
gridSizeX
,
"pmeBsplineModuliX"
);
pmeGrid2
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid2"
);
pmeBsplineModuliY
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
gridSizeY
,
"pmeBsplineModuliY"
);
pmeBsplineModuliX
=
new
OpenCLArray
(
cl
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliZ
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
gridSizeZ
,
"pmeBsplineModuliZ"
);
pmeBsplineModuliY
=
new
OpenCLArray
(
cl
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineTheta
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
PmeOrder
*
numParticles
,
"pmeBsplineTheta"
);
pmeBsplineModuliZ
=
new
OpenCLArray
(
cl
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeBsplineTheta
=
new
OpenCLArray
(
cl
,
PmeOrder
*
numParticles
,
4
*
elementSize
,
"pmeBsplineTheta"
);
bool
deviceIsCpu
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
bool
deviceIsCpu
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
if
(
deviceIsCpu
)
if
(
deviceIsCpu
)
pmeBsplineDTheta
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
PmeOrder
*
numParticles
,
"pmeBsplineDTheta"
);
pmeBsplineDTheta
=
new
OpenCLArray
(
cl
,
PmeOrder
*
numParticles
,
4
*
elementSize
,
"pmeBsplineDTheta"
);
pmeAtomRange
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomRange
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
OpenCLSort
<
SortTrait
>
(
cl
,
cl
.
getNumAtoms
());
sort
=
new
OpenCLSort
<
SortTrait
>
(
cl
,
cl
.
getNumAtoms
());
...
@@ -1487,7 +1505,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1487,7 +1505,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
vector
<
cl_
float
>
moduli
(
ndata
);
vector
<
cl_
double
>
moduli
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
sc
=
0.0
;
double
sc
=
0.0
;
double
ss
=
0.0
;
double
ss
=
0.0
;
...
@@ -1503,6 +1521,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1503,6 +1521,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if
(
moduli
[
i
]
<
1.0e-7
)
if
(
moduli
[
i
]
<
1.0e-7
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
f
;
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
f
;
}
}
if
(
cl
.
getUseDoublePrecision
())
{
if
(
dim
==
0
)
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
moduli
);
pmeBsplineModuliX
->
upload
(
moduli
);
else
if
(
dim
==
1
)
else
if
(
dim
==
1
)
...
@@ -1510,6 +1529,18 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1510,6 +1529,18 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else
else
pmeBsplineModuliZ
->
upload
(
moduli
);
pmeBsplineModuliZ
->
upload
(
moduli
);
}
}
else
{
vector
<
float
>
modulif
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
modulif
[
i
]
=
(
float
)
moduli
[
i
];
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
modulif
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
modulif
);
else
pmeBsplineModuliZ
->
upload
(
modulif
);
}
}
}
}
else
else
ewaldSelfEnergy
=
0.0
;
ewaldSelfEnergy
=
0.0
;
...
@@ -1568,9 +1599,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1568,9 +1599,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"gridSpreadCharge"
);
pmeSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"gridSpreadCharge"
);
pmeConvolutionKernel
=
cl
::
Kernel
(
program
,
"reciprocalConvolution"
);
pmeConvolutionKernel
=
cl
::
Kernel
(
program
,
"reciprocalConvolution"
);
pmeInterpolateForceKernel
=
cl
::
Kernel
(
program
,
"gridInterpolateForce"
);
pmeInterpolateForceKernel
=
cl
::
Kernel
(
program
,
"gridInterpolateForce"
);
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeBsplineTheta
->
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeBsplineTheta
->
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
(
2
,
OpenCLContext
::
ThreadBlockSize
*
PmeOrder
*
sizeof
(
mm_float4
)
,
NULL
);
pmeUpdateBsplinesKernel
.
setArg
(
2
,
OpenCLContext
::
ThreadBlockSize
*
PmeOrder
*
elementSize
,
NULL
);
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeAtomGridIndex
->
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeAtomGridIndex
->
getDeviceBuffer
());
if
(
deviceIsCpu
)
if
(
deviceIsCpu
)
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
pmeBsplineDTheta
->
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
pmeBsplineDTheta
->
getDeviceBuffer
());
...
@@ -1591,7 +1623,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1591,7 +1623,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineModuliX
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineModuliX
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineModuliY
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineModuliY
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
pmeBsplineModuliZ
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
pmeBsplineModuliZ
->
getDeviceBuffer
());
interpolateForceThreads
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_LOCAL_MEM_SIZE
>
()
>
2
*
128
*
PmeOrder
*
sizeof
(
mm_float4
)
?
128
:
64
);
interpolateForceThreads
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_LOCAL_MEM_SIZE
>
()
>
2
*
128
*
PmeOrder
*
elementSize
?
128
:
64
);
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeGrid
->
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeGrid
->
getDeviceBuffer
());
...
@@ -1600,7 +1632,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1600,7 +1632,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
pmeBsplineDTheta
->
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
pmeBsplineDTheta
->
getDeviceBuffer
());
}
}
else
else
pmeInterpolateForceKernel
.
setArg
(
5
,
2
*
interpolateForceThreads
*
PmeOrder
*
sizeof
(
mm_float4
)
,
NULL
);
pmeInterpolateForceKernel
.
setArg
(
5
,
2
*
interpolateForceThreads
*
PmeOrder
*
elementSize
,
NULL
);
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
pmeFinishSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"finishSpreadCharge"
);
pmeFinishSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"finishSpreadCharge"
);
pmeFinishSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid
->
getDeviceBuffer
());
pmeFinishSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid
->
getDeviceBuffer
());
...
@@ -1608,57 +1640,68 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1608,57 +1640,68 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
}
}
}
if
(
cosSinSums
!=
NULL
&&
cl
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
cosSinSums
!=
NULL
&&
cl
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
mm_float4
boxSize
=
cl
.
getPeriodicBoxSize
();
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
mm_float4
recipBoxSize
=
mm_float4
((
float
)
(
2
*
M_PI
/
boxSize
.
x
),
(
float
)
(
2
*
M_PI
/
boxSize
.
y
),
(
float
)
(
2
*
M_PI
/
boxSize
.
z
),
0
);
mm_double4
recipBoxSize
=
mm_double4
(
2
*
M_PI
/
boxSize
.
x
,
2
*
M_PI
/
boxSize
.
y
,
2
*
M_PI
/
boxSize
.
z
,
0.0
);
float
recipCoefficient
=
(
float
)
(
ONE_4PI_EPS0
*
4
*
M_PI
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
));
double
recipCoefficient
=
ONE_4PI_EPS0
*
4
*
M_PI
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
ewaldSumsKernel
.
setArg
<
mm_float4
>
(
3
,
recipBoxSize
);
if
(
cl
.
getUseDoublePrecision
())
{
ewaldSumsKernel
.
setArg
<
cl_float
>
(
4
,
recipCoefficient
);
ewaldSumsKernel
.
setArg
<
mm_double4
>
(
3
,
recipBoxSize
);
ewaldSumsKernel
.
setArg
<
cl_double
>
(
4
,
recipCoefficient
);
ewaldForcesKernel
.
setArg
<
mm_double4
>
(
3
,
recipBoxSize
);
ewaldForcesKernel
.
setArg
<
cl_double
>
(
4
,
recipCoefficient
);
}
else
{
ewaldSumsKernel
.
setArg
<
mm_float4
>
(
3
,
mm_float4
((
float
)
recipBoxSize
.
x
,
(
float
)
recipBoxSize
.
y
,
(
float
)
recipBoxSize
.
z
,
0
));
ewaldSumsKernel
.
setArg
<
cl_float
>
(
4
,
(
cl_float
)
recipCoefficient
);
ewaldForcesKernel
.
setArg
<
mm_float4
>
(
3
,
mm_float4
((
float
)
recipBoxSize
.
x
,
(
float
)
recipBoxSize
.
y
,
(
float
)
recipBoxSize
.
z
,
0
));
ewaldForcesKernel
.
setArg
<
cl_float
>
(
4
,
(
cl_float
)
recipCoefficient
);
}
cl
.
executeKernel
(
ewaldSumsKernel
,
cosSinSums
->
getSize
());
cl
.
executeKernel
(
ewaldSumsKernel
,
cosSinSums
->
getSize
());
ewaldForcesKernel
.
setArg
<
mm_float4
>
(
3
,
recipBoxSize
);
ewaldForcesKernel
.
setArg
<
cl_float
>
(
4
,
recipCoefficient
);
cl
.
executeKernel
(
ewaldForcesKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
ewaldForcesKernel
,
cl
.
getNumAtoms
());
}
}
if
(
pmeGrid
!=
NULL
&&
cl
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
pmeGrid
!=
NULL
&&
cl
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
mm_float4
boxSize
=
cl
.
getPeriodicBoxSize
();
setPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
4
);
mm_float4
invBoxSize
=
cl
.
getInvPeriodicBoxSize
();
setInvPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
5
);
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
4
,
boxSize
);
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
5
,
invBoxSize
);
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
if
(
deviceIsCpu
)
{
if
(
deviceIsCpu
)
{
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
5
,
boxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
5
);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
invBoxSize
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
6
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
}
}
else
{
else
{
sort
->
sort
(
*
pmeAtomGridIndex
);
sort
->
sort
(
*
pmeAtomGridIndex
);
pmeAtomRangeKernel
.
setArg
<
mm_float4
>
(
3
,
boxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeAtomRangeKernel
,
3
);
pmeAtomRangeKernel
.
setArg
<
mm_float4
>
(
4
,
invBoxSize
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeAtomRangeKernel
,
4
);
cl
.
executeKernel
(
pmeAtomRangeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeAtomRangeKernel
,
cl
.
getNumAtoms
());
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
5
,
boxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
5
);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
invBoxSize
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
6
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
(),
PmeOrder
*
PmeOrder
*
PmeOrder
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
(),
PmeOrder
*
PmeOrder
*
PmeOrder
);
cl
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
pmeGrid
->
getSize
());
cl
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
pmeGrid
->
getSize
());
}
}
else
{
else
{
pmeZIndexKernel
.
setArg
<
mm_float4
>
(
2
,
boxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeZIndexKernel
,
2
);
pmeZIndexKernel
.
setArg
<
mm_float4
>
(
3
,
invBoxSize
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeZIndexKernel
,
3
);
cl
.
executeKernel
(
pmeZIndexKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeZIndexKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
}
}
}
}
fft
->
execFFT
(
*
pmeGrid
,
*
pmeGrid2
,
true
);
fft
->
execFFT
(
*
pmeGrid
,
*
pmeGrid2
,
true
);
pmeConvolutionKernel
.
setArg
<
mm_float4
>
(
5
,
invBoxSize
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeConvolutionKernel
,
5
);
pmeConvolutionKernel
.
setArg
<
cl_float
>
(
6
,
(
float
)
(
1.0
/
(
M_PI
*
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
)));
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
double
scaleFactor
=
1.0
/
(
M_PI
*
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
if
(
cl
.
getUseDoublePrecision
())
pmeConvolutionKernel
.
setArg
<
cl_double
>
(
6
,
scaleFactor
);
else
pmeConvolutionKernel
.
setArg
<
cl_float
>
(
6
,
(
float
)
scaleFactor
);
cl
.
executeKernel
(
pmeConvolutionKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeConvolutionKernel
,
cl
.
getNumAtoms
());
fft
->
execFFT
(
*
pmeGrid2
,
*
pmeGrid
,
false
);
fft
->
execFFT
(
*
pmeGrid2
,
*
pmeGrid
,
false
);
pmeInterpolateForceKernel
.
setArg
<
mm_float4
>
(
3
,
boxSize
);
setPeriodicBoxSizeArg
(
cl
,
pmeInterpolateForceKernel
,
3
);
pmeInterpolateForceKernel
.
setArg
<
mm_float4
>
(
4
,
invBoxSize
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeInterpolateForceKernel
,
4
);
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
cl
.
getNumAtoms
(),
interpolateForceThreads
);
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
cl
.
getNumAtoms
(),
interpolateForceThreads
);
}
}
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
mm_
float
4
boxSize
=
cl
.
getPeriodicBoxSize
();
mm_
double
4
boxSize
=
cl
.
getPeriodicBoxSize
Double
();
energy
+=
dispersionCoefficient
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
energy
+=
dispersionCoefficient
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
}
}
return
energy
;
return
energy
;
...
@@ -1697,8 +1740,9 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -1697,8 +1740,9 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
// Record the per-particle parameters.
// Record the per-particle parameters.
OpenCLArray
&
posq
=
cl
.
getPosq
();
OpenCLArray
&
posq
=
cl
.
getPosq
();
posq
.
download
(
(
mm_float4
*
)
cl
.
getPinnedBuffer
());
posq
.
download
(
cl
.
getPinnedBuffer
());
mm_float4
*
posqf
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
mm_float4
*
posqf
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
mm_double4
*
posqd
=
(
mm_double4
*
)
cl
.
getPinnedBuffer
();
vector
<
mm_float2
>
sigmaEpsilonVector
(
cl
.
getPaddedNumAtoms
());
vector
<
mm_float2
>
sigmaEpsilonVector
(
cl
.
getPaddedNumAtoms
());
double
sumSquaredCharges
=
0.0
;
double
sumSquaredCharges
=
0.0
;
const
vector
<
cl_int
>&
order
=
cl
.
getAtomIndex
();
const
vector
<
cl_int
>&
order
=
cl
.
getAtomIndex
();
...
@@ -1706,6 +1750,9 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -1706,6 +1750,9 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
int
index
=
order
[
i
];
int
index
=
order
[
i
];
double
charge
,
sigma
,
epsilon
;
double
charge
,
sigma
,
epsilon
;
force
.
getParticleParameters
(
index
,
charge
,
sigma
,
epsilon
);
force
.
getParticleParameters
(
index
,
charge
,
sigma
,
epsilon
);
if
(
cl
.
getUseDoublePrecision
())
posqd
[
i
].
w
=
charge
;
else
posqf
[
i
].
w
=
(
float
)
charge
;
posqf
[
i
].
w
=
(
float
)
charge
;
sigmaEpsilonVector
[
index
]
=
mm_float2
((
float
)
(
0.5
*
sigma
),
(
float
)
(
2.0
*
sqrt
(
epsilon
)));
sigmaEpsilonVector
[
index
]
=
mm_float2
((
float
)
(
0.5
*
sigma
),
(
float
)
(
2.0
*
sqrt
(
epsilon
)));
sumSquaredCharges
+=
charge
*
charge
;
sumSquaredCharges
+=
charge
*
charge
;
...
@@ -1782,7 +1829,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -1782,7 +1829,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
int
forceIndex
;
int
forceIndex
;
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
;
;
string
prefix
=
"custom"
+
intToString
(
forceIndex
)
+
"_"
;
string
prefix
=
"custom"
+
cl
.
intToString
(
forceIndex
)
+
"_"
;
// Record parameters and exclusions.
// Record parameters and exclusions.
...
@@ -1819,11 +1866,11 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -1819,11 +1866,11 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
vector
<
double
>
values
;
vector
<
double
>
values
;
double
min
,
max
;
double
min
,
max
;
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
string
arrayName
=
prefix
+
"table"
+
intToString
(
i
);
string
arrayName
=
prefix
+
"table"
+
cl
.
intToString
(
i
);
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functions
[
name
]
=
&
fp
;
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
mm_float4
>
f
=
OpenCL
ExpressionUtilities
::
computeFunctionCoefficients
(
values
,
min
,
max
);
vector
<
mm_float4
>
f
=
cl
.
get
ExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
tabulatedFunctions
.
push_back
(
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
.
push_back
(
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
...
@@ -1866,18 +1913,18 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -1866,18 +1913,18 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
}
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
"globals["
+
intToString
(
i
)
+
"]"
;
string
value
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
.
push_back
(
makeVariable
(
name
,
prefix
+
value
));
variables
.
push_back
(
makeVariable
(
name
,
prefix
+
value
));
}
}
stringstream
compute
;
stringstream
compute
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
prefix
+
"temp"
,
prefix
+
"functionParams"
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
prefix
+
"temp"
,
prefix
+
"functionParams"
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
string
source
=
cl
.
replaceStrings
(
OpenCLKernelSources
::
customNonbonded
,
replacements
);
string
source
=
cl
.
replaceStrings
(
OpenCLKernelSources
::
customNonbonded
,
replacements
);
cl
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
cl
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
intToString
(
i
+
1
),
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
cl
.
intToString
(
i
+
1
),
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
}
if
(
globals
!=
NULL
)
{
if
(
globals
!=
NULL
)
{
globals
->
upload
(
globalParamValues
);
globals
->
upload
(
globalParamValues
);
...
@@ -1965,14 +2012,14 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
...
@@ -1965,14 +2012,14 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
longBornSum
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"longBornSum"
);
longBornSum
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"longBornSum"
);
longBornForce
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"longBornForce"
);
longBornForce
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"longBornForce"
);
bornForce
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"bornForce"
);
bornForce
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"bornForce"
);
cl
.
addAutoclearBuffer
(
longBornSum
->
getDeviceBuffer
(),
2
*
longBornSum
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
longBornSum
);
cl
.
addAutoclearBuffer
(
longBornForce
->
getDeviceBuffer
(),
2
*
longBornForce
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
longBornForce
);
}
}
else
{
else
{
bornSum
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
nb
.
getNumForceBuffers
(),
"bornSum"
);
bornSum
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
nb
.
getNumForceBuffers
(),
"bornSum"
);
bornForce
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
nb
.
getNumForceBuffers
(),
"bornForce"
);
bornForce
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
nb
.
getNumForceBuffers
(),
"bornForce"
);
cl
.
addAutoclearBuffer
(
bornSum
->
getDeviceBuffer
(),
bornSum
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
bornSum
);
cl
.
addAutoclearBuffer
(
bornForce
->
getDeviceBuffer
(),
bornForce
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
bornForce
);
}
}
vector
<
mm_float4
>
posq
(
cl
.
getPaddedNumAtoms
(),
mm_float4
(
0
,
0
,
0
,
0
));
vector
<
mm_float4
>
posq
(
cl
.
getPaddedNumAtoms
(),
mm_float4
(
0
,
0
,
0
,
0
));
int
numParticles
=
force
.
getNumParticles
();
int
numParticles
=
force
.
getNumParticles
();
...
@@ -2012,12 +2059,12 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -2012,12 +2059,12 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
nb
.
getUsePeriodic
())
if
(
nb
.
getUsePeriodic
())
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"CUTOFF_SQUARED"
]
=
doubleToString
(
nb
.
getCutoffDistance
()
*
nb
.
getCutoffDistance
());
defines
[
"CUTOFF_SQUARED"
]
=
cl
.
doubleToString
(
nb
.
getCutoffDistance
()
*
nb
.
getCutoffDistance
());
defines
[
"PREFACTOR"
]
=
doubleToString
(
prefactor
);
defines
[
"PREFACTOR"
]
=
cl
.
doubleToString
(
prefactor
);
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
cl
.
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
nb
.
getForceThreadBlockSize
());
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
cl
.
intToString
(
nb
.
getForceThreadBlockSize
());
string
platformVendor
=
cl
::
Platform
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_PLATFORM
>
()).
getInfo
<
CL_PLATFORM_VENDOR
>
();
string
platformVendor
=
cl
::
Platform
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_PLATFORM
>
()).
getInfo
<
CL_PLATFORM_VENDOR
>
();
if
(
platformVendor
==
"Apple"
)
if
(
platformVendor
==
"Apple"
)
defines
[
"USE_APPLE_WORKAROUND"
]
=
"1"
;
defines
[
"USE_APPLE_WORKAROUND"
]
=
"1"
;
...
@@ -2220,7 +2267,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2220,7 +2267,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
int
forceIndex
;
int
forceIndex
;
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
;
;
string
prefix
=
"custom"
+
intToString
(
forceIndex
)
+
"_"
;
string
prefix
=
"custom"
+
cl
.
intToString
(
forceIndex
)
+
"_"
;
// Record parameters and exclusions.
// Record parameters and exclusions.
...
@@ -2259,11 +2306,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2259,11 +2306,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector
<
double
>
values
;
vector
<
double
>
values
;
double
min
,
max
;
double
min
,
max
;
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
string
arrayName
=
prefix
+
"table"
+
intToString
(
i
);
string
arrayName
=
prefix
+
"table"
+
cl
.
intToString
(
i
);
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functions
[
name
]
=
&
fp
;
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
mm_float4
>
f
=
OpenCL
ExpressionUtilities
::
computeFunctionCoefficients
(
values
,
min
,
max
);
vector
<
mm_float4
>
f
=
cl
.
get
ExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
tabulatedFunctions
.
push_back
(
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
.
push_back
(
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
...
@@ -2356,7 +2403,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2356,7 +2403,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
"globals["
+
intToString
(
i
)
+
"]"
;
string
value
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
.
push_back
(
makeVariable
(
name
,
value
));
variables
.
push_back
(
makeVariable
(
name
,
value
));
}
}
map
<
string
,
Lepton
::
ParsedExpression
>
n2ValueExpressions
;
map
<
string
,
Lepton
::
ParsedExpression
>
n2ValueExpressions
;
...
@@ -2364,7 +2411,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2364,7 +2411,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
Lepton
::
ParsedExpression
ex
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
0
],
functions
).
optimize
();
Lepton
::
ParsedExpression
ex
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
0
],
functions
).
optimize
();
n2ValueExpressions
[
"tempValue1 = "
]
=
ex
;
n2ValueExpressions
[
"tempValue1 = "
]
=
ex
;
n2ValueExpressions
[
"tempValue2 = "
]
=
ex
.
renameVariables
(
rename
);
n2ValueExpressions
[
"tempValue2 = "
]
=
ex
.
renameVariables
(
rename
);
n2ValueSource
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
n2ValueExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
n2ValueSource
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
n2ValueExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
string
n2ValueStr
=
n2ValueSource
.
str
();
string
n2ValueStr
=
n2ValueSource
.
str
();
replacements
[
"COMPUTE_VALUE"
]
=
n2ValueStr
;
replacements
[
"COMPUTE_VALUE"
]
=
n2ValueStr
;
...
@@ -2374,7 +2421,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2374,7 +2421,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
pairValueUsesParam
.
resize
(
params
->
getBuffers
().
size
(),
false
);
pairValueUsesParam
.
resize
(
params
->
getBuffers
().
size
(),
false
);
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
cl
.
intToString
(
i
+
1
);
if
(
n2ValueStr
.
find
(
paramName
+
"1"
)
!=
n2ValueStr
.
npos
||
n2ValueStr
.
find
(
paramName
+
"2"
)
!=
n2ValueStr
.
npos
)
{
if
(
n2ValueStr
.
find
(
paramName
+
"1"
)
!=
n2ValueStr
.
npos
||
n2ValueStr
.
find
(
paramName
+
"2"
)
!=
n2ValueStr
.
npos
)
{
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_"
<<
paramName
;
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_"
<<
paramName
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[localAtomIndex] = "
<<
paramName
<<
"1;
\n
"
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[localAtomIndex] = "
<<
paramName
<<
"1;
\n
"
;
...
@@ -2399,11 +2446,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2399,11 +2446,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if
(
useExclusionsForValue
)
if
(
useExclusionsForValue
)
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
cl
.
getSIMDWidth
()
==
32
)
if
(
cl
.
getSIMDWidth
()
==
32
)
defines
[
"WARPS_PER_GROUP"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
OpenCLContext
::
TileSize
);
defines
[
"WARPS_PER_GROUP"
]
=
cl
.
intToString
(
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
OpenCLContext
::
TileSize
);
defines
[
"CUTOFF_SQUARED"
]
=
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
defines
[
"CUTOFF_SQUARED"
]
=
cl
.
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
cl
.
intToString
(
cl
.
getNumAtomBlocks
());
string
file
;
string
file
;
if
(
deviceIsCpu
)
if
(
deviceIsCpu
)
file
=
OpenCLKernelSources
::
customGBValueN2_cpu
;
file
=
OpenCLKernelSources
::
customGBValueN2_cpu
;
...
@@ -2424,12 +2471,12 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2424,12 +2471,12 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs
<<
", __global const float* globals"
;
extraArgs
<<
", __global const float* globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
paramName
;
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
paramName
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
valueName
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
valueName
;
reductionSource
<<
buffer
.
getType
()
<<
" local_"
<<
valueName
<<
";
\n
"
;
reductionSource
<<
buffer
.
getType
()
<<
" local_"
<<
valueName
<<
";
\n
"
;
}
}
...
@@ -2441,22 +2488,22 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2441,22 +2488,22 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
variables
[
force
.
getPerParticleParameterName
(
i
)]
=
"params"
+
params
->
getParameterSuffix
(
i
,
"[index]"
);
variables
[
force
.
getPerParticleParameterName
(
i
)]
=
"params"
+
params
->
getParameterSuffix
(
i
,
"[index]"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
variables
[
computedValueNames
[
i
-
1
]]
=
"local_values"
+
computedValues
->
getParameterSuffix
(
i
-
1
);
variables
[
computedValueNames
[
i
-
1
]]
=
"local_values"
+
computedValues
->
getParameterSuffix
(
i
-
1
);
map
<
string
,
Lepton
::
ParsedExpression
>
valueExpressions
;
map
<
string
,
Lepton
::
ParsedExpression
>
valueExpressions
;
valueExpressions
[
"local_values"
+
computedValues
->
getParameterSuffix
(
i
)
+
" = "
]
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
i
],
functions
).
optimize
();
valueExpressions
[
"local_values"
+
computedValues
->
getParameterSuffix
(
i
)
+
" = "
]
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
i
],
functions
).
optimize
();
reductionSource
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
valueExpressions
,
variables
,
functionDefinitions
,
"value"
+
intToString
(
i
)
+
"_temp"
,
prefix
+
"functionParams"
);
reductionSource
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
valueExpressions
,
variables
,
functionDefinitions
,
"value"
+
cl
.
intToString
(
i
)
+
"_temp"
,
prefix
+
"functionParams"
);
}
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
cl
.
intToString
(
i
+
1
);
reductionSource
<<
"global_"
<<
valueName
<<
"[index] = local_"
<<
valueName
<<
";
\n
"
;
reductionSource
<<
"global_"
<<
valueName
<<
"[index] = local_"
<<
valueName
<<
";
\n
"
;
}
}
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"COMPUTE_VALUES"
]
=
reductionSource
.
str
();
replacements
[
"COMPUTE_VALUES"
]
=
reductionSource
.
str
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customGBValuePerParticle
,
replacements
),
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customGBValuePerParticle
,
replacements
),
defines
);
perParticleValueKernel
=
cl
::
Kernel
(
program
,
"computePerParticleValues"
);
perParticleValueKernel
=
cl
::
Kernel
(
program
,
"computePerParticleValues"
);
}
}
...
@@ -2478,7 +2525,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2478,7 +2525,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
variables
.
push_back
(
makeVariable
(
computedValueNames
[
i
]
+
"2"
,
"values"
+
computedValues
->
getParameterSuffix
(
i
,
"2"
)));
variables
.
push_back
(
makeVariable
(
computedValueNames
[
i
]
+
"2"
,
"values"
+
computedValues
->
getParameterSuffix
(
i
,
"2"
)));
}
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
variables
.
push_back
(
makeVariable
(
force
.
getGlobalParameterName
(
i
),
"globals["
+
intToString
(
i
)
+
"]"
));
variables
.
push_back
(
makeVariable
(
force
.
getGlobalParameterName
(
i
),
"globals["
+
cl
.
intToString
(
i
)
+
"]"
));
stringstream
n2EnergySource
;
stringstream
n2EnergySource
;
bool
anyExclusions
=
(
force
.
getNumExclusions
()
>
0
);
bool
anyExclusions
=
(
force
.
getNumExclusions
()
>
0
);
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyTerms
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyTerms
();
i
++
)
{
...
@@ -2494,23 +2541,23 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2494,23 +2541,23 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if
(
useLong
)
{
if
(
useLong
)
{
for
(
int
j
=
0
;
j
<
force
.
getNumComputedValues
();
j
++
)
{
for
(
int
j
=
0
;
j
<
force
.
getNumComputedValues
();
j
++
)
{
if
(
needChainForValue
[
j
])
{
if
(
needChainForValue
[
j
])
{
string
index
=
intToString
(
j
+
1
);
string
index
=
cl
.
intToString
(
j
+
1
);
n2EnergyExpressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_1 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
];
n2EnergyExpressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_1 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
];
n2EnergyExpressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_2 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
+
1
];
n2EnergyExpressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_2 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
+
1
];
}
}
}
}
}
}
else
{
else
{
for
(
int
j
=
0
;
j
<
force
.
getNumComputedValues
();
j
++
)
{
for
(
int
j
=
0
;
j
<
force
.
getNumComputedValues
();
j
++
)
{
if
(
needChainForValue
[
j
])
{
if
(
needChainForValue
[
j
])
{
n2EnergyExpressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
,
"_1"
)
+
" += "
]
=
energyDerivExpressions
[
i
][
2
*
j
];
n2EnergyExpressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
,
"_1"
)
+
" += "
]
=
energyDerivExpressions
[
i
][
2
*
j
];
n2EnergyExpressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
,
"_2"
)
+
" += "
]
=
energyDerivExpressions
[
i
][
2
*
j
+
1
];
n2EnergyExpressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
,
"_2"
)
+
" += "
]
=
energyDerivExpressions
[
i
][
2
*
j
+
1
];
}
}
}
}
}
}
if
(
exclude
)
if
(
exclude
)
n2EnergySource
<<
"if (!isExcluded) {
\n
"
;
n2EnergySource
<<
"if (!isExcluded) {
\n
"
;
n2EnergySource
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
n2EnergyExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
n2EnergySource
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
n2EnergyExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
if
(
exclude
)
if
(
exclude
)
n2EnergySource
<<
"}
\n
"
;
n2EnergySource
<<
"}
\n
"
;
}
}
...
@@ -2523,7 +2570,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2523,7 +2570,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
pairEnergyUsesParam
.
resize
(
params
->
getBuffers
().
size
(),
false
);
pairEnergyUsesParam
.
resize
(
params
->
getBuffers
().
size
(),
false
);
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
cl
.
intToString
(
i
+
1
);
if
(
n2EnergyStr
.
find
(
paramName
+
"1"
)
!=
n2EnergyStr
.
npos
||
n2EnergyStr
.
find
(
paramName
+
"2"
)
!=
n2EnergyStr
.
npos
)
{
if
(
n2EnergyStr
.
find
(
paramName
+
"1"
)
!=
n2EnergyStr
.
npos
||
n2EnergyStr
.
find
(
paramName
+
"2"
)
!=
n2EnergyStr
.
npos
)
{
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_"
<<
paramName
;
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_"
<<
paramName
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[localAtomIndex] = "
<<
paramName
<<
"1;
\n
"
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[localAtomIndex] = "
<<
paramName
<<
"1;
\n
"
;
...
@@ -2536,7 +2583,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2536,7 +2583,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
pairEnergyUsesValue
.
resize
(
computedValues
->
getBuffers
().
size
(),
false
);
pairEnergyUsesValue
.
resize
(
computedValues
->
getBuffers
().
size
(),
false
);
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
cl
.
intToString
(
i
+
1
);
if
(
n2EnergyStr
.
find
(
valueName
+
"1"
)
!=
n2EnergyStr
.
npos
||
n2EnergyStr
.
find
(
valueName
+
"2"
)
!=
n2EnergyStr
.
npos
)
{
if
(
n2EnergyStr
.
find
(
valueName
+
"1"
)
!=
n2EnergyStr
.
npos
||
n2EnergyStr
.
find
(
valueName
+
"2"
)
!=
n2EnergyStr
.
npos
)
{
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
valueName
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_"
<<
valueName
;
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict global_"
<<
valueName
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_"
<<
valueName
;
loadLocal1
<<
"local_"
<<
valueName
<<
"[localAtomIndex] = "
<<
valueName
<<
"1;
\n
"
;
loadLocal1
<<
"local_"
<<
valueName
<<
"[localAtomIndex] = "
<<
valueName
<<
"1;
\n
"
;
...
@@ -2549,7 +2596,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2549,7 +2596,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if
(
useLong
)
{
if
(
useLong
)
{
extraArgs
<<
", __global long* restrict derivBuffers"
;
extraArgs
<<
", __global long* restrict derivBuffers"
;
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __local float* restrict local_deriv"
<<
index
;
extraArgs
<<
", __local float* restrict local_deriv"
<<
index
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[localAtomIndex] = 0.0f;
\n
"
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[localAtomIndex] = 0.0f;
\n
"
;
declare1
<<
"float deriv"
<<
index
<<
"_1 = 0.0f;
\n
"
;
declare1
<<
"float deriv"
<<
index
<<
"_1 = 0.0f;
\n
"
;
...
@@ -2564,7 +2611,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2564,7 +2611,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
else
{
else
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict derivBuffers"
<<
index
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_deriv"
<<
index
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict derivBuffers"
<<
index
<<
", __local "
<<
buffer
.
getType
()
<<
"* restrict local_deriv"
<<
index
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[localAtomIndex] = 0.0f;
\n
"
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[localAtomIndex] = 0.0f;
\n
"
;
declare1
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_1 = 0.0f;
\n
"
;
declare1
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_1 = 0.0f;
\n
"
;
...
@@ -2598,11 +2645,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2598,11 +2645,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if
(
anyExclusions
)
if
(
anyExclusions
)
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
cl
.
getSIMDWidth
()
==
32
)
if
(
cl
.
getSIMDWidth
()
==
32
)
defines
[
"WARPS_PER_GROUP"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
OpenCLContext
::
TileSize
);
defines
[
"WARPS_PER_GROUP"
]
=
cl
.
intToString
(
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
OpenCLContext
::
TileSize
);
defines
[
"CUTOFF_SQUARED"
]
=
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
defines
[
"CUTOFF_SQUARED"
]
=
cl
.
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
cl
.
intToString
(
cl
.
getNumAtomBlocks
());
string
file
;
string
file
;
if
(
deviceIsCpu
)
if
(
deviceIsCpu
)
file
=
OpenCLKernelSources
::
customGBEnergyN2_cpu
;
file
=
OpenCLKernelSources
::
customGBEnergyN2_cpu
;
...
@@ -2621,17 +2668,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2621,17 +2668,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs
<<
", __global const float* globals"
;
extraArgs
<<
", __global const float* globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
paramName
;
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
paramName
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
valueName
;
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
valueName
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict derivBuffers"
<<
index
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict derivBuffers"
<<
index
;
compute
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
" = derivBuffers"
<<
index
<<
"[index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
" = derivBuffers"
<<
index
<<
"[index];
\n
"
;
}
}
...
@@ -2639,11 +2686,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2639,11 +2686,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs
<<
", __global const long* restrict derivBuffersIn"
;
extraArgs
<<
", __global const long* restrict derivBuffersIn"
;
for
(
int
i
=
0
;
i
<
energyDerivs
->
getNumParameters
();
++
i
)
for
(
int
i
=
0
;
i
<
energyDerivs
->
getNumParameters
();
++
i
)
reduce
<<
"derivBuffers"
<<
energyDerivs
->
getParameterSuffix
(
i
,
"[index]"
)
<<
reduce
<<
"derivBuffers"
<<
energyDerivs
->
getParameterSuffix
(
i
,
"[index]"
)
<<
" = (1.0f/0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*"
<<
intToString
(
i
)
<<
"];
\n
"
;
" = (1.0f/0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*"
<<
cl
.
intToString
(
i
)
<<
"];
\n
"
;
}
}
else
{
else
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
reduce
<<
"REDUCE_VALUE(derivBuffers"
<<
intToString
(
i
+
1
)
<<
", "
<<
energyDerivs
->
getBuffers
()[
i
].
getType
()
<<
")
\n
"
;
reduce
<<
"REDUCE_VALUE(derivBuffers"
<<
cl
.
intToString
(
i
+
1
)
<<
", "
<<
energyDerivs
->
getBuffers
()[
i
].
getType
()
<<
")
\n
"
;
}
}
// Compute the various expressions.
// Compute the various expressions.
...
@@ -2655,7 +2702,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2655,7 +2702,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
variables
[
force
.
getPerParticleParameterName
(
i
)]
=
"params"
+
params
->
getParameterSuffix
(
i
,
"[index]"
);
variables
[
force
.
getPerParticleParameterName
(
i
)]
=
"params"
+
params
->
getParameterSuffix
(
i
,
"[index]"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
variables
[
computedValueNames
[
i
]]
=
"values"
+
computedValues
->
getParameterSuffix
(
i
,
"[index]"
);
variables
[
computedValueNames
[
i
]]
=
"values"
+
computedValues
->
getParameterSuffix
(
i
,
"[index]"
);
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
...
@@ -2666,23 +2713,23 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2666,23 +2713,23 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if
(
type
!=
CustomGBForce
::
SingleParticle
)
if
(
type
!=
CustomGBForce
::
SingleParticle
)
continue
;
continue
;
Lepton
::
ParsedExpression
parsed
=
Lepton
::
Parser
::
parse
(
expression
,
functions
).
optimize
();
Lepton
::
ParsedExpression
parsed
=
Lepton
::
Parser
::
parse
(
expression
,
functions
).
optimize
();
expressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ energy += "
]
=
parsed
;
expressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ energy += "
]
=
parsed
;
for
(
int
j
=
0
;
j
<
force
.
getNumComputedValues
();
j
++
)
for
(
int
j
=
0
;
j
<
force
.
getNumComputedValues
();
j
++
)
expressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
)
+
" += "
]
=
energyDerivExpressions
[
i
][
j
];
expressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
energyDerivs
->
getParameterSuffix
(
j
)
+
" += "
]
=
energyDerivExpressions
[
i
][
j
];
Lepton
::
ParsedExpression
gradx
=
parsed
.
differentiate
(
"x"
).
optimize
();
Lepton
::
ParsedExpression
gradx
=
parsed
.
differentiate
(
"x"
).
optimize
();
Lepton
::
ParsedExpression
grady
=
parsed
.
differentiate
(
"y"
).
optimize
();
Lepton
::
ParsedExpression
grady
=
parsed
.
differentiate
(
"y"
).
optimize
();
Lepton
::
ParsedExpression
gradz
=
parsed
.
differentiate
(
"z"
).
optimize
();
Lepton
::
ParsedExpression
gradz
=
parsed
.
differentiate
(
"z"
).
optimize
();
if
(
!
isZeroExpression
(
gradx
))
if
(
!
isZeroExpression
(
gradx
))
expressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ force.x -= "
]
=
gradx
;
expressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ force.x -= "
]
=
gradx
;
if
(
!
isZeroExpression
(
grady
))
if
(
!
isZeroExpression
(
grady
))
expressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ force.y -= "
]
=
grady
;
expressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ force.y -= "
]
=
grady
;
if
(
!
isZeroExpression
(
gradz
))
if
(
!
isZeroExpression
(
gradz
))
expressions
[
"/*"
+
intToString
(
i
+
1
)
+
"*/ force.z -= "
]
=
gradz
;
expressions
[
"/*"
+
cl
.
intToString
(
i
+
1
)
+
"*/ force.z -= "
]
=
gradz
;
}
}
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
for
(
int
j
=
0
;
j
<
i
;
j
++
)
for
(
int
j
=
0
;
j
<
i
;
j
++
)
expressions
[
"float dV"
+
intToString
(
i
)
+
"dV"
+
intToString
(
j
)
+
" = "
]
=
valueDerivExpressions
[
i
][
j
];
expressions
[
"float dV"
+
cl
.
intToString
(
i
)
+
"dV"
+
cl
.
intToString
(
j
)
+
" = "
]
=
valueDerivExpressions
[
i
][
j
];
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
// Record values.
// Record values.
...
@@ -2695,7 +2742,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2695,7 +2742,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
compute
<<
"deriv"
<<
(
i
+
1
)
<<
" *= totalDeriv"
<<
i
<<
";
\n
"
;
compute
<<
"deriv"
<<
(
i
+
1
)
<<
" *= totalDeriv"
<<
i
<<
";
\n
"
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
compute
<<
"derivBuffers"
<<
index
<<
"[index] = deriv"
<<
index
<<
";
\n
"
;
compute
<<
"derivBuffers"
<<
index
<<
"[index] = deriv"
<<
index
<<
";
\n
"
;
}
}
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
...
@@ -2703,8 +2750,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2703,8 +2750,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
replacements
[
"REDUCE_DERIVATIVES"
]
=
reduce
.
str
();
replacements
[
"REDUCE_DERIVATIVES"
]
=
reduce
.
str
();
replacements
[
"COMPUTE_ENERGY"
]
=
compute
.
str
();
replacements
[
"COMPUTE_ENERGY"
]
=
compute
.
str
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customGBEnergyPerParticle
,
replacements
),
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customGBEnergyPerParticle
,
replacements
),
defines
);
perParticleEnergyKernel
=
cl
::
Kernel
(
program
,
"computePerParticleEnergy"
);
perParticleEnergyKernel
=
cl
::
Kernel
(
program
,
"computePerParticleEnergy"
);
}
}
...
@@ -2716,17 +2763,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2716,17 +2763,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs
<<
", __global const float* globals"
;
extraArgs
<<
", __global const float* globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
paramName
;
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
paramName
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
valueName
;
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict "
<<
valueName
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict derivBuffers"
<<
index
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict derivBuffers"
<<
index
;
compute
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
" = derivBuffers"
<<
index
<<
"[index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
" = derivBuffers"
<<
index
<<
"[index];
\n
"
;
}
}
...
@@ -2737,18 +2784,18 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2737,18 +2784,18 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
variables
[
force
.
getPerParticleParameterName
(
i
)]
=
"params"
+
params
->
getParameterSuffix
(
i
,
"[index]"
);
variables
[
force
.
getPerParticleParameterName
(
i
)]
=
"params"
+
params
->
getParameterSuffix
(
i
,
"[index]"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
variables
[
computedValueNames
[
i
]]
=
"values"
+
computedValues
->
getParameterSuffix
(
i
,
"[index]"
);
variables
[
computedValueNames
[
i
]]
=
"values"
+
computedValues
->
getParameterSuffix
(
i
,
"[index]"
);
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
string
is
=
intToString
(
i
);
string
is
=
cl
.
intToString
(
i
);
compute
<<
"float4 dV"
<<
is
<<
"dR = (float4) 0;
\n
"
;
compute
<<
"float4 dV"
<<
is
<<
"dR = (float4) 0;
\n
"
;
for
(
int
j
=
1
;
j
<
i
;
j
++
)
{
for
(
int
j
=
1
;
j
<
i
;
j
++
)
{
if
(
!
isZeroExpression
(
valueDerivExpressions
[
i
][
j
]))
{
if
(
!
isZeroExpression
(
valueDerivExpressions
[
i
][
j
]))
{
map
<
string
,
Lepton
::
ParsedExpression
>
derivExpressions
;
map
<
string
,
Lepton
::
ParsedExpression
>
derivExpressions
;
string
js
=
intToString
(
j
);
string
js
=
cl
.
intToString
(
j
);
derivExpressions
[
"float dV"
+
is
+
"dV"
+
js
+
" = "
]
=
valueDerivExpressions
[
i
][
j
];
derivExpressions
[
"float dV"
+
is
+
"dV"
+
js
+
" = "
]
=
valueDerivExpressions
[
i
][
j
];
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
derivExpressions
,
variables
,
functionDefinitions
,
"temp_"
+
is
+
"_"
+
js
,
prefix
+
"functionParams"
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
derivExpressions
,
variables
,
functionDefinitions
,
"temp_"
+
is
+
"_"
+
js
,
prefix
+
"functionParams"
);
compute
<<
"dV"
<<
is
<<
"dR += dV"
<<
is
<<
"dV"
<<
js
<<
"*dV"
<<
js
<<
"dR;
\n
"
;
compute
<<
"dV"
<<
is
<<
"dR += dV"
<<
is
<<
"dV"
<<
js
<<
"*dV"
<<
js
<<
"dR;
\n
"
;
}
}
}
}
...
@@ -2759,17 +2806,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2759,17 +2806,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
gradientExpressions
[
"dV"
+
is
+
"dR.y += "
]
=
valueGradientExpressions
[
i
][
1
];
gradientExpressions
[
"dV"
+
is
+
"dR.y += "
]
=
valueGradientExpressions
[
i
][
1
];
if
(
!
isZeroExpression
(
valueGradientExpressions
[
i
][
2
]))
if
(
!
isZeroExpression
(
valueGradientExpressions
[
i
][
2
]))
gradientExpressions
[
"dV"
+
is
+
"dR.z += "
]
=
valueGradientExpressions
[
i
][
2
];
gradientExpressions
[
"dV"
+
is
+
"dR.z += "
]
=
valueGradientExpressions
[
i
][
2
];
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
gradientExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
gradientExpressions
,
variables
,
functionDefinitions
,
"temp"
,
prefix
+
"functionParams"
);
}
}
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
string
is
=
intToString
(
i
);
string
is
=
cl
.
intToString
(
i
);
compute
<<
"force -= deriv"
<<
energyDerivs
->
getParameterSuffix
(
i
)
<<
"*dV"
<<
is
<<
"dR;
\n
"
;
compute
<<
"force -= deriv"
<<
energyDerivs
->
getParameterSuffix
(
i
)
<<
"*dV"
<<
is
<<
"dR;
\n
"
;
}
}
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"COMPUTE_FORCES"
]
=
compute
.
str
();
replacements
[
"COMPUTE_FORCES"
]
=
compute
.
str
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customGBGradientChainRule
,
replacements
),
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customGBGradientChainRule
,
replacements
),
defines
);
gradientChainRuleKernel
=
cl
::
Kernel
(
program
,
"computeGradientChainRuleTerms"
);
gradientChainRuleKernel
=
cl
::
Kernel
(
program
,
"computeGradientChainRuleTerms"
);
}
}
...
@@ -2779,7 +2826,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2779,7 +2826,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
globalVariables
;
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
globalVariables
;
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
"globals["
+
intToString
(
i
)
+
"]"
;
string
value
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
globalVariables
.
push_back
(
makeVariable
(
name
,
prefix
+
value
));
globalVariables
.
push_back
(
makeVariable
(
name
,
prefix
+
value
));
}
}
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variables
=
globalVariables
;
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variables
=
globalVariables
;
...
@@ -2800,7 +2847,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2800,7 +2847,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
Lepton
::
ParsedExpression
dVdR
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
0
],
functions
).
differentiate
(
"r"
).
optimize
();
Lepton
::
ParsedExpression
dVdR
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
0
],
functions
).
differentiate
(
"r"
).
optimize
();
derivExpressions
[
"float dV0dR1 = "
]
=
dVdR
;
derivExpressions
[
"float dV0dR1 = "
]
=
dVdR
;
derivExpressions
[
"float dV0dR2 = "
]
=
dVdR
.
renameVariables
(
rename
);
derivExpressions
[
"float dV0dR2 = "
]
=
dVdR
.
renameVariables
(
rename
);
chainSource
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
derivExpressions
,
variables
,
functionDefinitions
,
prefix
+
"temp0_"
,
prefix
+
"functionParams"
);
chainSource
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
derivExpressions
,
variables
,
functionDefinitions
,
prefix
+
"temp0_"
,
prefix
+
"functionParams"
);
if
(
needChainForValue
[
0
])
{
if
(
needChainForValue
[
0
])
{
if
(
useExclusionsForValue
)
if
(
useExclusionsForValue
)
chainSource
<<
"if (!isExcluded) {
\n
"
;
chainSource
<<
"if (!isExcluded) {
\n
"
;
...
@@ -2823,20 +2870,20 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2823,20 +2870,20 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector
<
OpenCLNonbondedUtilities
::
ParameterInfo
>
arguments
;
vector
<
OpenCLNonbondedUtilities
::
ParameterInfo
>
arguments
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
prefix
+
"params"
+
intToString
(
i
+
1
);
string
paramName
=
prefix
+
"params"
+
cl
.
intToString
(
i
+
1
);
if
(
chainStr
.
find
(
paramName
+
"1"
)
!=
chainStr
.
npos
||
chainStr
.
find
(
paramName
+
"2"
)
!=
chainStr
.
npos
)
if
(
chainStr
.
find
(
paramName
+
"1"
)
!=
chainStr
.
npos
||
chainStr
.
find
(
paramName
+
"2"
)
!=
chainStr
.
npos
)
parameters
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
parameters
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
paramName
=
prefix
+
"values"
+
intToString
(
i
+
1
);
string
paramName
=
prefix
+
"values"
+
cl
.
intToString
(
i
+
1
);
if
(
chainStr
.
find
(
paramName
+
"1"
)
!=
chainStr
.
npos
||
chainStr
.
find
(
paramName
+
"2"
)
!=
chainStr
.
npos
)
if
(
chainStr
.
find
(
paramName
+
"1"
)
!=
chainStr
.
npos
||
chainStr
.
find
(
paramName
+
"2"
)
!=
chainStr
.
npos
)
parameters
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
parameters
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
if
(
needChainForValue
[
i
])
{
if
(
needChainForValue
[
i
])
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
paramName
=
prefix
+
"dEdV"
+
intToString
(
i
+
1
);
string
paramName
=
prefix
+
"dEdV"
+
cl
.
intToString
(
i
+
1
);
parameters
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
parameters
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
}
}
}
...
@@ -2852,11 +2899,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2852,11 +2899,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
}
cl
.
addForce
(
new
OpenCLCustomGBForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
cl
.
addForce
(
new
OpenCLCustomGBForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
if
(
useLong
)
if
(
useLong
)
cl
.
addAutoclearBuffer
(
longEnergyDerivs
->
getDeviceBuffer
(),
2
*
longEnergyDerivs
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
longEnergyDerivs
);
else
{
else
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
cl
.
addAutoclearBuffer
(
buffer
.
getMemory
(),
buffer
.
getSize
()
*
energyDerivs
->
getNumObjects
()
/
sizeof
(
cl_float
)
);
cl
.
addAutoclearBuffer
(
buffer
.
getMemory
(),
buffer
.
getSize
()
*
energyDerivs
->
getNumObjects
());
}
}
}
}
}
}
...
@@ -2870,12 +2917,12 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2870,12 +2917,12 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
bool
useLong
=
(
cl
.
getSupports64BitGlobalAtomics
()
&&
!
deviceIsCpu
);
bool
useLong
=
(
cl
.
getSupports64BitGlobalAtomics
()
&&
!
deviceIsCpu
);
if
(
useLong
)
{
if
(
useLong
)
{
longValueBuffers
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"customGBLongValueBuffers"
);
longValueBuffers
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"customGBLongValueBuffers"
);
cl
.
addAutoclearBuffer
(
longValueBuffers
->
getDeviceBuffer
(),
2
*
longValueBuffers
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
longValueBuffers
);
cl
.
clearBuffer
(
longValueBuffers
->
getDeviceBuffer
(),
2
*
longValueBuffers
->
getSize
()
);
cl
.
clearBuffer
(
*
longValueBuffers
);
}
}
else
{
else
{
valueBuffers
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
nb
.
getNumForceBuffers
(),
"customGBValueBuffers"
);
valueBuffers
=
OpenCLArray
::
create
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
nb
.
getNumForceBuffers
(),
"customGBValueBuffers"
);
cl
.
addAutoclearBuffer
(
valueBuffers
->
getDeviceBuffer
(),
valueBuffers
->
getSize
()
);
cl
.
addAutoclearBuffer
(
*
valueBuffers
);
cl
.
clearBuffer
(
*
valueBuffers
);
cl
.
clearBuffer
(
*
valueBuffers
);
}
}
int
index
=
0
;
int
index
=
0
;
...
@@ -3151,9 +3198,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
...
@@ -3151,9 +3198,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
Lepton
::
ParsedExpression
forceExpressionZ
=
energyExpression
.
differentiate
(
"z"
).
optimize
();
Lepton
::
ParsedExpression
forceExpressionZ
=
energyExpression
.
differentiate
(
"z"
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"energy += "
]
=
energyExpression
;
expressions
[
"
float
dEdX = "
]
=
forceExpressionX
;
expressions
[
"
real
dEdX = "
]
=
forceExpressionX
;
expressions
[
"
float
dEdY = "
]
=
forceExpressionY
;
expressions
[
"
real
dEdY = "
]
=
forceExpressionY
;
expressions
[
"
float
dEdZ = "
]
=
forceExpressionZ
;
expressions
[
"
real
dEdZ = "
]
=
forceExpressionZ
;
// Create the kernels.
// Create the kernels.
...
@@ -3171,7 +3218,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
...
@@ -3171,7 +3218,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
argName
+
"["
+
intToString
(
i
)
+
"]"
;
string
value
=
argName
+
"["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
variables
[
name
]
=
value
;
}
}
}
}
...
@@ -3182,7 +3229,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
...
@@ -3182,7 +3229,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
compute
<<
buffer
.
getType
()
<<
" particleParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" particleParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
}
}
vector
<
pair
<
string
,
string
>
>
functions
;
vector
<
pair
<
string
,
string
>
>
functions
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
customExternalForce
,
replacements
),
force
.
getForceGroup
());
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
cl
.
replaceStrings
(
OpenCLKernelSources
::
customExternalForce
,
replacements
),
force
.
getForceGroup
());
...
@@ -3455,11 +3502,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -3455,11 +3502,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
vector
<
double
>
values
;
vector
<
double
>
values
;
double
min
,
max
;
double
min
,
max
;
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
string
arrayName
=
"table"
+
intToString
(
i
);
string
arrayName
=
"table"
+
cl
.
intToString
(
i
);
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functions
[
name
]
=
&
fp
;
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
mm_float4
>
f
=
OpenCL
ExpressionUtilities
::
computeFunctionCoefficients
(
values
,
min
,
max
);
vector
<
mm_float4
>
f
=
cl
.
get
ExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
tabulatedFunctions
.
push_back
(
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
.
push_back
(
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
tableArgs
<<
", __global const float4* restrict "
<<
arrayName
;
tableArgs
<<
", __global const float4* restrict "
<<
arrayName
;
...
@@ -3491,7 +3538,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -3491,7 +3538,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
}
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
variables
[
name
]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
}
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// Now to generate the kernel. First, it needs to calculate all distances, angles,
...
@@ -3512,12 +3559,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -3512,12 +3559,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
const
vector
<
int
>&
atoms
=
iter
->
second
;
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName
);
computedDeltas
.
insert
(
deltaName
);
}
}
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
r_"
+
deltaName
+
" =
sqrt
(delta"
+
deltaName
+
".w);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
r_"
+
deltaName
+
" =
SQRT
(delta"
+
deltaName
+
".w);
\n
"
);
variables
[
iter
->
first
]
=
"r_"
+
deltaName
;
variables
[
iter
->
first
]
=
"r_"
+
deltaName
;
forceExpressions
[
"
float
dEdDistance"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdDistance"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
}
index
=
0
;
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
...
@@ -3526,16 +3573,16 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -3526,16 +3573,16 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
0
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
0
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName1
);
computedDeltas
.
insert
(
deltaName1
);
}
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
2
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
2
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName2
);
computedDeltas
.
insert
(
deltaName2
);
}
}
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
"
+
angleName
+
" = computeAngle(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
"
+
angleName
+
" = computeAngle(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
variables
[
iter
->
first
]
=
angleName
;
variables
[
iter
->
first
]
=
angleName
;
forceExpressions
[
"
float
dEdAngle"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdAngle"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
}
index
=
0
;
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
...
@@ -3547,23 +3594,23 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -3547,23 +3594,23 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
dihedralName
=
"dihedral_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
string
dihedralName
=
"dihedral_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName1
);
computedDeltas
.
insert
(
deltaName1
);
}
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName2
);
computedDeltas
.
insert
(
deltaName2
);
}
}
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 delta"
+
deltaName3
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
3
]]
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 delta"
+
deltaName3
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
3
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName3
);
computedDeltas
.
insert
(
deltaName3
);
}
}
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 "
+
crossName1
+
" = computeCross(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 "
+
crossName1
+
" = computeCross(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 "
+
crossName2
+
" = computeCross(delta"
+
deltaName2
+
", delta"
+
deltaName3
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 "
+
crossName2
+
" = computeCross(delta"
+
deltaName2
+
", delta"
+
deltaName3
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
"
+
dihedralName
+
" = computeAngle("
+
crossName1
+
", "
+
crossName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
"
+
dihedralName
+
" = computeAngle("
+
crossName1
+
", "
+
crossName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
dihedralName
+
" *= (delta"
+
deltaName1
+
".x*"
+
crossName2
+
".x + delta"
+
deltaName1
+
".y*"
+
crossName2
+
".y + delta"
+
deltaName1
+
".z*"
+
crossName2
+
".z < 0 ? -1 : 1);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
dihedralName
+
" *= (delta"
+
deltaName1
+
".x*"
+
crossName2
+
".x + delta"
+
deltaName1
+
".y*"
+
crossName2
+
".y + delta"
+
deltaName1
+
".z*"
+
crossName2
+
".z < 0 ? -1 : 1);
\n
"
);
variables
[
iter
->
first
]
=
dihedralName
;
variables
[
iter
->
first
]
=
dihedralName
;
forceExpressions
[
"
float
dEdDihedral"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdDihedral"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
}
// Next it needs to load parameters from global memory.
// Next it needs to load parameters from global memory.
...
@@ -3573,19 +3620,19 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -3573,19 +3620,19 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for
(
int
i
=
0
;
i
<
(
int
)
donorParams
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
donorParams
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
donorParams
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
donorParams
->
getBuffers
()[
i
];
extraArgs
<<
", __global const "
+
buffer
.
getType
()
+
"* restrict donor"
+
buffer
.
getName
();
extraArgs
<<
", __global const "
+
buffer
.
getType
()
+
"* restrict donor"
+
buffer
.
getName
();
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" donorParams"
+
intToString
(
i
+
1
)
+
" = donor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" donorParams"
+
cl
.
intToString
(
i
+
1
)
+
" = donor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
}
}
for
(
int
i
=
0
;
i
<
(
int
)
acceptorParams
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
acceptorParams
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
acceptorParams
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
acceptorParams
->
getBuffers
()[
i
];
extraArgs
<<
", __global const "
+
buffer
.
getType
()
+
"* restrict acceptor"
+
buffer
.
getName
();
extraArgs
<<
", __global const "
+
buffer
.
getType
()
+
"* restrict acceptor"
+
buffer
.
getName
();
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" acceptorParams"
+
intToString
(
i
+
1
)
+
" = acceptor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" acceptorParams"
+
cl
.
intToString
(
i
+
1
)
+
" = acceptor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
}
}
// Now evaluate the expressions.
// Now evaluate the expressions.
computeAcceptor
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
computeAcceptor
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
forceExpressions
[
"energy += "
]
=
energyExpression
;
forceExpressions
[
"energy += "
]
=
energyExpression
;
computeDonor
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
computeDonor
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
// Finally, apply forces to atoms.
// Finally, apply forces to atoms.
...
@@ -3593,7 +3640,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -3593,7 +3640,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
atoms
=
iter
->
second
;
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
value
=
"(dEdDistance"
+
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
+
".xyz"
;
string
value
=
"(dEdDistance"
+
cl
.
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
+
".xyz"
;
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"-"
+
value
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"-"
+
value
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
value
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
value
);
}
}
...
@@ -3603,11 +3650,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -3603,11 +3650,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string
deltaName1
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
0
]];
string
deltaName1
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
0
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"{
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"{
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 crossProd = cross(delta"
+
deltaName2
+
", delta"
+
deltaName1
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 crossProd = cross(delta"
+
deltaName2
+
", delta"
+
deltaName1
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
lengthCross = max(length(crossProd), 1e-6f);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
lengthCross = max(length(crossProd),
(real)
1e-6f);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 deltaCross0 = -cross(delta"
+
deltaName1
+
", crossProd)*dEdAngle"
+
intToString
(
index
)
+
"/(delta"
+
deltaName1
+
".w*lengthCross);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 deltaCross0 = -cross(delta"
+
deltaName1
+
", crossProd)*dEdAngle"
+
cl
.
intToString
(
index
)
+
"/(delta"
+
deltaName1
+
".w*lengthCross);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 deltaCross2 = cross(delta"
+
deltaName2
+
", crossProd)*dEdAngle"
+
intToString
(
index
)
+
"/(delta"
+
deltaName2
+
".w*lengthCross);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 deltaCross2 = cross(delta"
+
deltaName2
+
", crossProd)*dEdAngle"
+
cl
.
intToString
(
index
)
+
"/(delta"
+
deltaName2
+
".w*lengthCross);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"deltaCross0.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"deltaCross0.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
"deltaCross1.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
"deltaCross1.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
2
],
"deltaCross2.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
2
],
"deltaCross2.xyz"
);
...
@@ -3622,15 +3669,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -3622,15 +3669,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"{
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"{
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
r =
sqrt
(delta"
+
deltaName2
+
".w);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
r =
SQRT
(delta"
+
deltaName2
+
".w);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 ff;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 ff;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.x = (-dEdDihedral"
+
intToString
(
index
)
+
"*r)/"
+
crossName1
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.x = (-dEdDihedral"
+
cl
.
intToString
(
index
)
+
"*r)/"
+
crossName1
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.y = (delta"
+
deltaName1
+
".x*delta"
+
deltaName2
+
".x + delta"
+
deltaName1
+
".y*delta"
+
deltaName2
+
".y + delta"
+
deltaName1
+
".z*delta"
+
deltaName2
+
".z)/delta"
+
deltaName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.y = (delta"
+
deltaName1
+
".x*delta"
+
deltaName2
+
".x + delta"
+
deltaName1
+
".y*delta"
+
deltaName2
+
".y + delta"
+
deltaName1
+
".z*delta"
+
deltaName2
+
".z)/delta"
+
deltaName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.z = (delta"
+
deltaName3
+
".x*delta"
+
deltaName2
+
".x + delta"
+
deltaName3
+
".y*delta"
+
deltaName2
+
".y + delta"
+
deltaName3
+
".z*delta"
+
deltaName2
+
".z)/delta"
+
deltaName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.z = (delta"
+
deltaName3
+
".x*delta"
+
deltaName2
+
".x + delta"
+
deltaName3
+
".y*delta"
+
deltaName2
+
".y + delta"
+
deltaName3
+
".z*delta"
+
deltaName2
+
".z)/delta"
+
deltaName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.w = (dEdDihedral"
+
intToString
(
index
)
+
"*r)/"
+
crossName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.w = (dEdDihedral"
+
cl
.
intToString
(
index
)
+
"*r)/"
+
crossName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 internalF0 = ff.x*"
+
crossName1
+
";
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 internalF0 = ff.x*"
+
crossName1
+
";
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 internalF3 = ff.w*"
+
crossName2
+
";
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 internalF3 = ff.w*"
+
crossName2
+
";
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
float
4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"
real
4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"internalF0.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"internalF0.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
"s.xyz-internalF0.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
"s.xyz-internalF0.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
2
],
"-s.xyz-internalF3.xyz"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
2
],
"-s.xyz-internalF3.xyz"
);
...
@@ -3645,13 +3692,13 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -3645,13 +3692,13 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
replacements
[
"COMPUTE_ACCEPTOR_FORCE"
]
=
computeAcceptor
.
str
();
replacements
[
"COMPUTE_ACCEPTOR_FORCE"
]
=
computeAcceptor
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_DONORS"
]
=
intToString
(
numDonors
);
defines
[
"NUM_DONORS"
]
=
cl
.
intToString
(
numDonors
);
defines
[
"NUM_ACCEPTORS"
]
=
intToString
(
numAcceptors
);
defines
[
"NUM_ACCEPTORS"
]
=
cl
.
intToString
(
numAcceptors
);
defines
[
"PI"
]
=
doubleToString
(
M_PI
);
defines
[
"PI"
]
=
cl
.
doubleToString
(
M_PI
);
if
(
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
NoCutoff
)
{
if
(
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
NoCutoff
)
{
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"CUTOFF_SQUARED"
]
=
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
defines
[
"CUTOFF_SQUARED"
]
=
cl
.
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
}
}
if
(
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
CutoffNonPeriodic
)
if
(
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
CutoffNonPeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"USE_PERIODIC"
]
=
"1"
;
...
@@ -3729,11 +3776,11 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
...
@@ -3729,11 +3776,11 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
acceptorKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
tabulatedFunctionParams
->
getDeviceBuffer
());
acceptorKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
tabulatedFunctionParams
->
getDeviceBuffer
());
}
}
}
}
donorKernel
.
setArg
<
mm_float4
>
(
8
,
cl
.
getPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
cl
,
donorKernel
,
8
);
donorKernel
.
setArg
<
mm_float4
>
(
9
,
cl
.
getInvPeriodicBoxSize
()
);
setInvPeriodicBoxSizeArg
(
cl
,
donorKernel
,
9
);
cl
.
executeKernel
(
donorKernel
,
max
(
numDonors
,
numAcceptors
));
cl
.
executeKernel
(
donorKernel
,
max
(
numDonors
,
numAcceptors
));
acceptorKernel
.
setArg
<
mm_float4
>
(
8
,
cl
.
getPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
cl
,
acceptorKernel
,
8
);
acceptorKernel
.
setArg
<
mm_float4
>
(
9
,
cl
.
getInvPeriodicBoxSize
()
);
setInvPeriodicBoxSizeArg
(
cl
,
acceptorKernel
,
9
);
cl
.
executeKernel
(
acceptorKernel
,
max
(
numDonors
,
numAcceptors
));
cl
.
executeKernel
(
acceptorKernel
,
max
(
numDonors
,
numAcceptors
));
return
0.0
;
return
0.0
;
}
}
...
@@ -3848,7 +3895,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -3848,7 +3895,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
functions
[
name
]
=
&
fp
;
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
tabulatedFunctionParamsVec
[
i
]
=
mm_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
mm_float4
>
f
=
OpenCL
ExpressionUtilities
::
computeFunctionCoefficients
(
values
,
min
,
max
);
vector
<
mm_float4
>
f
=
cl
.
get
ExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
OpenCLArray
*
array
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
);
OpenCLArray
*
array
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
);
tabulatedFunctions
.
push_back
(
array
);
tabulatedFunctions
.
push_back
(
array
);
array
->
upload
(
f
);
array
->
upload
(
f
);
...
@@ -3872,7 +3919,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -3872,7 +3919,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
}
}
map
<
string
,
string
>
variables
;
map
<
string
,
string
>
variables
;
for
(
int
i
=
0
;
i
<
particlesPerBond
;
i
++
)
{
for
(
int
i
=
0
;
i
<
particlesPerBond
;
i
++
)
{
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
variables
[
"x"
+
index
]
=
"pos"
+
index
+
".x"
;
variables
[
"x"
+
index
]
=
"pos"
+
index
+
".x"
;
variables
[
"y"
+
index
]
=
"pos"
+
index
+
".y"
;
variables
[
"y"
+
index
]
=
"pos"
+
index
+
".y"
;
variables
[
"z"
+
index
]
=
"pos"
+
index
+
".z"
;
variables
[
"z"
+
index
]
=
"pos"
+
index
+
".z"
;
...
@@ -3887,7 +3934,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -3887,7 +3934,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
string
argName
=
cl
.
getBondedUtilities
().
addArgument
(
globals
->
getDeviceBuffer
(),
"float"
);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
argName
+
"["
+
intToString
(
i
)
+
"]"
;
string
value
=
argName
+
"["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
variables
[
name
]
=
value
;
}
}
}
}
...
@@ -3903,7 +3950,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -3903,7 +3950,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
set
<
string
>
computedDeltas
;
set
<
string
>
computedDeltas
;
vector
<
string
>
atomNames
,
posNames
;
vector
<
string
>
atomNames
,
posNames
;
for
(
int
i
=
0
;
i
<
particlesPerBond
;
i
++
)
{
for
(
int
i
=
0
;
i
<
particlesPerBond
;
i
++
)
{
string
index
=
intToString
(
i
+
1
);
string
index
=
cl
.
intToString
(
i
+
1
);
atomNames
.
push_back
(
"P"
+
index
);
atomNames
.
push_back
(
"P"
+
index
);
posNames
.
push_back
(
"pos"
+
index
);
posNames
.
push_back
(
"pos"
+
index
);
}
}
...
@@ -3913,12 +3960,12 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -3913,12 +3960,12 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
const
vector
<
int
>&
atoms
=
iter
->
second
;
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName
<<
" = ccb_delta("
<<
posNames
[
atoms
[
0
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName
<<
" = ccb_delta("
<<
posNames
[
atoms
[
0
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName
);
computedDeltas
.
insert
(
deltaName
);
}
}
compute
<<
"
float
r_"
<<
deltaName
<<
" = sqrt(delta"
<<
deltaName
<<
".w);
\n
"
;
compute
<<
"
real
r_"
<<
deltaName
<<
" = sqrt(delta"
<<
deltaName
<<
".w);
\n
"
;
variables
[
iter
->
first
]
=
"r_"
+
deltaName
;
variables
[
iter
->
first
]
=
"r_"
+
deltaName
;
forceExpressions
[
"
float
dEdDistance"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdDistance"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
}
index
=
0
;
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
...
@@ -3927,16 +3974,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -3927,16 +3974,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName1
<<
" = ccb_delta("
<<
posNames
[
atoms
[
1
]]
<<
", "
<<
posNames
[
atoms
[
0
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName1
<<
" = ccb_delta("
<<
posNames
[
atoms
[
1
]]
<<
", "
<<
posNames
[
atoms
[
0
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName1
);
computedDeltas
.
insert
(
deltaName1
);
}
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName2
<<
" = ccb_delta("
<<
posNames
[
atoms
[
1
]]
<<
", "
<<
posNames
[
atoms
[
2
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName2
<<
" = ccb_delta("
<<
posNames
[
atoms
[
1
]]
<<
", "
<<
posNames
[
atoms
[
2
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName2
);
computedDeltas
.
insert
(
deltaName2
);
}
}
compute
<<
"
float
"
<<
angleName
<<
" = ccb_computeAngle(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"
real
"
<<
angleName
<<
" = ccb_computeAngle(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
variables
[
iter
->
first
]
=
angleName
;
variables
[
iter
->
first
]
=
angleName
;
forceExpressions
[
"
float
dEdAngle"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdAngle"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
}
index
=
0
;
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
...
@@ -3948,23 +3995,23 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -3948,23 +3995,23 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
dihedralName
=
"dihedral_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
string
dihedralName
=
"dihedral_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName1
<<
" = ccb_delta("
<<
posNames
[
atoms
[
0
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName1
<<
" = ccb_delta("
<<
posNames
[
atoms
[
0
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName1
);
computedDeltas
.
insert
(
deltaName1
);
}
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName2
<<
" = ccb_delta("
<<
posNames
[
atoms
[
2
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName2
<<
" = ccb_delta("
<<
posNames
[
atoms
[
2
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName2
);
computedDeltas
.
insert
(
deltaName2
);
}
}
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
compute
<<
"
float
4 delta"
<<
deltaName3
<<
" = ccb_delta("
<<
posNames
[
atoms
[
2
]]
<<
", "
<<
posNames
[
atoms
[
3
]]
<<
");
\n
"
;
compute
<<
"
real
4 delta"
<<
deltaName3
<<
" = ccb_delta("
<<
posNames
[
atoms
[
2
]]
<<
", "
<<
posNames
[
atoms
[
3
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName3
);
computedDeltas
.
insert
(
deltaName3
);
}
}
compute
<<
"
float
4 "
<<
crossName1
<<
" = ccb_computeCross(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"
real
4 "
<<
crossName1
<<
" = ccb_computeCross(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"
float
4 "
<<
crossName2
<<
" = ccb_computeCross(delta"
<<
deltaName2
<<
", delta"
<<
deltaName3
<<
");
\n
"
;
compute
<<
"
real
4 "
<<
crossName2
<<
" = ccb_computeCross(delta"
<<
deltaName2
<<
", delta"
<<
deltaName3
<<
");
\n
"
;
compute
<<
"
float
"
<<
dihedralName
<<
" = ccb_computeAngle("
<<
crossName1
<<
", "
<<
crossName2
<<
");
\n
"
;
compute
<<
"
real
"
<<
dihedralName
<<
" = ccb_computeAngle("
<<
crossName1
<<
", "
<<
crossName2
<<
");
\n
"
;
compute
<<
dihedralName
<<
" *= (delta"
<<
deltaName1
<<
".x*"
<<
crossName2
<<
".x + delta"
<<
deltaName1
<<
".y*"
<<
crossName2
<<
".y + delta"
<<
deltaName1
<<
".z*"
<<
crossName2
<<
".z < 0 ? -1 : 1);
\n
"
;
compute
<<
dihedralName
<<
" *= (delta"
<<
deltaName1
<<
".x*"
<<
crossName2
<<
".x + delta"
<<
deltaName1
<<
".y*"
<<
crossName2
<<
".y + delta"
<<
deltaName1
<<
".z*"
<<
crossName2
<<
".z < 0 ? -1 : 1);
\n
"
;
variables
[
iter
->
first
]
=
dihedralName
;
variables
[
iter
->
first
]
=
dihedralName
;
forceExpressions
[
"
float
dEdDihedral"
+
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
forceExpressions
[
"
real
dEdDihedral"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
}
// Now evaluate the expressions.
// Now evaluate the expressions.
...
@@ -3975,16 +4022,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -3975,16 +4022,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
compute
<<
buffer
.
getType
()
<<
" bondParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" bondParams"
<<
(
i
+
1
)
<<
" = "
<<
argName
<<
"[index];
\n
"
;
}
}
forceExpressions
[
"energy += "
]
=
energyExpression
;
forceExpressions
[
"energy += "
]
=
energyExpression
;
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
functionParamsName
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
functionParamsName
);
// Finally, apply forces to atoms.
// Finally, apply forces to atoms.
vector
<
string
>
forceNames
;
vector
<
string
>
forceNames
;
for
(
int
i
=
0
;
i
<
particlesPerBond
;
i
++
)
{
for
(
int
i
=
0
;
i
<
particlesPerBond
;
i
++
)
{
string
istr
=
intToString
(
i
+
1
);
string
istr
=
cl
.
intToString
(
i
+
1
);
string
forceName
=
"force"
+
istr
;
string
forceName
=
"force"
+
istr
;
forceNames
.
push_back
(
forceName
);
forceNames
.
push_back
(
forceName
);
compute
<<
"
float
4 "
<<
forceName
<<
" = (
float4) (0.0f, 0.0f, 0.0f, 0.0f)
;
\n
"
;
compute
<<
"
real
4 "
<<
forceName
<<
" = (
real4) 0
;
\n
"
;
compute
<<
"{
\n
"
;
compute
<<
"{
\n
"
;
Lepton
::
ParsedExpression
forceExpressionX
=
energyExpression
.
differentiate
(
"x"
+
istr
).
optimize
();
Lepton
::
ParsedExpression
forceExpressionX
=
energyExpression
.
differentiate
(
"x"
+
istr
).
optimize
();
Lepton
::
ParsedExpression
forceExpressionY
=
energyExpression
.
differentiate
(
"y"
+
istr
).
optimize
();
Lepton
::
ParsedExpression
forceExpressionY
=
energyExpression
.
differentiate
(
"y"
+
istr
).
optimize
();
...
@@ -3997,14 +4044,14 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -3997,14 +4044,14 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
if
(
!
isZeroExpression
(
forceExpressionZ
))
if
(
!
isZeroExpression
(
forceExpressionZ
))
expressions
[
forceName
+
".z -= "
]
=
forceExpressionZ
;
expressions
[
forceName
+
".z -= "
]
=
forceExpressionZ
;
if
(
expressions
.
size
()
>
0
)
if
(
expressions
.
size
()
>
0
)
compute
<<
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functionDefinitions
,
"coordtemp"
,
functionParamsName
);
compute
<<
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functionDefinitions
,
"coordtemp"
,
functionParamsName
);
compute
<<
"}
\n
"
;
compute
<<
"}
\n
"
;
}
}
index
=
0
;
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
atoms
=
iter
->
second
;
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
value
=
"(dEdDistance"
+
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
+
".xyz"
;
string
value
=
"(dEdDistance"
+
cl
.
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
+
".xyz"
;
compute
<<
forceNames
[
atoms
[
0
]]
<<
".xyz += "
<<
"-"
<<
value
<<
";
\n
"
;
compute
<<
forceNames
[
atoms
[
0
]]
<<
".xyz += "
<<
"-"
<<
value
<<
";
\n
"
;
compute
<<
forceNames
[
atoms
[
1
]]
<<
".xyz += "
<<
value
<<
";
\n
"
;
compute
<<
forceNames
[
atoms
[
1
]]
<<
".xyz += "
<<
value
<<
";
\n
"
;
}
}
...
@@ -4014,11 +4061,11 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -4014,11 +4061,11 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string
deltaName1
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
0
]];
string
deltaName1
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
0
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
compute
<<
"{
\n
"
;
compute
<<
"{
\n
"
;
compute
<<
"
float
4 crossProd = cross(delta"
<<
deltaName2
<<
", delta"
<<
deltaName1
<<
");
\n
"
;
compute
<<
"
real
4 crossProd = cross(delta"
<<
deltaName2
<<
", delta"
<<
deltaName1
<<
");
\n
"
;
compute
<<
"
float
lengthCross = max(length(crossProd), 1e-6f);
\n
"
;
compute
<<
"
real
lengthCross = max(length(crossProd),
(real)
1e-6f);
\n
"
;
compute
<<
"
float
4 deltaCross0 = -cross(delta"
<<
deltaName1
<<
", crossProd)*dEdAngle"
<<
intToString
(
index
)
<<
"/(delta"
<<
deltaName1
<<
".w*lengthCross);
\n
"
;
compute
<<
"
real
4 deltaCross0 = -cross(delta"
<<
deltaName1
<<
", crossProd)*dEdAngle"
<<
cl
.
intToString
(
index
)
<<
"/(delta"
<<
deltaName1
<<
".w*lengthCross);
\n
"
;
compute
<<
"
float
4 deltaCross2 = cross(delta"
<<
deltaName2
<<
", crossProd)*dEdAngle"
<<
intToString
(
index
)
<<
"/(delta"
<<
deltaName2
<<
".w*lengthCross);
\n
"
;
compute
<<
"
real
4 deltaCross2 = cross(delta"
<<
deltaName2
<<
", crossProd)*dEdAngle"
<<
cl
.
intToString
(
index
)
<<
"/(delta"
<<
deltaName2
<<
".w*lengthCross);
\n
"
;
compute
<<
"
float
4 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
;
compute
<<
"
real
4 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
;
compute
<<
forceNames
[
atoms
[
0
]]
<<
".xyz += deltaCross0.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
0
]]
<<
".xyz += deltaCross0.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
1
]]
<<
".xyz += deltaCross1.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
1
]]
<<
".xyz += deltaCross1.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
2
]]
<<
".xyz += deltaCross2.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
2
]]
<<
".xyz += deltaCross2.xyz;
\n
"
;
...
@@ -4033,15 +4080,15 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -4033,15 +4080,15 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
compute
<<
"{
\n
"
;
compute
<<
"{
\n
"
;
compute
<<
"
float
r =
sqrt
(delta"
<<
deltaName2
<<
".w);
\n
"
;
compute
<<
"
real
r =
SQRT
(delta"
<<
deltaName2
<<
".w);
\n
"
;
compute
<<
"
float
4 ff;
\n
"
;
compute
<<
"
real
4 ff;
\n
"
;
compute
<<
"ff.x = (-dEdDihedral"
<<
intToString
(
index
)
<<
"*r)/"
<<
crossName1
<<
".w;
\n
"
;
compute
<<
"ff.x = (-dEdDihedral"
<<
cl
.
intToString
(
index
)
<<
"*r)/"
<<
crossName1
<<
".w;
\n
"
;
compute
<<
"ff.y = (delta"
<<
deltaName1
<<
".x*delta"
<<
deltaName2
<<
".x + delta"
<<
deltaName1
<<
".y*delta"
<<
deltaName2
<<
".y + delta"
<<
deltaName1
<<
".z*delta"
<<
deltaName2
<<
".z)/delta"
<<
deltaName2
<<
".w;
\n
"
;
compute
<<
"ff.y = (delta"
<<
deltaName1
<<
".x*delta"
<<
deltaName2
<<
".x + delta"
<<
deltaName1
<<
".y*delta"
<<
deltaName2
<<
".y + delta"
<<
deltaName1
<<
".z*delta"
<<
deltaName2
<<
".z)/delta"
<<
deltaName2
<<
".w;
\n
"
;
compute
<<
"ff.z = (delta"
<<
deltaName3
<<
".x*delta"
<<
deltaName2
<<
".x + delta"
<<
deltaName3
<<
".y*delta"
<<
deltaName2
<<
".y + delta"
<<
deltaName3
<<
".z*delta"
<<
deltaName2
<<
".z)/delta"
<<
deltaName2
<<
".w;
\n
"
;
compute
<<
"ff.z = (delta"
<<
deltaName3
<<
".x*delta"
<<
deltaName2
<<
".x + delta"
<<
deltaName3
<<
".y*delta"
<<
deltaName2
<<
".y + delta"
<<
deltaName3
<<
".z*delta"
<<
deltaName2
<<
".z)/delta"
<<
deltaName2
<<
".w;
\n
"
;
compute
<<
"ff.w = (dEdDihedral"
<<
intToString
(
index
)
<<
"*r)/"
<<
crossName2
<<
".w;
\n
"
;
compute
<<
"ff.w = (dEdDihedral"
<<
cl
.
intToString
(
index
)
<<
"*r)/"
<<
crossName2
<<
".w;
\n
"
;
compute
<<
"
float
4 internalF0 = ff.x*"
<<
crossName1
<<
";
\n
"
;
compute
<<
"
real
4 internalF0 = ff.x*"
<<
crossName1
<<
";
\n
"
;
compute
<<
"
float
4 internalF3 = ff.w*"
<<
crossName2
<<
";
\n
"
;
compute
<<
"
real
4 internalF3 = ff.w*"
<<
crossName2
<<
";
\n
"
;
compute
<<
"
float
4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
;
compute
<<
"
real
4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
;
compute
<<
forceNames
[
atoms
[
0
]]
<<
".xyz += internalF0.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
0
]]
<<
".xyz += internalF0.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
1
]]
<<
".xyz += s.xyz-internalF0.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
1
]]
<<
".xyz += s.xyz-internalF0.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
2
]]
<<
".xyz += -s.xyz-internalF3.xyz;
\n
"
;
compute
<<
forceNames
[
atoms
[
2
]]
<<
".xyz += -s.xyz-internalF3.xyz;
\n
"
;
...
@@ -4050,7 +4097,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -4050,7 +4097,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
}
}
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
compute
.
str
(),
force
.
getForceGroup
());
cl
.
getBondedUtilities
().
addInteraction
(
atoms
,
compute
.
str
(),
force
.
getForceGroup
());
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"M_PI"
]
=
doubleToString
(
M_PI
);
replacements
[
"M_PI"
]
=
cl
.
doubleToString
(
M_PI
);
cl
.
getBondedUtilities
().
addPrefixCode
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customCompoundBond
,
replacements
));;
cl
.
getBondedUtilities
().
addPrefixCode
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customCompoundBond
,
replacements
));;
}
}
...
@@ -4173,8 +4220,8 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L
...
@@ -4173,8 +4220,8 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
langevin
,
defines
,
""
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
langevin
,
defines
,
""
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateLangevinPart1"
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateLangevinPart1"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateLangevinPart2"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateLangevinPart2"
);
...
@@ -4266,7 +4313,7 @@ void OpenCLIntegrateBrownianStepKernel::initialize(const System& system, const B
...
@@ -4266,7 +4313,7 @@ void OpenCLIntegrateBrownianStepKernel::initialize(const System& system, const B
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
brownian
,
defines
,
""
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
brownian
,
defines
,
""
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateBrownianPart1"
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateBrownianPart1"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateBrownianPart2"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateBrownianPart2"
);
...
@@ -4437,8 +4484,8 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
...
@@ -4437,8 +4484,8 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
langevin
,
defines
,
""
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
langevin
,
defines
,
""
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateLangevinPart1"
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateLangevinPart1"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateLangevinPart2"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateLangevinPart2"
);
...
@@ -4635,10 +4682,10 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va
...
@@ -4635,10 +4682,10 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va
else
{
else
{
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
if
(
variable
==
integrator
.
getGlobalVariableName
(
i
))
if
(
variable
==
integrator
.
getGlobalVariableName
(
i
))
expressions
[
"globals["
+
intToString
(
i
)
+
"] = "
]
=
expr
;
expressions
[
"globals["
+
cl
.
intToString
(
i
)
+
"] = "
]
=
expr
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
if
(
variable
==
parameterNames
[
i
])
{
if
(
variable
==
parameterNames
[
i
])
{
expressions
[
"params["
+
intToString
(
i
)
+
"] = "
]
=
expr
;
expressions
[
"params["
+
cl
.
intToString
(
i
)
+
"] = "
]
=
expr
;
modifiesParameters
=
true
;
modifiesParameters
=
true
;
}
}
}
}
...
@@ -4650,11 +4697,11 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va
...
@@ -4650,11 +4697,11 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va
variables
[
"gaussian"
]
=
"gaussian"
;
variables
[
"gaussian"
]
=
"gaussian"
;
variables
[
energyName
]
=
"energy[0]"
;
variables
[
energyName
]
=
"energy[0]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
variables
[
parameterNames
[
i
]]
=
"params["
+
intToString
(
i
)
+
"]"
;
variables
[
parameterNames
[
i
]]
=
"params["
+
cl
.
intToString
(
i
)
+
"]"
;
vector
<
pair
<
string
,
string
>
>
functions
;
vector
<
pair
<
string
,
string
>
>
functions
;
return
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
return
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
}
}
string
OpenCLIntegrateCustomStepKernel
::
createPerDofComputation
(
const
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
string
&
forceName
,
const
string
&
energyName
)
{
string
OpenCLIntegrateCustomStepKernel
::
createPerDofComputation
(
const
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
string
&
forceName
,
const
string
&
energyName
)
{
...
@@ -4666,7 +4713,7 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
...
@@ -4666,7 +4713,7 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
else
if
(
variable
==
"v"
)
else
if
(
variable
==
"v"
)
expressions
[
"velocity"
+
suffix
+
" = "
]
=
expr
;
expressions
[
"velocity"
+
suffix
+
" = "
]
=
expr
;
else
if
(
variable
==
""
)
else
if
(
variable
==
""
)
expressions
[
"sum[3*index+"
+
intToString
(
component
)
+
"] = "
]
=
expr
;
expressions
[
"sum[3*index+"
+
cl
.
intToString
(
component
)
+
"] = "
]
=
expr
;
else
{
else
{
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
if
(
variable
==
integrator
.
getPerDofVariableName
(
i
))
if
(
variable
==
integrator
.
getPerDofVariableName
(
i
))
...
@@ -4684,14 +4731,14 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
...
@@ -4684,14 +4731,14 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
variables
[
"dt"
]
=
"stepSize"
;
variables
[
"dt"
]
=
"stepSize"
;
variables
[
energyName
]
=
"energy[0]"
;
variables
[
energyName
]
=
"energy[0]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
intToString
(
i
)
+
"]"
;
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
variables
[
integrator
.
getPerDofVariableName
(
i
)]
=
"perDof"
+
suffix
.
substr
(
1
)
+
perDofValues
->
getParameterSuffix
(
i
);
variables
[
integrator
.
getPerDofVariableName
(
i
)]
=
"perDof"
+
suffix
.
substr
(
1
)
+
perDofValues
->
getParameterSuffix
(
i
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
variables
[
parameterNames
[
i
]]
=
"params["
+
intToString
(
i
)
+
"]"
;
variables
[
parameterNames
[
i
]]
=
"params["
+
cl
.
intToString
(
i
)
+
"]"
;
vector
<
pair
<
string
,
string
>
>
functions
;
vector
<
pair
<
string
,
string
>
>
functions
;
string
tempType
=
(
cl
.
getSupportsDoublePrecision
()
?
"double"
:
"float"
);
string
tempType
=
(
cl
.
getSupportsDoublePrecision
()
?
"double"
:
"float"
);
return
OpenCL
ExpressionUtilities
::
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
+
intToString
(
component
)
+
"_"
,
""
,
tempType
);
return
cl
.
get
ExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
+
cl
.
intToString
(
component
)
+
"_"
,
""
,
tempType
);
}
}
void
OpenCLIntegrateCustomStepKernel
::
execute
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
void
OpenCLIntegrateCustomStepKernel
::
execute
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
...
@@ -4733,8 +4780,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -4733,8 +4780,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
merged
.
resize
(
numSteps
,
false
);
merged
.
resize
(
numSteps
,
false
);
modifiesParameters
=
false
;
modifiesParameters
=
false
;
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"WORK_GROUP_SIZE"
]
=
intToString
(
OpenCLContext
::
ThreadBlockSize
);
defines
[
"WORK_GROUP_SIZE"
]
=
cl
.
intToString
(
OpenCLContext
::
ThreadBlockSize
);
// Initialize the random number generator.
// Initialize the random number generator.
...
@@ -4858,9 +4905,9 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -4858,9 +4905,9 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
stringstream
compute
;
stringstream
compute
;
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
compute
<<
buffer
.
getType
()
<<
" perDofx"
<<
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofx"
<<
cl
.
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofy"
<<
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index+1];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofy"
<<
cl
.
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+1];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofz"
<<
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index+2];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofz"
<<
cl
.
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+2];
\n
"
;
}
}
int
numGaussian
=
0
,
numUniform
=
0
;
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
...
@@ -4882,9 +4929,9 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -4882,9 +4929,9 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
else
{
else
{
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
compute
<<
"perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index] = perDofx"
<<
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index] = perDofx"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index+1] = perDofy"
<<
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+1] = perDofy"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
}
}
}
}
compute
<<
"}
\n
"
;
compute
<<
"}
\n
"
;
...
@@ -4896,7 +4943,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -4896,7 +4943,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
stringstream
args
;
stringstream
args
;
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
string
valueName
=
"perDofValues"
+
intToString
(
i
+
1
);
string
valueName
=
"perDofValues"
+
cl
.
intToString
(
i
+
1
);
args
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict "
<<
valueName
;
args
<<
", __global "
<<
buffer
.
getType
()
<<
"* restrict "
<<
valueName
;
}
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
...
@@ -5216,7 +5263,7 @@ OpenCLApplyAndersenThermostatKernel::~OpenCLApplyAndersenThermostatKernel() {
...
@@ -5216,7 +5263,7 @@ OpenCLApplyAndersenThermostatKernel::~OpenCLApplyAndersenThermostatKernel() {
void
OpenCLApplyAndersenThermostatKernel
::
initialize
(
const
System
&
system
,
const
AndersenThermostat
&
thermostat
)
{
void
OpenCLApplyAndersenThermostatKernel
::
initialize
(
const
System
&
system
,
const
AndersenThermostat
&
thermostat
)
{
randomSeed
=
thermostat
.
getRandomNumberSeed
();
randomSeed
=
thermostat
.
getRandomNumberSeed
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
andersenThermostat
,
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
andersenThermostat
,
defines
);
kernel
=
cl
::
Kernel
(
program
,
"applyAndersenThermostat"
);
kernel
=
cl
::
Kernel
(
program
,
"applyAndersenThermostat"
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
randomSeed
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
randomSeed
);
...
@@ -5349,7 +5396,7 @@ void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotion
...
@@ -5349,7 +5396,7 @@ void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotion
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
totalMass
+=
system
.
getParticleMass
(
i
);
totalMass
+=
system
.
getParticleMass
(
i
);
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"INVERSE_TOTAL_MASS"
]
=
doubleToString
(
1.0
/
totalMass
);
defines
[
"INVERSE_TOTAL_MASS"
]
=
cl
.
doubleToString
(
1.0
/
totalMass
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
removeCM
,
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
removeCM
,
defines
);
kernel1
=
cl
::
Kernel
(
program
,
"calcCenterOfMassMomentum"
);
kernel1
=
cl
::
Kernel
(
program
,
"calcCenterOfMassMomentum"
);
kernel1
.
setArg
<
cl_int
>
(
0
,
numAtoms
);
kernel1
.
setArg
<
cl_int
>
(
0
,
numAtoms
);
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
c8dac206
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
1
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -267,7 +267,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -267,7 +267,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
forceKernel
=
createInteractionKernel
(
kernelSource
,
parameters
,
arguments
,
true
,
true
);
forceKernel
=
createInteractionKernel
(
kernelSource
,
parameters
,
arguments
,
true
,
true
);
if
(
useCutoff
)
{
if
(
useCutoff
)
{
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
if
(
forceBufferPerAtomBlock
)
if
(
forceBufferPerAtomBlock
)
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
if
(
usePeriodic
)
if
(
usePeriodic
)
...
@@ -281,6 +281,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -281,6 +281,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactionCount
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
if
(
context
.
getUseDoublePrecision
())
findInteractingBlocksKernel
.
setArg
<
cl_double
>
(
0
,
cutoff
*
cutoff
);
else
findInteractingBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractingBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
blockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
blockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockBoundingBox
->
getDeviceBuffer
());
...
@@ -293,6 +296,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -293,6 +296,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
startTileIndex
+
numTiles
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
startTileIndex
+
numTiles
);
if
(
context
.
getSIMDWidth
()
==
32
&&
!
deviceIsCpu
)
{
if
(
context
.
getSIMDWidth
()
==
32
&&
!
deviceIsCpu
)
{
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
if
(
context
.
getUseDoublePrecision
())
findInteractionsWithinBlocksKernel
.
setArg
<
cl_double
>
(
0
,
cutoff
*
cutoff
);
else
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingTiles
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingTiles
->
getDeviceBuffer
());
...
@@ -315,6 +321,20 @@ int OpenCLNonbondedUtilities::findExclusionIndex(int x, int y, const vector<cl_u
...
@@ -315,6 +321,20 @@ int OpenCLNonbondedUtilities::findExclusionIndex(int x, int y, const vector<cl_u
throw
OpenMMException
(
"Internal error: exclusion in unexpected tile"
);
throw
OpenMMException
(
"Internal error: exclusion in unexpected tile"
);
}
}
static
void
setPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getPeriodicBoxSizeDouble
());
else
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getPeriodicBoxSize
());
}
static
void
setInvPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getInvPeriodicBoxSizeDouble
());
else
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getInvPeriodicBoxSize
());
}
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
if
(
!
useCutoff
)
if
(
!
useCutoff
)
return
;
return
;
...
@@ -327,15 +347,15 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
...
@@ -327,15 +347,15 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
// Compute the neighbor list.
// Compute the neighbor list.
findBlockBoundsKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
1
);
findBlockBoundsKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
()
);
setInvPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
2
);
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
1
);
findInteractingBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
()
);
setInvPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
2
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
deviceIsCpu
?
1
:
-
1
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
deviceIsCpu
?
1
:
-
1
);
if
(
context
.
getSIMDWidth
()
==
32
&&
!
deviceIsCpu
)
{
if
(
context
.
getSIMDWidth
()
==
32
&&
!
deviceIsCpu
)
{
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
1
,
context
.
getPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
findInteractionsWithinBlocksKernel
,
1
);
findInteractionsWithinBlocksKernel
.
setArg
<
mm_float4
>
(
2
,
context
.
getInvPeriodicBoxSize
()
);
setInvPeriodicBoxSizeArg
(
context
,
findInteractionsWithinBlocksKernel
,
2
);
context
.
executeKernel
(
findInteractionsWithinBlocksKernel
,
context
.
getNumAtoms
(),
128
);
context
.
executeKernel
(
findInteractionsWithinBlocksKernel
,
context
.
getNumAtoms
(),
128
);
}
}
}
}
...
@@ -343,8 +363,8 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
...
@@ -343,8 +363,8 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
if
(
cutoff
!=
-
1.0
)
{
if
(
cutoff
!=
-
1.0
)
{
if
(
useCutoff
)
{
if
(
useCutoff
)
{
forceKernel
.
setArg
<
mm_float4
>
(
10
,
context
.
getPeriodicBoxSize
()
);
setPeriodicBoxSizeArg
(
context
,
forceKernel
,
10
);
forceKernel
.
setArg
<
mm_float4
>
(
11
,
context
.
getInvPeriodicBoxSize
()
);
setInvPeriodicBoxSizeArg
(
context
,
forceKernel
,
11
);
}
}
context
.
executeKernel
(
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
context
.
executeKernel
(
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
...
@@ -498,11 +518,11 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -498,11 +518,11 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
isSymmetric
)
if
(
isSymmetric
)
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
forceThreadBlockSize
);
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"CUTOFF_SQUARED"
]
=
OpenCLExpressionUtilities
::
doubleToString
(
cutoff
*
cutoff
);
defines
[
"CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
if
((
localDataSize
/
4
)
%
2
==
0
)
if
((
localDataSize
/
4
)
%
2
==
0
)
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
string
file
;
string
file
;
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.h
View file @
c8dac206
...
@@ -30,6 +30,7 @@
...
@@ -30,6 +30,7 @@
#include "OpenCLContext.h"
#include "OpenCLContext.h"
#include "openmm/System.h"
#include "openmm/System.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLExpressionUtilities.h"
#include <sstream>
#include <string>
#include <string>
#include <vector>
#include <vector>
...
@@ -287,8 +288,11 @@ public:
...
@@ -287,8 +288,11 @@ public:
name
(
name
),
componentType
(
componentType
),
numComponents
(
numComponents
),
size
(
size
),
memory
(
&
memory
)
{
name
(
name
),
componentType
(
componentType
),
numComponents
(
numComponents
),
size
(
size
),
memory
(
&
memory
)
{
if
(
numComponents
==
1
)
if
(
numComponents
==
1
)
type
=
componentType
;
type
=
componentType
;
else
else
{
type
=
componentType
+
OpenCLExpressionUtilities
::
intToString
(
numComponents
);
std
::
stringstream
s
;
s
<<
componentType
<<
numComponents
;
type
=
s
.
str
();
}
}
}
const
std
::
string
&
getName
()
const
{
const
std
::
string
&
getName
()
const
{
return
name
;
return
name
;
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
c8dac206
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011 Stanford University and the Authors.
*
* Portions copyright (c) 2011
-2012
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -54,14 +54,14 @@ using namespace std;
...
@@ -54,14 +54,14 @@ using namespace std;
class
OpenCLParallelCalcForcesAndEnergyKernel
::
BeginComputationTask
:
public
OpenCLContext
::
WorkTask
{
class
OpenCLParallelCalcForcesAndEnergyKernel
::
BeginComputationTask
:
public
OpenCLContext
::
WorkTask
{
public:
public:
BeginComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
BeginComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
mm_float4
*
pinnedMemory
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
void
*
pinnedMemory
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
pinnedMemory
(
pinnedMemory
)
{
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
pinnedMemory
(
pinnedMemory
)
{
}
}
void
execute
()
{
void
execute
()
{
// Copy coordinates over to this device and execute the kernel.
// Copy coordinates over to this device and execute the kernel.
if
(
cl
.
getContextIndex
()
>
0
)
if
(
cl
.
getContextIndex
()
>
0
)
cl
.
getQueue
().
enqueueWriteBuffer
(
cl
.
getPosq
().
getDeviceBuffer
(),
CL_FALSE
,
0
,
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
pinnedMemory
);
cl
.
getQueue
().
enqueueWriteBuffer
(
cl
.
getPosq
().
getDeviceBuffer
(),
CL_FALSE
,
0
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getPosq
().
getElementSize
(
),
pinnedMemory
);
kernel
.
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
kernel
.
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
}
}
private:
private:
...
@@ -70,13 +70,13 @@ private:
...
@@ -70,13 +70,13 @@ private:
OpenCLCalcForcesAndEnergyKernel
&
kernel
;
OpenCLCalcForcesAndEnergyKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
bool
includeForce
,
includeEnergy
;
int
groups
;
int
groups
;
mm_float4
*
pinnedMemory
;
void
*
pinnedMemory
;
};
};
class
OpenCLParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
OpenCLContext
::
WorkTask
{
class
OpenCLParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
OpenCLContext
::
WorkTask
{
public:
public:
FinishComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
FinishComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
mm_float4
*
pinnedMemory
)
:
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
void
*
pinnedMemory
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
)
{
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
)
{
}
}
...
@@ -87,8 +87,9 @@ public:
...
@@ -87,8 +87,9 @@ public:
if
(
includeForce
)
{
if
(
includeForce
)
{
if
(
cl
.
getContextIndex
()
>
0
)
{
if
(
cl
.
getContextIndex
()
>
0
)
{
int
numAtoms
=
cl
.
getPaddedNumAtoms
();
int
numAtoms
=
cl
.
getPaddedNumAtoms
();
void
*
dest
=
(
cl
.
getUseDoublePrecision
()
?
(
void
*
)
&
((
mm_double4
*
)
pinnedMemory
)[(
cl
.
getContextIndex
()
-
1
)
*
numAtoms
]
:
(
void
*
)
&
((
mm_float4
*
)
pinnedMemory
)[(
cl
.
getContextIndex
()
-
1
)
*
numAtoms
]);
cl
.
getQueue
().
enqueueReadBuffer
(
cl
.
getForce
().
getDeviceBuffer
(),
CL_TRUE
,
0
,
cl
.
getQueue
().
enqueueReadBuffer
(
cl
.
getForce
().
getDeviceBuffer
(),
CL_TRUE
,
0
,
numAtoms
*
sizeof
(
mm_float4
),
&
pinnedMemory
[(
cl
.
getContextIndex
()
-
1
)
*
numAtoms
]
);
numAtoms
*
cl
.
getForce
().
getElementSize
(),
dest
);
}
}
else
else
cl
.
getQueue
().
finish
();
cl
.
getQueue
().
finish
();
...
@@ -103,7 +104,7 @@ private:
...
@@ -103,7 +104,7 @@ private:
int
groups
;
int
groups
;
double
&
energy
;
double
&
energy
;
long
long
&
completionTime
;
long
long
&
completionTime
;
mm_float4
*
pinnedMemory
;
void
*
pinnedMemory
;
};
};
OpenCLParallelCalcForcesAndEnergyKernel
::
OpenCLParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
OpenCLPlatform
::
PlatformData
&
data
)
:
OpenCLParallelCalcForcesAndEnergyKernel
::
OpenCLParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
OpenCLPlatform
::
PlatformData
&
data
)
:
...
@@ -129,19 +130,20 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
...
@@ -129,19 +130,20 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void
OpenCLParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
void
OpenCLParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
OpenCLContext
&
cl0
=
*
data
.
contexts
[
0
];
OpenCLContext
&
cl0
=
*
data
.
contexts
[
0
];
int
elementSize
=
(
cl0
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
if
(
contextForces
==
NULL
)
{
if
(
contextForces
==
NULL
)
{
contextForces
=
OpenCLArray
::
create
<
mm_float4
>
(
cl0
,
&
cl0
.
getForceBuffers
().
getDeviceBuffer
(),
contextForces
=
OpenCLArray
::
create
<
mm_float4
>
(
cl0
,
&
cl0
.
getForceBuffers
().
getDeviceBuffer
(),
data
.
contexts
.
size
()
*
cl0
.
getPaddedNumAtoms
(),
"contextForces"
);
data
.
contexts
.
size
()
*
cl0
.
getPaddedNumAtoms
(),
"contextForces"
);
int
bufferBytes
=
(
data
.
contexts
.
size
()
-
1
)
*
cl0
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
)
;
int
bufferBytes
=
(
data
.
contexts
.
size
()
-
1
)
*
cl0
.
getPaddedNumAtoms
()
*
elementSize
;
pinnedPositionBuffer
=
new
cl
::
Buffer
(
cl0
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedPositionBuffer
=
new
cl
::
Buffer
(
cl0
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedPositionMemory
=
(
mm_float4
*
)
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedPositionBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedPositionMemory
=
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedPositionBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedForceBuffer
=
new
cl
::
Buffer
(
cl0
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedForceBuffer
=
new
cl
::
Buffer
(
cl0
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedForceMemory
=
(
mm_float4
*
)
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedForceBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedForceMemory
=
cl0
.
getQueue
().
enqueueMapBuffer
(
*
pinnedForceBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
}
}
// Copy coordinates over to each device and execute the kernel.
// Copy coordinates over to each device and execute the kernel.
cl0
.
getQueue
().
enqueueReadBuffer
(
cl0
.
getPosq
().
getDeviceBuffer
(),
CL_TRUE
,
0
,
cl0
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
)
,
pinnedPositionMemory
);
cl0
.
getQueue
().
enqueueReadBuffer
(
cl0
.
getPosq
().
getDeviceBuffer
(),
CL_TRUE
,
0
,
cl0
.
getPaddedNumAtoms
()
*
elementSize
,
pinnedPositionMemory
);
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
data
.
contextEnergy
[
i
]
=
0.0
;
data
.
contextEnergy
[
i
]
=
0.0
;
OpenCLContext
&
cl
=
*
data
.
contexts
[
i
];
OpenCLContext
&
cl
=
*
data
.
contexts
[
i
];
...
@@ -165,8 +167,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
...
@@ -165,8 +167,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
OpenCLContext
&
cl
=
*
data
.
contexts
[
0
];
OpenCLContext
&
cl
=
*
data
.
contexts
[
0
];
int
numAtoms
=
cl
.
getPaddedNumAtoms
();
int
numAtoms
=
cl
.
getPaddedNumAtoms
();
cl
.
getQueue
().
enqueueWriteBuffer
(
contextForces
->
getDeviceBuffer
(),
CL_FALSE
,
numAtoms
*
sizeof
(
mm_float4
),
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
numAtoms
*
(
data
.
contexts
.
size
()
-
1
)
*
sizeof
(
mm_float4
),
pinnedForceMemory
);
cl
.
getQueue
().
enqueueWriteBuffer
(
contextForces
->
getDeviceBuffer
(),
CL_FALSE
,
numAtoms
*
elementSize
,
numAtoms
*
(
data
.
contexts
.
size
()
-
1
)
*
elementSize
,
pinnedForceMemory
);
cl
.
reduceBuffer
(
*
contextForces
,
data
.
contexts
.
size
());
cl
.
reduceBuffer
(
*
contextForces
,
data
.
contexts
.
size
());
// Balance work between the contexts by transferring a few nonbonded tiles from the context that
// Balance work between the contexts by transferring a few nonbonded tiles from the context that
...
...
platforms/opencl/src/OpenCLParallelKernels.h
View file @
c8dac206
...
@@ -84,8 +84,8 @@ private:
...
@@ -84,8 +84,8 @@ private:
OpenCLArray
*
contextForces
;
OpenCLArray
*
contextForces
;
cl
::
Buffer
*
pinnedPositionBuffer
;
cl
::
Buffer
*
pinnedPositionBuffer
;
cl
::
Buffer
*
pinnedForceBuffer
;
cl
::
Buffer
*
pinnedForceBuffer
;
mm_float4
*
pinnedPositionMemory
;
void
*
pinnedPositionMemory
;
mm_float4
*
pinnedForceMemory
;
void
*
pinnedForceMemory
;
};
};
/**
/**
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
c8dac206
...
@@ -141,7 +141,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
...
@@ -141,7 +141,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
device
<<
contexts
[
i
]
->
getDeviceIndex
();
device
<<
contexts
[
i
]
->
getDeviceIndex
();
}
}
propertyValues
[
OpenCLPlatform
::
OpenCLDeviceIndex
()]
=
device
.
str
();
propertyValues
[
OpenCLPlatform
::
OpenCLDeviceIndex
()]
=
device
.
str
();
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformIndex
()]
=
OpenCLExpressionUtilities
::
intToString
(
platformIndex
);
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformIndex
()]
=
contexts
[
0
]
->
intToString
(
platformIndex
);
propertyValues
[
OpenCLPlatform
::
OpenCLPrecision
()]
=
precisionProperty
;
propertyValues
[
OpenCLPlatform
::
OpenCLPrecision
()]
=
precisionProperty
;
contextEnergy
.
resize
(
contexts
.
size
());
contextEnergy
.
resize
(
contexts
.
size
());
}
}
...
...
platforms/opencl/src/OpenCLSort.h
View file @
c8dac206
...
@@ -162,7 +162,7 @@ public:
...
@@ -162,7 +162,7 @@ public:
// Assign array elements to buckets.
// Assign array elements to buckets.
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
context
.
clearBuffer
(
bucketOffset
->
getDeviceBuffer
(),
numBuckets
);
context
.
clearBuffer
(
*
bucketOffset
);
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl_int
>
(
1
,
data
.
getSize
());
assignElementsKernel
.
setArg
<
cl_int
>
(
1
,
data
.
getSize
());
assignElementsKernel
.
setArg
<
cl_int
>
(
2
,
numBuckets
);
assignElementsKernel
.
setArg
<
cl_int
>
(
2
,
numBuckets
);
...
...
platforms/opencl/src/kernels/angleForce.cl
View file @
c8dac206
float
4
v0
=
pos2-pos1
;
real
4
v0
=
pos2-pos1
;
float
4
v1
=
pos2-pos3
;
real
4
v1
=
pos2-pos3
;
float
4
cp
=
cross
(
v0,
v1
)
;
real
4
cp
=
cross
(
v0,
v1
)
;
float
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
real
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
rp
=
max
(
SQRT
(
rp
)
,
1.0e-06f
)
;
rp
=
max
(
SQRT
(
rp
)
,
(
real
)
1.0e-06f
)
;
float
r21
=
v0.x*v0.x
+
v0.y*v0.y
+
v0.z*v0.z
;
real
r21
=
v0.x*v0.x
+
v0.y*v0.y
+
v0.z*v0.z
;
float
r23
=
v1.x*v1.x
+
v1.y*v1.y
+
v1.z*v1.z
;
real
r23
=
v1.x*v1.x
+
v1.y*v1.y
+
v1.z*v1.z
;
float
dot
=
v0.x*v1.x
+
v0.y*v1.y
+
v0.z*v1.z
;
real
dot
=
v0.x*v1.x
+
v0.y*v1.y
+
v0.z*v1.z
;
float
cosine
=
clamp
(
dot*RSQRT
(
r21*r23
)
,
-1.0f,
1.0f
)
;
real
cosine
=
clamp
(
dot*RSQRT
(
r21*r23
)
,
(
real
)
-1
,
(
real
)
1
)
;
float
theta
=
acos
(
cosine
)
;
real
theta
=
acos
(
cosine
)
;
COMPUTE_FORCE
COMPUTE_FORCE
float
4
force1
=
cross
(
v0,
cp
)
*
(
dEdAngle/
(
r21*rp
))
;
real
4
force1
=
cross
(
v0,
cp
)
*
(
dEdAngle/
(
r21*rp
))
;
float
4
force3
=
cross
(
cp,
v1
)
*
(
dEdAngle/
(
r23*rp
))
;
real
4
force3
=
cross
(
cp,
v1
)
*
(
dEdAngle/
(
r23*rp
))
;
float
4
force2
=
-force1-force3
;
real
4
force2
=
-force1-force3
;
platforms/opencl/src/kernels/bondForce.cl
View file @
c8dac206
float
4
delta
=
pos2-pos1
;
real
4
delta
=
pos2-pos1
;
float
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
real
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
COMPUTE_FORCE
COMPUTE_FORCE
dEdR
=
(
r
>
0.0f
)
?
(
dEdR
/
r
)
:
0.0f
;
dEdR
=
(
r
>
0.0f
)
?
(
dEdR
/
r
)
:
0.0f
;
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
float4
force1
=
delta
;
real4
force1
=
delta
;
float4
force2
=
-delta
;
real4
force2
=
-delta
;
\ No newline at end of file
\ No newline at end of file
platforms/opencl/src/kernels/cmapTorsionForce.cl
View file @
c8dac206
const
float
PI
=
3.14159265358979323846f
;
const
real
PI
=
3.14159265358979323846f
;
//
Compute
the
first
angle.
//
Compute
the
first
angle.
float
4
v0a
=
(
float
4
)
(
pos1.xyz-pos2.xyz,
0.0f
)
;
real
4
v0a
=
(
real
4
)
(
pos1.xyz-pos2.xyz,
0.0f
)
;
float
4
v1a
=
(
float
4
)
(
pos3.xyz-pos2.xyz,
0.0f
)
;
real
4
v1a
=
(
real
4
)
(
pos3.xyz-pos2.xyz,
0.0f
)
;
float
4
v2a
=
(
float
4
)
(
pos3.xyz-pos4.xyz,
0.0f
)
;
real
4
v2a
=
(
real
4
)
(
pos3.xyz-pos4.xyz,
0.0f
)
;
float
4
cp0a
=
cross
(
v0a,
v1a
)
;
real
4
cp0a
=
cross
(
v0a,
v1a
)
;
float
4
cp1a
=
cross
(
v1a,
v2a
)
;
real
4
cp1a
=
cross
(
v1a,
v2a
)
;
float
cosangle
=
dot
(
normalize
(
cp0a
)
,
normalize
(
cp1a
))
;
real
cosangle
=
dot
(
normalize
(
cp0a
)
,
normalize
(
cp1a
))
;
float
angleA
;
real
angleA
;
if
(
cosangle
>
0.99f
|
| cosangle < -0.99f) {
if
(
cosangle
>
0.99f
|
| cosangle < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
float
4 cross_prod = cross(cp0a, cp1a);
real
4 cross_prod = cross(cp0a, cp1a);
float
scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
real
scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
angleA = asin(SQRT(dot(cross_prod, cross_prod)/scale));
angleA = asin(SQRT(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0.0f)
if (cosangle < 0.0f)
angleA = PI-angleA;
angleA = PI-angleA;
...
@@ -25,18 +25,18 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
...
@@ -25,18 +25,18 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
// Compute the second angle.
// Compute the second angle.
float
4 v0b = (
float
4) (pos5.xyz-pos6.xyz, 0.0f);
real
4 v0b = (
real
4) (pos5.xyz-pos6.xyz, 0.0f);
float
4 v1b = (
float
4) (pos7.xyz-pos6.xyz, 0.0f);
real
4 v1b = (
real
4) (pos7.xyz-pos6.xyz, 0.0f);
float
4 v2b = (
float
4) (pos7.xyz-pos8.xyz, 0.0f);
real
4 v2b = (
real
4) (pos7.xyz-pos8.xyz, 0.0f);
float
4 cp0b = cross(v0b, v1b);
real
4 cp0b = cross(v0b, v1b);
float
4 cp1b = cross(v1b, v2b);
real
4 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b));
cosangle = dot(normalize(cp0b), normalize(cp1b));
float
angleB;
real
angleB;
if (cosangle > 0.99f |
|
cosangle
<
-0.99f
)
{
if (cosangle > 0.99f |
|
cosangle
<
-0.99f
)
{
//
We
're
close
to
the
singularity
in
acos
()
,
so
take
the
cross
product
and
use
asin
()
instead.
//
We
're
close
to
the
singularity
in
acos
()
,
so
take
the
cross
product
and
use
asin
()
instead.
float
4
cross_prod
=
cross
(
cp0b,
cp1b
)
;
real
4
cross_prod
=
cross
(
cp0b,
cp1b
)
;
float
scale
=
dot
(
cp0b,
cp0b
)
*dot
(
cp1b,
cp1b
)
;
real
scale
=
dot
(
cp0b,
cp0b
)
*dot
(
cp1b,
cp1b
)
;
angleB
=
asin
(
SQRT
(
dot
(
cross_prod,
cross_prod
)
/scale
))
;
angleB
=
asin
(
SQRT
(
dot
(
cross_prod,
cross_prod
)
/scale
))
;
if
(
cosangle
<
0.0f
)
if
(
cosangle
<
0.0f
)
angleB
=
PI-angleB
;
angleB
=
PI-angleB
;
...
@@ -50,7 +50,7 @@ angleB = fmod(angleB+2.0f*PI, 2.0f*PI);
...
@@ -50,7 +50,7 @@ angleB = fmod(angleB+2.0f*PI, 2.0f*PI);
int2
pos
=
MAP_POS[MAPS[index]]
;
int2
pos
=
MAP_POS[MAPS[index]]
;
int
size
=
pos.y
;
int
size
=
pos.y
;
float
delta
=
2*PI/size
;
real
delta
=
2*PI/size
;
int
s
=
(
int
)
(
angleA/delta
)
;
int
s
=
(
int
)
(
angleA/delta
)
;
int
t
=
(
int
)
(
angleB/delta
)
;
int
t
=
(
int
)
(
angleB/delta
)
;
float4
c[4]
;
float4
c[4]
;
...
@@ -59,14 +59,14 @@ c[0] = COEFF[coeffIndex];
...
@@ -59,14 +59,14 @@ c[0] = COEFF[coeffIndex];
c[1]
=
COEFF[coeffIndex+1]
;
c[1]
=
COEFF[coeffIndex+1]
;
c[2]
=
COEFF[coeffIndex+2]
;
c[2]
=
COEFF[coeffIndex+2]
;
c[3]
=
COEFF[coeffIndex+3]
;
c[3]
=
COEFF[coeffIndex+3]
;
float
da
=
angleA/delta-s
;
real
da
=
angleA/delta-s
;
float
db
=
angleB/delta-t
;
real
db
=
angleB/delta-t
;
//
Evaluate
the
spline
to
determine
the
energy
and
gradients.
//
Evaluate
the
spline
to
determine
the
energy
and
gradients.
float
torsionEnergy
=
0.0f
;
real
torsionEnergy
=
0.0f
;
float
dEdA
=
0.0f
;
real
dEdA
=
0.0f
;
float
dEdB
=
0.0f
;
real
dEdB
=
0.0f
;
torsionEnergy
=
da*torsionEnergy
+
((
c[3].w*db
+
c[3].z
)
*db
+
c[3].y
)
*db
+
c[3].x
;
torsionEnergy
=
da*torsionEnergy
+
((
c[3].w*db
+
c[3].z
)
*db
+
c[3].y
)
*db
+
c[3].x
;
dEdA
=
db*dEdA
+
(
3.0f*c[3].w*da
+
2.0f*c[2].w
)
*da
+
c[1].w
;
dEdA
=
db*dEdA
+
(
3.0f*c[3].w*da
+
2.0f*c[2].w
)
*da
+
c[1].w
;
dEdB
=
da*dEdB
+
(
3.0f*c[3].w*db
+
2.0f*c[3].z
)
*db
+
c[3].y
;
dEdB
=
da*dEdB
+
(
3.0f*c[3].w*db
+
2.0f*c[3].z
)
*db
+
c[3].y
;
...
@@ -85,17 +85,17 @@ energy += torsionEnergy;
...
@@ -85,17 +85,17 @@ energy += torsionEnergy;
//
Apply
the
force
to
the
first
torsion.
//
Apply
the
force
to
the
first
torsion.
float
normCross1
=
dot
(
cp0a,
cp0a
)
;
real
normCross1
=
dot
(
cp0a,
cp0a
)
;
float
normSqrBC
=
dot
(
v1a,
v1a
)
;
real
normSqrBC
=
dot
(
v1a,
v1a
)
;
float
normBC
=
SQRT
(
normSqrBC
)
;
real
normBC
=
SQRT
(
normSqrBC
)
;
float
normCross2
=
dot
(
cp1a,
cp1a
)
;
real
normCross2
=
dot
(
cp1a,
cp1a
)
;
float
dp
=
1.0f/normSqrBC
;
real
dp
=
1.0f/normSqrBC
;
float
4
ff
=
(
float
4
)
((
-dEdA*normBC
)
/normCross1,
dot
(
v0a,
v1a
)
*dp,
dot
(
v2a,
v1a
)
*dp,
(
dEdA*normBC
)
/normCross2
)
;
real
4
ff
=
(
real
4
)
((
-dEdA*normBC
)
/normCross1,
dot
(
v0a,
v1a
)
*dp,
dot
(
v2a,
v1a
)
*dp,
(
dEdA*normBC
)
/normCross2
)
;
float
4
force1
=
ff.x*cp0a
;
real
4
force1
=
ff.x*cp0a
;
float
4
force4
=
ff.w*cp1a
;
real
4
force4
=
ff.w*cp1a
;
float
4
d
=
ff.y*force1
-
ff.z*force4
;
real
4
d
=
ff.y*force1
-
ff.z*force4
;
float
4
force2
=
d-force1
;
real
4
force2
=
d-force1
;
float
4
force3
=
-d-force4
;
real
4
force3
=
-d-force4
;
//
Apply
the
force
to
the
second
torsion.
//
Apply
the
force
to
the
second
torsion.
...
@@ -104,9 +104,9 @@ normSqrBC = dot(v1b, v1b);
...
@@ -104,9 +104,9 @@ normSqrBC = dot(v1b, v1b);
normBC
=
SQRT
(
normSqrBC
)
;
normBC
=
SQRT
(
normSqrBC
)
;
normCross2
=
dot
(
cp1b,
cp1b
)
;
normCross2
=
dot
(
cp1b,
cp1b
)
;
dp
=
1.0f/normSqrBC
;
dp
=
1.0f/normSqrBC
;
ff
=
(
float
4
)
((
-dEdB*normBC
)
/normCross1,
dot
(
v0b,
v1b
)
*dp,
dot
(
v2b,
v1b
)
*dp,
(
dEdB*normBC
)
/normCross2
)
;
ff
=
(
real
4
)
((
-dEdB*normBC
)
/normCross1,
dot
(
v0b,
v1b
)
*dp,
dot
(
v2b,
v1b
)
*dp,
(
dEdB*normBC
)
/normCross2
)
;
float
4
force5
=
ff.x*cp0b
;
real
4
force5
=
ff.x*cp0b
;
float
4
force8
=
ff.w*cp1b
;
real
4
force8
=
ff.w*cp1b
;
d
=
ff.y*force5
-
ff.z*force8
;
d
=
ff.y*force5
-
ff.z*force8
;
float
4
force6
=
d-force5
;
real
4
force6
=
d-force5
;
float
4
force7
=
-d-force8
;
real
4
force7
=
-d-force8
;
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
c8dac206
#
if
USE_EWALD
#
if
USE_EWALD
bool
needCorrection
=
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
bool
needCorrection
=
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
if
(
!isExcluded
|
| needCorrection) {
if
(
!isExcluded
|
| needCorrection) {
float
tempForce = 0
.0f
;
real
tempForce = 0;
if (r2 < CUTOFF_SQUARED |
|
needCorrection
)
{
if (r2 < CUTOFF_SQUARED |
|
needCorrection
)
{
const
float
alphaR
=
EWALD_ALPHA*r
;
const
real
alphaR
=
EWALD_ALPHA*r
;
const
float
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
real
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
float
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
//
This
approximation
for
erfc
is
from
Abramowitz
and
Stegun
(
1964
)
p.
299.
They
cite
the
following
as
//
This
approximation
for
erfc
is
from
Abramowitz
and
Stegun
(
1964
)
p.
299.
They
cite
the
following
as
//
the
original
source:
C.
Hastings,
Jr.,
Approximations
for
Digital
Computers
(
1955
)
.
It
has
a
maximum
//
the
original
source:
C.
Hastings,
Jr.,
Approximations
for
Digital
Computers
(
1955
)
.
It
has
a
maximum
//
error
of
3e-7.
//
error
of
3e-7.
float
t
=
1.0f+
(
0.0705230784f+
(
0.0422820123f+
(
0.0092705272f+
(
0.0001520143f+
(
0.0002765672f+0.0000430638f*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
;
real
t
=
1.0f+
(
0.0705230784f+
(
0.0422820123f+
(
0.0092705272f+
(
0.0001520143f+
(
0.0002765672f+0.0000430638f*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
)
*alphaR
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
const
float
erfcAlphaR
=
RECIP
(
t*t
)
;
const
real
erfcAlphaR
=
RECIP
(
t*t
)
;
if
(
needCorrection
)
{
if
(
needCorrection
)
{
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
...
@@ -24,11 +24,11 @@ if (!isExcluded || needCorrection) {
...
@@ -24,11 +24,11 @@ if (!isExcluded || needCorrection) {
}
}
else
{
else
{
#
if
HAS_LENNARD_JONES
#
if
HAS_LENNARD_JONES
float
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
real
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
float
sig2
=
invR*sig
;
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
real
sig6
=
sig2*sig2*sig2
;
float
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
real
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
+
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
+
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
epssig6*
(
sig6
-
1.0f
)
+
prefactor*erfcAlphaR
;
tempEnergy
+=
epssig6*
(
sig6
-
1.0f
)
+
prefactor*erfcAlphaR
;
#
else
#
else
...
@@ -41,32 +41,37 @@ if (!isExcluded || needCorrection) {
...
@@ -41,32 +41,37 @@ if (!isExcluded || needCorrection) {
}
}
#
else
#
else
{
{
#
ifdef
USE_DOUBLE_PRECISION
unsigned
long
includeInteraction
;
#
else
unsigned
int
includeInteraction
;
#
endif
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
#
else
#
else
unsigned
int
includeInteraction
=
(
!isExcluded
)
;
includeInteraction
=
(
!isExcluded
)
;
#
endif
#
endif
float
tempForce
=
0
.0f
;
real
tempForce
=
0
;
#
if
HAS_LENNARD_JONES
#
if
HAS_LENNARD_JONES
float
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
real
sig
=
sigmaEpsilon1.x
+
sigmaEpsilon2.x
;
float
sig2
=
invR*sig
;
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
float
sig6
=
sig2*sig2*sig2
;
real
sig6
=
sig2*sig2*sig2
;
float
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
real
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
tempEnergy
+=
select
(
0.0f
,
epssig6*
(
sig6
-
1.0f
)
,
includeInteraction
)
;
tempEnergy
+=
select
(
(
real
)
0
,
epssig6*
(
sig6
-1
)
,
includeInteraction
)
;
#
endif
#
endif
#
if
HAS_COULOMB
#
if
HAS_COULOMB
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
const
float
prefactor
=
138.935456f*posq1.w*posq2.w
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w
;
tempForce
+=
prefactor*
(
invR
-
2.0f*REACTION_FIELD_K*r2
)
;
tempForce
+=
prefactor*
(
invR
-
2.0f*REACTION_FIELD_K*r2
)
;
tempEnergy
+=
select
(
0.0f
,
prefactor*
(
invR
+
REACTION_FIELD_K*r2
-
REACTION_FIELD_C
)
,
includeInteraction
)
;
tempEnergy
+=
select
(
(
real
)
0
,
prefactor*
(
invR
+
REACTION_FIELD_K*r2
-
REACTION_FIELD_C
)
,
includeInteraction
)
;
#
else
#
else
const
float
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
tempForce
+=
prefactor
;
tempForce
+=
prefactor
;
tempEnergy
+=
select
(
0.0f
,
prefactor,
includeInteraction
)
;
tempEnergy
+=
select
(
(
real
)
0
,
prefactor,
includeInteraction
)
;
#
endif
#
endif
#
endif
#
endif
dEdR
+=
select
(
0.0f
,
tempForce*invR*invR,
includeInteraction
)
;
dEdR
+=
select
(
(
real
)
0
,
tempForce*invR*invR,
includeInteraction
)
;
}
}
#
endif
#
endif
\ No newline at end of file
platforms/opencl/src/kernels/customCompoundBond.cl
View file @
c8dac206
/**
/**
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
*/
float
4
ccb_delta
(
float
4
vec1,
float
4
vec2
)
{
real
4
ccb_delta
(
real
4
vec1,
real
4
vec2
)
{
float
4
result
=
(
float
4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
.0f
)
;
real
4
result
=
(
real
4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
return
result
;
}
}
...
@@ -10,17 +10,17 @@ float4 ccb_delta(float4 vec1, float4 vec2) {
...
@@ -10,17 +10,17 @@ float4 ccb_delta(float4 vec1, float4 vec2) {
/**
/**
*
Compute
the
angle
between
two
vectors.
The
w
component
of
each
vector
should
contain
the
squared
magnitude.
*
Compute
the
angle
between
two
vectors.
The
w
component
of
each
vector
should
contain
the
squared
magnitude.
*/
*/
float
ccb_computeAngle
(
float
4
vec1,
float
4
vec2
)
{
real
ccb_computeAngle
(
real
4
vec1,
real
4
vec2
)
{
float
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
real
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
float
cosine
=
dotProduct*RSQRT
(
vec1.w*vec2.w
)
;
real
cosine
=
dotProduct*RSQRT
(
vec1.w*vec2.w
)
;
float
angle
;
real
angle
;
if
(
cosine
>
0.99f
||
cosine
<
-0.99f
)
{
if
(
cosine
>
0.99f
||
cosine
<
-0.99f
)
{
//
We
're
close
to
the
singularity
in
acos
()
,
so
take
the
cross
product
and
use
asin
()
instead.
//
We
're
close
to
the
singularity
in
acos
()
,
so
take
the
cross
product
and
use
asin
()
instead.
float
4
crossProduct
=
cross
(
vec1,
vec2
)
;
real
4
crossProduct
=
cross
(
vec1,
vec2
)
;
float
scale
=
vec1.w*vec2.w
;
real
scale
=
vec1.w*vec2.w
;
angle
=
asin
(
SQRT
(
dot
(
crossProduct,
crossProduct
)
/scale
))
;
angle
=
asin
(
SQRT
(
dot
(
crossProduct,
crossProduct
)
/scale
))
;
if
(
cosine
<
0
.0f
)
if
(
cosine
<
0
)
angle
=
M_PI-angle
;
angle
=
M_PI-angle
;
}
}
else
else
...
@@ -31,8 +31,8 @@ float ccb_computeAngle(float4 vec1, float4 vec2) {
...
@@ -31,8 +31,8 @@ float ccb_computeAngle(float4 vec1, float4 vec2) {
/**
/**
*
Compute
the
cross
product
of
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*
Compute
the
cross
product
of
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
*/
float
4
ccb_computeCross
(
float
4
vec1,
float
4
vec2
)
{
real
4
ccb_computeCross
(
real
4
vec1,
real
4
vec2
)
{
float
4
result
=
cross
(
vec1,
vec2
)
;
real
4
result
=
cross
(
vec1,
vec2
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
return
result
;
}
}
platforms/opencl/src/kernels/customExternalForce.cl
View file @
c8dac206
COMPUTE_FORCE
COMPUTE_FORCE
float
4
force1
=
(
float
4
)
(
-dEdX,
-dEdY,
-dEdZ,
0
.0f
)
;
real
4
force1
=
(
real
4
)
(
-dEdX,
-dEdY,
-dEdZ,
0
)
;
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment