Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
73183c61
Commit
73183c61
authored
May 31, 2016
by
ChayaSt
Browse files
resolved conflict
parents
0e218233
32e08b87
Changes
267
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
340 additions
and
260 deletions
+340
-260
platforms/cuda/tests/TestCudaSort.cpp
platforms/cuda/tests/TestCudaSort.cpp
+1
-1
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+13
-1
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+2
-2
platforms/opencl/include/OpenCLNonbondedUtilities.h
platforms/opencl/include/OpenCLNonbondedUtilities.h
+3
-0
platforms/opencl/include/OpenCLPlatform.h
platforms/opencl/include/OpenCLPlatform.h
+5
-5
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+8
-4
platforms/opencl/src/OpenCLExpressionUtilities.cpp
platforms/opencl/src/OpenCLExpressionUtilities.cpp
+8
-8
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+88
-49
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+21
-18
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+9
-1
platforms/opencl/src/kernels/angleForce.cl
platforms/opencl/src/kernels/angleForce.cl
+4
-0
platforms/opencl/src/kernels/bondForce.cl
platforms/opencl/src/kernels/bondForce.cl
+3
-0
platforms/opencl/src/kernels/cmapTorsionForce.cl
platforms/opencl/src/kernels/cmapTorsionForce.cl
+10
-0
platforms/opencl/src/kernels/customCentroidBond.cl
platforms/opencl/src/kernels/customCentroidBond.cl
+5
-2
platforms/opencl/src/kernels/customCompoundBond.cl
platforms/opencl/src/kernels/customCompoundBond.cl
+4
-1
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+28
-30
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+22
-24
platforms/opencl/src/kernels/customGBValueN2.cl
platforms/opencl/src/kernels/customGBValueN2.cl
+28
-30
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
+22
-24
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+56
-60
No files found.
platforms/cuda/tests/TestCudaSort.cpp
View file @
73183c61
...
@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
...
@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
system
.
addParticle
(
0.0
);
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
CudaArray
data
(
context
,
array
.
size
(),
4
,
"sortData"
);
CudaArray
data
(
context
,
array
.
size
(),
4
,
"sortData"
);
...
...
platforms/opencl/include/OpenCLContext.h
View file @
73183c61
...
@@ -408,6 +408,18 @@ public:
...
@@ -408,6 +408,18 @@ public:
void
setStepsSinceReorder
(
int
steps
)
{
void
setStepsSinceReorder
(
int
steps
)
{
stepsSinceReorder
=
steps
;
stepsSinceReorder
=
steps
;
}
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
bool
getForcesValid
()
const
{
return
forcesValid
;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
void
setForcesValid
(
bool
valid
)
{
forcesValid
=
valid
;
}
/**
/**
* Get the number of atoms.
* Get the number of atoms.
*/
*/
...
@@ -684,7 +696,7 @@ private:
...
@@ -684,7 +696,7 @@ private:
int
numThreadBlocks
;
int
numThreadBlocks
;
int
numForceBuffers
;
int
numForceBuffers
;
int
simdWidth
;
int
simdWidth
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
,
forcesValid
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
,
periodicBoxVecXDouble
,
periodicBoxVecYDouble
,
periodicBoxVecZDouble
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
,
periodicBoxVecXDouble
,
periodicBoxVecYDouble
,
periodicBoxVecZDouble
;
std
::
string
defaultOptimizationOptions
;
std
::
string
defaultOptimizationOptions
;
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
73183c61
...
@@ -156,7 +156,7 @@ public:
...
@@ -156,7 +156,7 @@ public:
* @param b the vector defining the second edge of the periodic box
* @param b the vector defining the second edge of the periodic box
* @param c the vector defining the third edge of the periodic box
* @param c the vector defining the third edge of the periodic box
*/
*/
void
setPeriodicBoxVectors
(
ContextImpl
&
context
,
const
Vec3
&
a
,
const
Vec3
&
b
,
const
Vec3
&
c
)
const
;
void
setPeriodicBoxVectors
(
ContextImpl
&
context
,
const
Vec3
&
a
,
const
Vec3
&
b
,
const
Vec3
&
c
);
/**
/**
* Create a checkpoint recording the current state of the Context.
* Create a checkpoint recording the current state of the Context.
*
*
...
@@ -698,7 +698,7 @@ public:
...
@@ -698,7 +698,7 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
private:
private:
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
);
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
,
const
std
::
vector
<
std
::
string
>&
tableTypes
);
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
...
...
platforms/opencl/include/OpenCLNonbondedUtilities.h
View file @
73183c61
...
@@ -281,6 +281,9 @@ private:
...
@@ -281,6 +281,9 @@ private:
OpenCLArray
*
oldPositions
;
OpenCLArray
*
oldPositions
;
OpenCLArray
*
rebuildNeighborList
;
OpenCLArray
*
rebuildNeighborList
;
OpenCLSort
*
blockSorter
;
OpenCLSort
*
blockSorter
;
cl
::
Event
downloadCountEvent
;
cl
::
Buffer
*
pinnedCountBuffer
;
int
*
pinnedCountMemory
;
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
ParameterInfo
>
parameters
;
std
::
vector
<
ParameterInfo
>
parameters
;
std
::
vector
<
ParameterInfo
>
arguments
;
std
::
vector
<
ParameterInfo
>
arguments
;
...
...
platforms/opencl/include/OpenCLPlatform.h
View file @
73183c61
...
@@ -58,14 +58,14 @@ public:
...
@@ -58,14 +58,14 @@ public:
* This is the name of the parameter for selecting which OpenCL device or devices to use.
* This is the name of the parameter for selecting which OpenCL device or devices to use.
*/
*/
static
const
std
::
string
&
OpenCLDeviceIndex
()
{
static
const
std
::
string
&
OpenCLDeviceIndex
()
{
static
const
std
::
string
key
=
"
OpenCL
DeviceIndex"
;
static
const
std
::
string
key
=
"DeviceIndex"
;
return
key
;
return
key
;
}
}
/**
/**
* This is the name of the parameter that reports the OpenCL device or devices being used.
* This is the name of the parameter that reports the OpenCL device or devices being used.
*/
*/
static
const
std
::
string
&
OpenCLDeviceName
()
{
static
const
std
::
string
&
OpenCLDeviceName
()
{
static
const
std
::
string
key
=
"
OpenCL
DeviceName"
;
static
const
std
::
string
key
=
"DeviceName"
;
return
key
;
return
key
;
}
}
/**
/**
...
@@ -86,21 +86,21 @@ public:
...
@@ -86,21 +86,21 @@ public:
* This is the name of the parameter for selecting what numerical precision to use.
* This is the name of the parameter for selecting what numerical precision to use.
*/
*/
static
const
std
::
string
&
OpenCLPrecision
()
{
static
const
std
::
string
&
OpenCLPrecision
()
{
static
const
std
::
string
key
=
"
OpenCL
Precision"
;
static
const
std
::
string
key
=
"Precision"
;
return
key
;
return
key
;
}
}
/**
/**
* This is the name of the parameter for selecting whether to use the CPU based PME calculation.
* This is the name of the parameter for selecting whether to use the CPU based PME calculation.
*/
*/
static
const
std
::
string
&
OpenCLUseCpuPme
()
{
static
const
std
::
string
&
OpenCLUseCpuPme
()
{
static
const
std
::
string
key
=
"
OpenCL
UseCpuPme"
;
static
const
std
::
string
key
=
"UseCpuPme"
;
return
key
;
return
key
;
}
}
/**
/**
* This is the name of the parameter for selecting whether to disable use of a separate stream for PME.
* This is the name of the parameter for selecting whether to disable use of a separate stream for PME.
*/
*/
static
const
std
::
string
&
OpenCLDisablePmeStream
()
{
static
const
std
::
string
&
OpenCLDisablePmeStream
()
{
static
const
std
::
string
key
=
"
OpenCL
DisablePmeStream"
;
static
const
std
::
string
key
=
"DisablePmeStream"
;
return
key
;
return
key
;
}
}
};
};
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
73183c61
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -84,7 +84,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -84,7 +84,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
useMixedPrecision
=
false
;
useMixedPrecision
=
false
;
}
}
else
else
throw
OpenMMException
(
"Illegal value for
OpenCL
Precision: "
+
precision
);
throw
OpenMMException
(
"Illegal value for Precision: "
+
precision
);
try
{
try
{
contextIndex
=
platformData
.
contexts
.
size
();
contextIndex
=
platformData
.
contexts
.
size
();
std
::
vector
<
cl
::
Platform
>
platforms
;
std
::
vector
<
cl
::
Platform
>
platforms
;
...
@@ -105,7 +105,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -105,7 +105,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
vector
<
cl
::
Device
>
devices
;
vector
<
cl
::
Device
>
devices
;
platforms
[
j
].
getDevices
(
CL_DEVICE_TYPE_ALL
,
&
devices
);
platforms
[
j
].
getDevices
(
CL_DEVICE_TYPE_ALL
,
&
devices
);
if
(
deviceIndex
<
-
1
||
deviceIndex
>=
(
int
)
devices
.
size
())
if
(
deviceIndex
<
-
1
||
deviceIndex
>=
(
int
)
devices
.
size
())
throw
OpenMMException
(
"Illegal value for
OpenCL
DeviceIndex: "
+
intToString
(
deviceIndex
));
throw
OpenMMException
(
"Illegal value for DeviceIndex: "
+
intToString
(
deviceIndex
));
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
// If they supplied a valid deviceIndex, we only look through that one
// If they supplied a valid deviceIndex, we only look through that one
...
@@ -113,6 +113,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -113,6 +113,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
continue
;
continue
;
if
(
platformVendor
==
"Apple"
&&
(
devices
[
i
].
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
))
if
(
platformVendor
==
"Apple"
&&
(
devices
[
i
].
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
))
continue
;
// The CPU device on OS X won't work correctly.
continue
;
// The CPU device on OS X won't work correctly.
if
(
useMixedPrecision
||
useDoublePrecision
)
{
bool
supportsDouble
=
(
devices
[
i
].
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_khr_fp64"
)
!=
string
::
npos
);
if
(
!
supportsDouble
)
continue
;
// This device does not support double precision.
}
int
maxSize
=
devices
[
i
].
getInfo
<
CL_DEVICE_MAX_WORK_ITEM_SIZES
>
()[
0
];
int
maxSize
=
devices
[
i
].
getInfo
<
CL_DEVICE_MAX_WORK_ITEM_SIZES
>
()[
0
];
int
processingElementsPerComputeUnit
=
8
;
int
processingElementsPerComputeUnit
=
8
;
if
(
devices
[
i
].
getInfo
<
CL_DEVICE_TYPE
>
()
!=
CL_DEVICE_TYPE_GPU
)
{
if
(
devices
[
i
].
getInfo
<
CL_DEVICE_TYPE
>
()
!=
CL_DEVICE_TYPE_GPU
)
{
...
@@ -1047,7 +1052,6 @@ void OpenCLContext::reorderAtoms() {
...
@@ -1047,7 +1052,6 @@ void OpenCLContext::reorderAtoms() {
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_double
,
mm_double4
>
();
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_double
,
mm_double4
>
();
else
else
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_float
,
mm_float4
>
();
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_float
,
mm_float4
>
();
nonbonded
->
updateNeighborListSize
();
}
}
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
...
...
platforms/opencl/src/OpenCLExpressionUtilities.cpp
View file @
73183c61
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -174,8 +174,8 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -174,8 +174,8 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"if (x >= "
<<
paramsFloat
[
2
]
<<
" && x <= "
<<
paramsFloat
[
3
]
<<
" && y >= "
<<
paramsFloat
[
4
]
<<
" && y <= "
<<
paramsFloat
[
5
]
<<
") {
\n
"
;
out
<<
"if (x >= "
<<
paramsFloat
[
2
]
<<
" && x <= "
<<
paramsFloat
[
3
]
<<
" && y >= "
<<
paramsFloat
[
4
]
<<
" && y <= "
<<
paramsFloat
[
5
]
<<
") {
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
2
]
<<
")*"
<<
paramsFloat
[
6
]
<<
";
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
2
]
<<
")*"
<<
paramsFloat
[
6
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
4
]
<<
")*"
<<
paramsFloat
[
7
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
4
]
<<
")*"
<<
paramsFloat
[
7
]
<<
";
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
");
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
"
-1
);
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
");
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
"
-1
);
\n
"
;
out
<<
"int coeffIndex = 4*(s+"
<<
paramsInt
[
0
]
<<
"*t);
\n
"
;
out
<<
"int coeffIndex = 4*(s+"
<<
paramsInt
[
0
]
<<
"*t);
\n
"
;
out
<<
"float4 c[4];
\n
"
;
out
<<
"float4 c[4];
\n
"
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
@@ -217,9 +217,9 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -217,9 +217,9 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"x = (x - "
<<
paramsFloat
[
3
]
<<
")*"
<<
paramsFloat
[
9
]
<<
";
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
3
]
<<
")*"
<<
paramsFloat
[
9
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
5
]
<<
")*"
<<
paramsFloat
[
10
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
5
]
<<
")*"
<<
paramsFloat
[
10
]
<<
";
\n
"
;
out
<<
"z = (z - "
<<
paramsFloat
[
7
]
<<
")*"
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
"z = (z - "
<<
paramsFloat
[
7
]
<<
")*"
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
");
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
"
-1
);
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
");
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
"
-1
);
\n
"
;
out
<<
"int u = min((int) floor(z), "
<<
paramsInt
[
2
]
<<
");
\n
"
;
out
<<
"int u = min((int) floor(z), "
<<
paramsInt
[
2
]
<<
"
-1
);
\n
"
;
out
<<
"int coeffIndex = 16*(s+"
<<
paramsInt
[
0
]
<<
"*(t+"
<<
paramsInt
[
1
]
<<
"*u));
\n
"
;
out
<<
"int coeffIndex = 16*(s+"
<<
paramsInt
[
0
]
<<
"*(t+"
<<
paramsInt
[
1
]
<<
"*u));
\n
"
;
out
<<
"float4 c[16];
\n
"
;
out
<<
"float4 c[16];
\n
"
;
for
(
int
j
=
0
;
j
<
16
;
j
++
)
for
(
int
j
=
0
;
j
<
16
;
j
++
)
...
@@ -254,7 +254,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -254,7 +254,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
for
(
int
k
=
3
;
k
>=
0
;
k
--
)
for
(
int
k
=
3
;
k
>=
0
;
k
--
)
for
(
int
m
=
0
;
m
<
4
;
m
++
)
{
for
(
int
m
=
0
;
m
<
4
;
m
++
)
{
int
base
=
4
*
m
;
int
base
=
4
*
m
;
string
suffix
=
suffixes
[
m
];
string
suffix
=
suffixes
[
k
];
out
<<
"derivy["
<<
m
<<
"] = da*derivy["
<<
m
<<
"] + (3*c["
<<
(
base
+
3
)
<<
"]"
<<
suffix
<<
"*db + 2*c["
<<
(
base
+
2
)
<<
"]"
<<
suffix
<<
")*db + c["
<<
(
base
+
1
)
<<
"]"
<<
suffix
<<
";
\n
"
;
out
<<
"derivy["
<<
m
<<
"] = da*derivy["
<<
m
<<
"] + (3*c["
<<
(
base
+
3
)
<<
"]"
<<
suffix
<<
"*db + 2*c["
<<
(
base
+
2
)
<<
"]"
<<
suffix
<<
")*db + c["
<<
(
base
+
1
)
<<
"]"
<<
suffix
<<
";
\n
"
;
}
}
out
<<
nodeNames
[
j
]
<<
" = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));
\n
"
;
out
<<
nodeNames
[
j
]
<<
" = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));
\n
"
;
...
@@ -271,7 +271,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -271,7 +271,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
nodeNames
[
j
]
<<
" *= "
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
nodeNames
[
j
]
<<
" *= "
<<
paramsFloat
[
11
]
<<
";
\n
"
;
}
}
else
else
throw
OpenMMException
(
"Unsupported derivative order for Continuous
2
DFunction"
);
throw
OpenMMException
(
"Unsupported derivative order for Continuous
3
DFunction"
);
}
}
out
<<
"}
\n
"
;
out
<<
"}
\n
"
;
}
}
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
73183c61
This diff is collapsed.
Click to expand it.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
73183c61
...
@@ -57,7 +57,7 @@ private:
...
@@ -57,7 +57,7 @@ private:
OpenCLNonbondedUtilities
::
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
OpenCLNonbondedUtilities
::
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
numForceBuffers
(
0
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
numForceBuffers
(
0
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
pinnedCountMemory
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
// Decide how many thread blocks and force buffers to use.
// Decide how many thread blocks and force buffers to use.
deviceIsCpu
=
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
deviceIsCpu
=
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
...
@@ -90,6 +90,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
...
@@ -90,6 +90,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
numForceBuffers
=
numForceThreadBlocks
*
forceThreadBlockSize
/
OpenCLContext
::
TileSize
;
numForceBuffers
=
numForceThreadBlocks
*
forceThreadBlockSize
/
OpenCLContext
::
TileSize
;
}
}
}
}
pinnedCountBuffer
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
sizeof
(
int
));
pinnedCountMemory
=
(
int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
*
pinnedCountBuffer
,
CL_TRUE
,
CL_MAP_READ
,
0
,
sizeof
(
int
));
}
}
OpenCLNonbondedUtilities
::~
OpenCLNonbondedUtilities
()
{
OpenCLNonbondedUtilities
::~
OpenCLNonbondedUtilities
()
{
...
@@ -123,6 +125,8 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
...
@@ -123,6 +125,8 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
delete
rebuildNeighborList
;
delete
rebuildNeighborList
;
if
(
blockSorter
!=
NULL
)
if
(
blockSorter
!=
NULL
)
delete
blockSorter
;
delete
blockSorter
;
if
(
pinnedCountBuffer
!=
NULL
)
delete
pinnedCountBuffer
;
}
}
void
OpenCLNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
void
OpenCLNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
...
@@ -357,8 +361,6 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
...
@@ -357,8 +361,6 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
bool
rebuild
=
false
;
do
{
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
blockSorter
->
sort
(
*
sortedBlocks
);
...
@@ -367,10 +369,8 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
...
@@ -367,10 +369,8 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
lastCutoff
=
kernels
.
cutoffDistance
;
lastCutoff
=
kernels
.
cutoffDistance
;
context
.
getQueue
().
enqueueReadBuffer
(
interactionCount
->
getDeviceBuffer
(),
CL_FALSE
,
0
,
sizeof
(
int
),
pinnedCountMemory
,
NULL
,
&
downloadCountEvent
);
}
}
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
...
@@ -385,20 +385,22 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
...
@@ -385,20 +385,22 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
setPeriodicBoxArgs
(
context
,
kernel
,
9
);
setPeriodicBoxArgs
(
context
,
kernel
,
9
);
context
.
executeKernel
(
kernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
context
.
executeKernel
(
kernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
if
(
useCutoff
&&
numTiles
>
0
)
{
downloadCountEvent
.
wait
();
updateNeighborListSize
();
}
}
}
bool
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
bool
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
if
(
!
useCutoff
)
return
false
;
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
if
(
pinnedCountMemory
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
return
false
;
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
// this from happening in the future.
int
maxTiles
=
(
int
)
(
1.2
*
pinned
InteractionCount
[
0
]);
int
maxTiles
=
(
int
)
(
1.2
*
pinned
CountMemory
[
0
]);
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
if
(
maxTiles
>
totalTiles
)
maxTiles
=
totalTiles
;
maxTiles
=
totalTiles
;
...
@@ -430,6 +432,7 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
...
@@ -430,6 +432,7 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
}
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
context
.
setForcesValid
(
false
);
return
true
;
return
true
;
}
}
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
73183c61
...
@@ -56,6 +56,11 @@ extern "C" OPENMM_EXPORT_OPENCL void registerPlatforms() {
...
@@ -56,6 +56,11 @@ extern "C" OPENMM_EXPORT_OPENCL void registerPlatforms() {
#endif
#endif
OpenCLPlatform
::
OpenCLPlatform
()
{
OpenCLPlatform
::
OpenCLPlatform
()
{
deprecatedPropertyReplacements
[
"OpenCLDeviceIndex"
]
=
OpenCLDeviceIndex
();
deprecatedPropertyReplacements
[
"OpenCLDeviceName"
]
=
OpenCLDeviceName
();
deprecatedPropertyReplacements
[
"OpenCLPrecision"
]
=
OpenCLPrecision
();
deprecatedPropertyReplacements
[
"OpenCLUseCpuPme"
]
=
OpenCLUseCpuPme
();
deprecatedPropertyReplacements
[
"OpenCLDisablePmeStream"
]
=
OpenCLDisablePmeStream
();
OpenCLKernelFactory
*
factory
=
new
OpenCLKernelFactory
();
OpenCLKernelFactory
*
factory
=
new
OpenCLKernelFactory
();
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
UpdateStateDataKernel
::
Name
(),
factory
);
registerKernelFactory
(
UpdateStateDataKernel
::
Name
(),
factory
);
...
@@ -139,7 +144,10 @@ bool OpenCLPlatform::isPlatformSupported() {
...
@@ -139,7 +144,10 @@ bool OpenCLPlatform::isPlatformSupported() {
const
string
&
OpenCLPlatform
::
getPropertyValue
(
const
Context
&
context
,
const
string
&
property
)
const
{
const
string
&
OpenCLPlatform
::
getPropertyValue
(
const
Context
&
context
,
const
string
&
property
)
const
{
const
ContextImpl
&
impl
=
getContextImpl
(
context
);
const
ContextImpl
&
impl
=
getContextImpl
(
context
);
const
PlatformData
*
data
=
reinterpret_cast
<
const
PlatformData
*>
(
impl
.
getPlatformData
());
const
PlatformData
*
data
=
reinterpret_cast
<
const
PlatformData
*>
(
impl
.
getPlatformData
());
map
<
string
,
string
>::
const_iterator
value
=
data
->
propertyValues
.
find
(
property
);
string
propertyName
=
property
;
if
(
deprecatedPropertyReplacements
.
find
(
property
)
!=
deprecatedPropertyReplacements
.
end
())
propertyName
=
deprecatedPropertyReplacements
.
find
(
property
)
->
second
;
map
<
string
,
string
>::
const_iterator
value
=
data
->
propertyValues
.
find
(
propertyName
);
if
(
value
!=
data
->
propertyValues
.
end
())
if
(
value
!=
data
->
propertyValues
.
end
())
return
value
->
second
;
return
value
->
second
;
return
Platform
::
getPropertyValue
(
context
,
property
);
return
Platform
::
getPropertyValue
(
context
,
property
);
...
...
platforms/opencl/src/kernels/angleForce.cl
View file @
73183c61
real4
v0
=
pos2-pos1
;
real4
v0
=
pos2-pos1
;
real4
v1
=
pos2-pos3
;
real4
v1
=
pos2-pos3
;
#
if
APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0
)
APPLY_PERIODIC_TO_DELTA
(
v1
)
#
endif
real4
cp
=
cross
(
v0,
v1
)
;
real4
cp
=
cross
(
v0,
v1
)
;
real
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
real
rp
=
cp.x*cp.x
+
cp.y*cp.y
+
cp.z*cp.z
;
rp
=
max
(
SQRT
(
rp
)
,
(
real
)
1.0e-06f
)
;
rp
=
max
(
SQRT
(
rp
)
,
(
real
)
1.0e-06f
)
;
...
...
platforms/opencl/src/kernels/bondForce.cl
View file @
73183c61
real4
delta
=
pos2-pos1
;
real4
delta
=
pos2-pos1
;
#
if
APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
real
r
=
SQRT
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
COMPUTE_FORCE
COMPUTE_FORCE
dEdR
=
(
r
>
0.0f
)
?
(
dEdR
/
r
)
:
0.0f
;
dEdR
=
(
r
>
0.0f
)
?
(
dEdR
/
r
)
:
0.0f
;
...
...
platforms/opencl/src/kernels/cmapTorsionForce.cl
View file @
73183c61
...
@@ -5,6 +5,11 @@ const real PI = 3.14159265358979323846f;
...
@@ -5,6 +5,11 @@ const real PI = 3.14159265358979323846f;
real4
v0a
=
(
real4
)
(
pos1.xyz-pos2.xyz,
0.0f
)
;
real4
v0a
=
(
real4
)
(
pos1.xyz-pos2.xyz,
0.0f
)
;
real4
v1a
=
(
real4
)
(
pos3.xyz-pos2.xyz,
0.0f
)
;
real4
v1a
=
(
real4
)
(
pos3.xyz-pos2.xyz,
0.0f
)
;
real4
v2a
=
(
real4
)
(
pos3.xyz-pos4.xyz,
0.0f
)
;
real4
v2a
=
(
real4
)
(
pos3.xyz-pos4.xyz,
0.0f
)
;
#
if
APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0a
)
APPLY_PERIODIC_TO_DELTA
(
v1a
)
APPLY_PERIODIC_TO_DELTA
(
v2a
)
#
endif
real4
cp0a
=
cross
(
v0a,
v1a
)
;
real4
cp0a
=
cross
(
v0a,
v1a
)
;
real4
cp1a
=
cross
(
v1a,
v2a
)
;
real4
cp1a
=
cross
(
v1a,
v2a
)
;
real
cosangle
=
dot
(
normalize
(
cp0a
)
,
normalize
(
cp1a
))
;
real
cosangle
=
dot
(
normalize
(
cp0a
)
,
normalize
(
cp1a
))
;
...
@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
...
@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
real4 v0b = (real4) (pos5.xyz-pos6.xyz, 0.0f);
real4 v0b = (real4) (pos5.xyz-pos6.xyz, 0.0f);
real4 v1b = (real4) (pos7.xyz-pos6.xyz, 0.0f);
real4 v1b = (real4) (pos7.xyz-pos6.xyz, 0.0f);
real4 v2b = (real4) (pos7.xyz-pos8.xyz, 0.0f);
real4 v2b = (real4) (pos7.xyz-pos8.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0b)
APPLY_PERIODIC_TO_DELTA(v1b)
APPLY_PERIODIC_TO_DELTA(v2b)
#endif
real4 cp0b = cross(v0b, v1b);
real4 cp0b = cross(v0b, v1b);
real4 cp1b = cross(v1b, v2b);
real4 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b));
cosangle = dot(normalize(cp0b), normalize(cp1b));
...
...
platforms/opencl/src/kernels/customCentroidBond.cl
View file @
73183c61
...
@@ -70,8 +70,11 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global
...
@@ -70,8 +70,11 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global
/**
/**
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
*/
real4
delta
(
real4
vec1,
real4
vec2
)
{
real4
delta
(
real4
vec1,
real4
vec2,
bool
periodic,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
if
(
periodic
)
APPLY_PERIODIC_TO_DELTA
(
result
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
return
result
;
}
}
...
@@ -110,7 +113,7 @@ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -110,7 +113,7 @@ real4 computeCross(real4 vec1, real4 vec2) {
*
Compute
the
forces
on
groups
based
on
the
bonds.
*
Compute
the
forces
on
groups
based
on
the
bonds.
*/
*/
__kernel
void
computeGroupForces
(
__global
long*
restrict
groupForce,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
centerPositions,
__kernel
void
computeGroupForces
(
__global
long*
restrict
groupForce,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
centerPositions,
__global
const
int*
restrict
bondGroups
__global
const
int*
restrict
bondGroups
,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
EXTRA_ARGS
)
{
EXTRA_ARGS
)
{
mixed
energy
=
0
;
mixed
energy
=
0
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_BONDS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_BONDS; index += get_global_size(0)) {
...
...
platforms/opencl/src/kernels/customCompoundBond.cl
View file @
73183c61
/**
/**
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
*/
real4
ccb_delta
(
real4
vec1,
real4
vec2
)
{
real4
ccb_delta
(
real4
vec1,
real4
vec2,
bool
periodic,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
if
(
periodic
)
APPLY_PERIODIC_TO_DELTA
(
result
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
return
result
;
}
}
...
...
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
73183c61
...
@@ -181,6 +181,8 @@ __kernel void computeN2Energy(
...
@@ -181,6 +181,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
#else
...
@@ -204,16 +206,12 @@ __kernel void computeN2Energy(
...
@@ -204,16 +206,12 @@ __kernel void computeN2Energy(
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
x = tiles[pos];
real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
#else
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
@@ -239,7 +237,7 @@ __kernel void computeN2Energy(
...
@@ -239,7 +237,7 @@ __kernel void computeN2Energy(
while
(
skipTiles[currentSkipIndex]
<
pos
)
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
73183c61
...
@@ -201,6 +201,8 @@ __kernel void computeN2Energy(
...
@@ -201,6 +201,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else
#else
...
@@ -220,16 +222,12 @@ __kernel void computeN2Energy(
...
@@ -220,16 +222,12 @@ __kernel void computeN2Energy(
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
x = tiles[pos];
real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
#else
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
@@ -248,7 +246,7 @@ __kernel void computeN2Energy(
...
@@ -248,7 +246,7 @@ __kernel void computeN2Energy(
nextToSkip
=
end
;
nextToSkip
=
end
;
}
}
includeTile
=
(
nextToSkip
!=
pos
)
;
includeTile
=
(
nextToSkip
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
if
(
includeTile
)
{
//
Load
the
data
for
this
tile.
//
Load
the
data
for
this
tile.
...
...
platforms/opencl/src/kernels/customGBValueN2.cl
View file @
73183c61
...
@@ -157,6 +157,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -157,6 +157,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
#else
...
@@ -178,16 +180,12 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -178,16 +180,12 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
x = tiles[pos];
real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
#else
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
@@ -213,7 +211,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -213,7 +211,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
while
(
skipTiles[currentSkipIndex]
<
pos
)
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
...
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
View file @
73183c61
...
@@ -170,6 +170,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -170,6 +170,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else
#else
...
@@ -188,16 +190,12 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -188,16 +190,12 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
x = tiles[pos];
real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
#else
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
@@ -216,7 +214,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -216,7 +214,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
nextToSkip
=
end
;
nextToSkip
=
end
;
}
}
includeTile
=
(
nextToSkip
!=
pos
)
;
includeTile
=
(
nextToSkip
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
if
(
includeTile
)
{
//
Load
the
data
for
this
tile.
//
Load
the
data
for
this
tile.
...
...
platforms/opencl/src/kernels/gbsaObc.cl
View file @
73183c61
...
@@ -169,6 +169,8 @@ __kernel void computeBornSum(
...
@@ -169,6 +169,8 @@ __kernel void computeBornSum(
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
if
(
numTiles
>
maxTiles
)
return
; // There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
(
long
)
numTiles
)
/totalWarps
)
;
int
pos
=
(
int
)
(
warp*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
(
long
)
numTiles
)
/totalWarps
)
;
int
end
=
(
int
)
((
warp+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
(
long
)
numTiles
)
/totalWarps
)
;
int
end
=
(
int
)
((
warp+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
(
long
)
numTiles
)
/totalWarps
)
;
#
else
#
else
...
@@ -190,16 +192,12 @@ __kernel void computeBornSum(
...
@@ -190,16 +192,12 @@ __kernel void computeBornSum(
int
x,
y
;
int
x,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles[pos]
;
x
=
tiles[pos]
;
real4
blockSizeX
=
blockSize[x]
;
real4
blockSizeX
=
blockSize[x]
;
singlePeriodicCopy
=
(
0.5f*periodicBoxSize.x-blockSizeX.x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5f*periodicBoxSize.x-blockSizeX.x
>=
CUTOFF
&&
0.5f*periodicBoxSize.y-blockSizeX.y
>=
CUTOFF
&&
0.5f*periodicBoxSize.y-blockSizeX.y
>=
CUTOFF
&&
0.5f*periodicBoxSize.z-blockSizeX.z
>=
CUTOFF
)
;
0.5f*periodicBoxSize.z-blockSizeX.z
>=
CUTOFF
)
;
}
#
else
else
#
endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS+0.5f-SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
y
=
(
int
)
floor
(
NUM_BLOCKS+0.5f-SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
...
@@ -225,7 +223,7 @@ __kernel void computeBornSum(
...
@@ -225,7 +223,7 @@ __kernel void computeBornSum(
while (skipTiles[currentSkipIndex] < pos)
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
includeTile = (skipTiles[currentSkipIndex] != pos);
}
#endif
if (includeTile) {
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
unsigned int atom1 = x*TILE_SIZE + tgx;
...
@@ -556,6 +554,8 @@ __kernel void computeGBSAForce1(
...
@@ -556,6 +554,8 @@ __kernel void computeGBSAForce1(
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
#else
...
@@ -577,16 +577,12 @@ __kernel void computeGBSAForce1(
...
@@ -577,16 +577,12 @@ __kernel void computeGBSAForce1(
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
x = tiles[pos];
real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
#else
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
@@ -612,7 +608,7 @@ __kernel void computeGBSAForce1(
...
@@ -612,7 +608,7 @@ __kernel void computeGBSAForce1(
while
(
skipTiles[currentSkipIndex]
<
pos
)
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
...
Prev
1
2
3
4
5
6
7
8
9
…
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment