Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
62a83f80
Commit
62a83f80
authored
Apr 26, 2010
by
Peter Eastman
Browse files
Optimized OpenCL version of CCMA
parent
e1a9418d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
25 additions
and
56 deletions
+25
-56
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+13
-17
platforms/opencl/src/kernels/ccma.cl
platforms/opencl/src/kernels/ccma.cl
+12
-39
No files found.
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
62a83f80
...
...
@@ -450,7 +450,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaNumAtomConstraints
=
new
OpenCLArray
<
cl_int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaDelta1
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaConverged
=
new
OpenCLArray
<
cl_int
>
(
context
,
context
.
getNumThreadBlocks
()
,
"CcmaConverged"
);
ccmaConverged
=
new
OpenCLArray
<
cl_int
>
(
context
,
1
,
"CcmaConverged"
,
true
);
ccmaReducedMass
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixColumn
=
new
OpenCLArray
<
cl_int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConstraintMatrixValue
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
...
...
@@ -572,20 +572,17 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) {
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaConverged
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
posDelta
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaReducedMass
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
ccmaConverged
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
(
6
,
sizeof
(
cl_int
)
*
OpenCLContext
::
ThreadBlockSize
,
NULL
);
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaConstraintMatrixColumn
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaConstraintMatrixValue
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaConverged
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
(
5
,
sizeof
(
cl_int
)
*
OpenCLContext
::
ThreadBlockSize
,
NULL
);
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaNumAtomConstraints
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaAtomConstraints
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaDistance
->
getDeviceBuffer
());
...
...
@@ -595,22 +592,21 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) {
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
ccmaConverged
->
getDeviceBuffer
());
}
ccmaForceKernel
.
setArg
<
cl_float
>
(
7
,
(
cl_float
)
tol
);
ccmaForceKernel
.
setArg
<
cl_float
>
(
6
,
(
cl_float
)
tol
);
context
.
executeKernel
(
ccmaDirectionsKernel
,
ccmaAtoms
->
getSize
());
vector
<
cl_int
>
converged
;
for
(
int
i
=
0
;
i
<
75
;
i
++
)
{
// To reduce the overhead of checking for convergence, perform two iterations
// each time through the loop.
context
.
executeKernel
(
ccmaForceKernel
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaMultiplyKernel
,
ccmaAtoms
->
getSize
());
ccmaUpdateKernel
.
setArg
<
cl_int
>
(
8
,
i
);
context
.
executeKernel
(
ccmaUpdateKernel
,
context
.
getNumAtoms
());
const
int
checkInterval
=
3
;
for
(
int
i
=
0
;
i
<
150
;
i
++
)
{
if
((
i
+
1
)
%
checkInterval
==
0
)
{
(
*
ccmaConverged
)[
0
]
=
1
;
ccmaConverged
->
upload
();
}
context
.
executeKernel
(
ccmaForceKernel
,
ccmaAtoms
->
getSize
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
ccmaConverged
->
download
();
if
((
*
ccmaConverged
)[
0
])
break
;
}
context
.
executeKernel
(
ccmaMultiplyKernel
,
ccmaAtoms
->
getSize
());
ccmaConverged
->
download
(
converged
);
if
(
converged
[
0
])
break
;
ccmaUpdateKernel
.
setArg
<
cl_int
>
(
8
,
i
);
context
.
executeKernel
(
ccmaUpdateKernel
,
context
.
getNumAtoms
());
}
...
...
platforms/opencl/src/kernels/ccma.cl
View file @
62a83f80
/**
*
Compute
the
direction
each
constraint
is
pointing
in.
This
is
called
once
at
the
beginning
of
constraint
evaluation.
*/
__kernel
void
computeConstraintDirections
(
__global
int2*
constraintAtoms,
__global
float4*
constraintDistance,
__global
float4*
atomPositions,
__global
int*
converged
)
{
__kernel
void
computeConstraintDirections
(
__global
int2*
constraintAtoms,
__global
float4*
constraintDistance,
__global
float4*
atomPositions
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
//
Compute
the
direction
for
this
constraint.
...
...
@@ -15,23 +14,19 @@ __kernel void computeConstraintDirections(__global int2* constraintAtoms, __glob
dir.z
=
oldPos1.z-oldPos2.z
;
constraintDistance[index]
=
dir
;
}
//
Mark
that
no
work
groups
have
converged
yet.
for
(
int
index
=
get_global_id
(
0
)
; index < get_num_groups(0); index += get_global_size(0))
converged[index]
=
false
;
}
/**
*
Compute
the
force
applied
by
each
constraint.
*/
__kernel
void
computeConstraintForce
(
__global
int2*
constraintAtoms,
__global
float4*
constraintDistance,
__global
float4*
atomPositions,
__global
float*
reducedMass,
__global
float*
delta1,
__global
int*
converged,
__local
int*
convergedBuffer,
float
tol
)
{
if
(
converged[get_group_id
(
0
)
]
)
return
; // The constraint iteration has already converged.
__global
float*
reducedMass,
__global
float*
delta1,
__global
int*
converged,
float
tol
)
{
__local
int
groupConverged
;
if
(
get_local_id
(
0
)
==
0
)
groupConverged
=
1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float
lowerTol
=
1.0f-2.0f*tol+tol*tol
;
float
upperTol
=
1.0f+2.0f*tol+tol*tol
;
int
threadConverged
=
1
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
//
Compute
the
force
due
to
this
constraint.
...
...
@@ -48,40 +43,18 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl
//
See
whether
it
has
converged.
threadConverged
&=
(
rp2
>=
lowerTol*dist2
&&
rp2
<=
upperTol*dist2
)
;
}
//
Perform
a
parallel
reduction
to
see
if
all
constraints
handled
by
this
work
group
have
converged.
convergedBuffer[get_local_id
(
0
)
]
=
threadConverged
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
step
=
1
; step < get_local_size(0); step *= 2) {
if
(
get_local_id
(
0
)
%
(
2*step
)
==
0
)
convergedBuffer[get_local_id
(
0
)
]
&=
convergedBuffer[get_local_id
(
0
)
+step]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
groupConverged
&&
(
rp2
<
lowerTol*dist2
||
rp2
>
upperTol*dist2
))
{
groupConverged
=
0
;
converged[0]
=
0
;
}
}
if
(
get_local_id
(
0
)
==
0
)
converged[get_group_id
(
0
)
]
=
convergedBuffer[0]
;
}
/**
*
Multiply
the
vector
of
constraint
forces
by
the
constraint
matrix.
*/
__kernel
void
multiplyByConstraintMatrix
(
__global
float*
delta1,
__global
float*
delta2,
__global
int*
constraintMatrixColumn,
__global
float*
constraintMatrixValue,
__global
int*
converged,
__local
int*
convergedBuffer
)
{
//
First
see
whether
all
work
groups
have
converged.
convergedBuffer[get_local_id
(
0
)
]
=
true
;
for
(
int
index
=
get_local_id
(
0
)
; index < get_num_groups(0); index += get_local_size(0))
convergedBuffer[get_local_id
(
0
)
]
&=
converged[index]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
step
=
1
; step < get_local_size(0); step *= 2) {
if
(
get_local_id
(
0
)
%
(
2*step
)
==
0
)
convergedBuffer[get_local_id
(
0
)
]
&=
convergedBuffer[get_local_id
(
0
)
+step]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
get_local_id
(
0
)
==
0
)
converged[get_group_id
(
0
)
]
=
convergedBuffer[0]
;
__global
float*
constraintMatrixValue,
__global
int*
converged
)
{
if
(
converged[0]
)
return
; // The constraint iteration has already converged.
...
...
@@ -105,7 +78,7 @@ __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float*
*/
__kernel
void
updateAtomPositions
(
__global
int*
numAtomConstraints,
__global
int*
atomConstraints,
__global
float4*
constraintDistance,
__global
float4*
atomPositions,
__global
float4*
velm,
__global
float*
delta1,
__global
float*
delta2,
__global
int*
converged,
int
iteration
)
{
if
(
converged[
get_group_id
(
0
)
]
)
if
(
converged[
0
]
)
return
; // The constraint iteration has already converged.
float
damping
=
(
iteration
<
2
?
0.5f
:
1.0f
)
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment