Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
62a83f80
"platforms/cuda/vscode:/vscode.git/clone" did not exist on "72d59cbe1f3a902bf6493d0308d826672aaa49b2"
Commit
62a83f80
authored
Apr 26, 2010
by
Peter Eastman
Browse files
Optimized OpenCL version of CCMA
parent
e1a9418d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
25 additions
and
56 deletions
+25
-56
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+13
-17
platforms/opencl/src/kernels/ccma.cl
platforms/opencl/src/kernels/ccma.cl
+12
-39
No files found.
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
62a83f80
...
@@ -450,7 +450,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -450,7 +450,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaNumAtomConstraints
=
new
OpenCLArray
<
cl_int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaNumAtomConstraints
=
new
OpenCLArray
<
cl_int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaDelta1
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta1
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaDelta2
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaConverged
=
new
OpenCLArray
<
cl_int
>
(
context
,
context
.
getNumThreadBlocks
()
,
"CcmaConverged"
);
ccmaConverged
=
new
OpenCLArray
<
cl_int
>
(
context
,
1
,
"CcmaConverged"
,
true
);
ccmaReducedMass
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaReducedMass
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixColumn
=
new
OpenCLArray
<
cl_int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConstraintMatrixColumn
=
new
OpenCLArray
<
cl_int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConstraintMatrixValue
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
ccmaConstraintMatrixValue
=
new
OpenCLArray
<
cl_float
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
...
@@ -572,20 +572,17 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) {
...
@@ -572,20 +572,17 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) {
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaConverged
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
posDelta
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
posDelta
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaReducedMass
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaReducedMass
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
ccmaConverged
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
ccmaConverged
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
(
6
,
sizeof
(
cl_int
)
*
OpenCLContext
::
ThreadBlockSize
,
NULL
);
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaConstraintMatrixColumn
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaConstraintMatrixColumn
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaConstraintMatrixValue
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaConstraintMatrixValue
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaConverged
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaConverged
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
(
5
,
sizeof
(
cl_int
)
*
OpenCLContext
::
ThreadBlockSize
,
NULL
);
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaNumAtomConstraints
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaNumAtomConstraints
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaAtomConstraints
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaAtomConstraints
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaDistance
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaDistance
->
getDeviceBuffer
());
...
@@ -595,22 +592,21 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) {
...
@@ -595,22 +592,21 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) {
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
ccmaConverged
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
ccmaConverged
->
getDeviceBuffer
());
}
}
ccmaForceKernel
.
setArg
<
cl_float
>
(
7
,
(
cl_float
)
tol
);
ccmaForceKernel
.
setArg
<
cl_float
>
(
6
,
(
cl_float
)
tol
);
context
.
executeKernel
(
ccmaDirectionsKernel
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaDirectionsKernel
,
ccmaAtoms
->
getSize
());
vector
<
cl_int
>
converged
;
const
int
checkInterval
=
3
;
for
(
int
i
=
0
;
i
<
75
;
i
++
)
{
for
(
int
i
=
0
;
i
<
150
;
i
++
)
{
// To reduce the overhead of checking for convergence, perform two iterations
if
((
i
+
1
)
%
checkInterval
==
0
)
{
// each time through the loop.
(
*
ccmaConverged
)[
0
]
=
1
;
ccmaConverged
->
upload
();
context
.
executeKernel
(
ccmaForceKernel
,
ccmaAtoms
->
getSize
());
}
context
.
executeKernel
(
ccmaMultiplyKernel
,
ccmaAtoms
->
getSize
());
ccmaUpdateKernel
.
setArg
<
cl_int
>
(
8
,
i
);
context
.
executeKernel
(
ccmaUpdateKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
ccmaForceKernel
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaForceKernel
,
ccmaAtoms
->
getSize
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
ccmaConverged
->
download
();
if
((
*
ccmaConverged
)[
0
])
break
;
}
context
.
executeKernel
(
ccmaMultiplyKernel
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaMultiplyKernel
,
ccmaAtoms
->
getSize
());
ccmaConverged
->
download
(
converged
);
if
(
converged
[
0
])
break
;
ccmaUpdateKernel
.
setArg
<
cl_int
>
(
8
,
i
);
ccmaUpdateKernel
.
setArg
<
cl_int
>
(
8
,
i
);
context
.
executeKernel
(
ccmaUpdateKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
ccmaUpdateKernel
,
context
.
getNumAtoms
());
}
}
...
...
platforms/opencl/src/kernels/ccma.cl
View file @
62a83f80
/**
/**
*
Compute
the
direction
each
constraint
is
pointing
in.
This
is
called
once
at
the
beginning
of
constraint
evaluation.
*
Compute
the
direction
each
constraint
is
pointing
in.
This
is
called
once
at
the
beginning
of
constraint
evaluation.
*/
*/
__kernel
void
computeConstraintDirections
(
__global
int2*
constraintAtoms,
__global
float4*
constraintDistance,
__global
float4*
atomPositions,
__kernel
void
computeConstraintDirections
(
__global
int2*
constraintAtoms,
__global
float4*
constraintDistance,
__global
float4*
atomPositions
)
{
__global
int*
converged
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
//
Compute
the
direction
for
this
constraint.
//
Compute
the
direction
for
this
constraint.
...
@@ -15,23 +14,19 @@ __kernel void computeConstraintDirections(__global int2* constraintAtoms, __glob
...
@@ -15,23 +14,19 @@ __kernel void computeConstraintDirections(__global int2* constraintAtoms, __glob
dir.z
=
oldPos1.z-oldPos2.z
;
dir.z
=
oldPos1.z-oldPos2.z
;
constraintDistance[index]
=
dir
;
constraintDistance[index]
=
dir
;
}
}
//
Mark
that
no
work
groups
have
converged
yet.
for
(
int
index
=
get_global_id
(
0
)
; index < get_num_groups(0); index += get_global_size(0))
converged[index]
=
false
;
}
}
/**
/**
*
Compute
the
force
applied
by
each
constraint.
*
Compute
the
force
applied
by
each
constraint.
*/
*/
__kernel
void
computeConstraintForce
(
__global
int2*
constraintAtoms,
__global
float4*
constraintDistance,
__global
float4*
atomPositions,
__kernel
void
computeConstraintForce
(
__global
int2*
constraintAtoms,
__global
float4*
constraintDistance,
__global
float4*
atomPositions,
__global
float*
reducedMass,
__global
float*
delta1,
__global
int*
converged,
__local
int*
convergedBuffer,
float
tol
)
{
__global
float*
reducedMass,
__global
float*
delta1,
__global
int*
converged,
float
tol
)
{
if
(
converged[get_group_id
(
0
)
]
)
__local
int
groupConverged
;
return
; // The constraint iteration has already converged.
if
(
get_local_id
(
0
)
==
0
)
groupConverged
=
1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float
lowerTol
=
1.0f-2.0f*tol+tol*tol
;
float
lowerTol
=
1.0f-2.0f*tol+tol*tol
;
float
upperTol
=
1.0f+2.0f*tol+tol*tol
;
float
upperTol
=
1.0f+2.0f*tol+tol*tol
;
int
threadConverged
=
1
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
//
Compute
the
force
due
to
this
constraint.
//
Compute
the
force
due
to
this
constraint.
...
@@ -48,40 +43,18 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl
...
@@ -48,40 +43,18 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl
//
See
whether
it
has
converged.
//
See
whether
it
has
converged.
threadConverged
&=
(
rp2
>=
lowerTol*dist2
&&
rp2
<=
upperTol*dist2
)
;
if
(
groupConverged
&&
(
rp2
<
lowerTol*dist2
||
rp2
>
upperTol*dist2
))
{
}
groupConverged
=
0
;
converged[0]
=
0
;
//
Perform
a
parallel
reduction
to
see
if
all
constraints
handled
by
this
work
group
have
converged.
}
convergedBuffer[get_local_id
(
0
)
]
=
threadConverged
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
step
=
1
; step < get_local_size(0); step *= 2) {
if
(
get_local_id
(
0
)
%
(
2*step
)
==
0
)
convergedBuffer[get_local_id
(
0
)
]
&=
convergedBuffer[get_local_id
(
0
)
+step]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
if
(
get_local_id
(
0
)
==
0
)
converged[get_group_id
(
0
)
]
=
convergedBuffer[0]
;
}
}
/**
/**
*
Multiply
the
vector
of
constraint
forces
by
the
constraint
matrix.
*
Multiply
the
vector
of
constraint
forces
by
the
constraint
matrix.
*/
*/
__kernel
void
multiplyByConstraintMatrix
(
__global
float*
delta1,
__global
float*
delta2,
__global
int*
constraintMatrixColumn,
__kernel
void
multiplyByConstraintMatrix
(
__global
float*
delta1,
__global
float*
delta2,
__global
int*
constraintMatrixColumn,
__global
float*
constraintMatrixValue,
__global
int*
converged,
__local
int*
convergedBuffer
)
{
__global
float*
constraintMatrixValue,
__global
int*
converged
)
{
//
First
see
whether
all
work
groups
have
converged.
convergedBuffer[get_local_id
(
0
)
]
=
true
;
for
(
int
index
=
get_local_id
(
0
)
; index < get_num_groups(0); index += get_local_size(0))
convergedBuffer[get_local_id
(
0
)
]
&=
converged[index]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
step
=
1
; step < get_local_size(0); step *= 2) {
if
(
get_local_id
(
0
)
%
(
2*step
)
==
0
)
convergedBuffer[get_local_id
(
0
)
]
&=
convergedBuffer[get_local_id
(
0
)
+step]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
get_local_id
(
0
)
==
0
)
converged[get_group_id
(
0
)
]
=
convergedBuffer[0]
;
if
(
converged[0]
)
if
(
converged[0]
)
return
; // The constraint iteration has already converged.
return
; // The constraint iteration has already converged.
...
@@ -105,7 +78,7 @@ __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float*
...
@@ -105,7 +78,7 @@ __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float*
*/
*/
__kernel
void
updateAtomPositions
(
__global
int*
numAtomConstraints,
__global
int*
atomConstraints,
__global
float4*
constraintDistance,
__kernel
void
updateAtomPositions
(
__global
int*
numAtomConstraints,
__global
int*
atomConstraints,
__global
float4*
constraintDistance,
__global
float4*
atomPositions,
__global
float4*
velm,
__global
float*
delta1,
__global
float*
delta2,
__global
int*
converged,
int
iteration
)
{
__global
float4*
atomPositions,
__global
float4*
velm,
__global
float*
delta1,
__global
float*
delta2,
__global
int*
converged,
int
iteration
)
{
if
(
converged[
get_group_id
(
0
)
]
)
if
(
converged[
0
]
)
return
; // The constraint iteration has already converged.
return
; // The constraint iteration has already converged.
float
damping
=
(
iteration
<
2
?
0.5f
:
1.0f
)
;
float
damping
=
(
iteration
<
2
?
0.5f
:
1.0f
)
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment