Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c0a43bfc
Commit
c0a43bfc
authored
Apr 05, 2013
by
Peter Eastman
Browse files
Improved performance of CCMA
parent
022a0190
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
55 additions
and
24 deletions
+55
-24
platforms/cuda/src/CudaIntegrationUtilities.cpp
platforms/cuda/src/CudaIntegrationUtilities.cpp
+9
-7
platforms/cuda/src/CudaIntegrationUtilities.h
platforms/cuda/src/CudaIntegrationUtilities.h
+2
-0
platforms/cuda/src/kernels/integrationUtilities.cu
platforms/cuda/src/kernels/integrationUtilities.cu
+8
-4
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+28
-9
platforms/opencl/src/OpenCLIntegrationUtilities.h
platforms/opencl/src/OpenCLIntegrationUtilities.h
+4
-2
platforms/opencl/src/kernels/ccma.cl
platforms/opencl/src/kernels/ccma.cl
+4
-2
No files found.
platforms/cuda/src/CudaIntegrationUtilities.cpp
View file @
c0a43bfc
...
...
@@ -99,7 +99,7 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
posDelta
(
NULL
),
settleAtoms
(
NULL
),
settleParams
(
NULL
),
shakeAtoms
(
NULL
),
shakeParams
(
NULL
),
random
(
NULL
),
randomSeed
(
NULL
),
randomPos
(
0
),
stepSize
(
NULL
),
ccmaAtoms
(
NULL
),
ccmaDistance
(
NULL
),
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedMemory
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
)
{
// Create workspace arrays.
...
...
@@ -468,6 +468,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
ccmaNumAtomConstraints
=
CudaArray
::
create
<
int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaConstraintMatrixColumn
=
CudaArray
::
create
<
int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConverged
=
CudaArray
::
create
<
int
>
(
context
,
2
,
"ccmaConverged"
);
CHECK_RESULT2
(
cuMemHostAlloc
((
void
**
)
&
ccmaConvergedMemory
,
sizeof
(
int
),
CU_MEMHOSTALLOC_DEVICEMAP
),
"Error allocating pinned memory"
);
CHECK_RESULT2
(
cuMemHostGetDevicePointer
(
&
ccmaConvergedDeviceMemory
,
ccmaConvergedMemory
,
0
),
"Error getting device address for pinned memory"
);
vector
<
int2
>
atomsVec
(
ccmaAtoms
->
getSize
());
vector
<
int
>
atomConstraintsVec
(
ccmaAtomConstraints
->
getSize
());
vector
<
int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
->
getSize
());
...
...
@@ -681,6 +683,8 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
delete
ccmaDelta2
;
if
(
ccmaConverged
!=
NULL
)
delete
ccmaConverged
;
if
(
ccmaConvergedMemory
!=
NULL
)
cuMemFreeHost
(
ccmaConvergedMemory
);
if
(
vsite2AvgAtoms
!=
NULL
)
delete
vsite2AvgAtoms
;
if
(
vsite2AvgWeights
!=
NULL
)
...
...
@@ -739,7 +743,7 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
void
*
forceArgs
[]
=
{
&
ccmaAtoms
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
&
ccmaReducedMass
->
getDevicePointer
(),
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaConverged
->
getDevicePointer
(),
tolPointer
,
&
i
};
&
ccmaConvergedDeviceMemory
,
tolPointer
,
&
i
};
void
*
multiplyArgs
[]
=
{
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaDelta2
->
getDevicePointer
(),
&
ccmaConstraintMatrixColumn
->
getDevicePointer
(),
&
ccmaConstraintMatrixValue
->
getDevicePointer
(),
&
ccmaConverged
->
getDevicePointer
(),
&
i
};
void
*
updateArgs
[]
=
{
&
ccmaNumAtomConstraints
->
getDevicePointer
(),
&
ccmaAtomConstraints
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
...
...
@@ -747,18 +751,16 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
&
context
.
getVelm
().
getDevicePointer
(),
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaDelta2
->
getDevicePointer
(),
&
ccmaConverged
->
getDevicePointer
(),
&
i
};
const
int
checkInterval
=
4
;
int
*
c
onverged
=
(
int
*
)
context
.
getPinnedBuffer
()
;
ccmaC
onverged
Memory
[
0
]
=
0
;
for
(
i
=
0
;
i
<
150
;
i
++
)
{
context
.
executeKernel
(
ccmaForceKernel
,
forceArgs
,
ccmaAtoms
->
getSize
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
ccmaConverged
->
download
(
converged
,
false
);
if
((
i
+
1
)
%
checkInterval
==
0
)
CHECK_RESULT2
(
cuEventRecord
(
ccmaEvent
,
0
),
"Error recording event for CCMA"
);
}
context
.
executeKernel
(
ccmaMultiplyKernel
,
multiplyArgs
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaUpdateKernel
,
updateArgs
,
context
.
getNumAtoms
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
CHECK_RESULT2
(
cuEventSynchronize
(
ccmaEvent
),
"Error synchronizing on event for CCMA"
);
if
(
converged
[
i
%
2
])
if
(
c
cmaC
onverged
Memory
[
0
])
break
;
}
}
...
...
platforms/cuda/src/CudaIntegrationUtilities.h
View file @
c0a43bfc
...
...
@@ -141,6 +141,8 @@ private:
CudaArray
*
ccmaDelta1
;
CudaArray
*
ccmaDelta2
;
CudaArray
*
ccmaConverged
;
int
*
ccmaConvergedMemory
;
CUdeviceptr
ccmaConvergedDeviceMemory
;
CUevent
ccmaEvent
;
CudaArray
*
vsite2AvgAtoms
;
CudaArray
*
vsite2AvgWeights
;
...
...
platforms/cuda/src/kernels/integrationUtilities.cu
View file @
c0a43bfc
...
...
@@ -598,11 +598,13 @@ extern "C" __global__ void computeCCMAConstraintDirections(const int2* __restric
* Compute the force applied by each CCMA position constraint.
*/
extern
"C"
__global__
void
computeCCMAPositionConstraintForce
(
const
int2
*
__restrict__
constraintAtoms
,
const
mixed4
*
__restrict__
constraintDistance
,
const
mixed4
*
__restrict__
atomPositions
,
const
mixed
*
__restrict__
reducedMass
,
mixed
*
__restrict__
delta1
,
int
*
__restrict__
converged
,
mixed
tol
,
int
iteration
)
{
const
mixed
*
__restrict__
reducedMass
,
mixed
*
__restrict__
delta1
,
int
*
__restrict__
converged
,
int
*
__restrict__
hostConvergedFlag
,
mixed
tol
,
int
iteration
)
{
__shared__
int
groupConverged
;
if
(
converged
[
1
-
iteration
%
2
])
{
if
(
blockIdx
.
x
==
0
&&
threadIdx
.
x
==
0
)
if
(
blockIdx
.
x
==
0
&&
threadIdx
.
x
==
0
)
{
converged
[
iteration
%
2
]
=
1
;
hostConvergedFlag
[
0
]
=
1
;
}
return
;
// The constraint iteration has already converged.
}
if
(
threadIdx
.
x
==
0
)
...
...
@@ -639,11 +641,13 @@ extern "C" __global__ void computeCCMAPositionConstraintForce(const int2* __rest
* Compute the force applied by each CCMA velocity constraint.
*/
extern
"C"
__global__
void
computeCCMAVelocityConstraintForce
(
const
int2
*
__restrict__
constraintAtoms
,
const
mixed4
*
__restrict__
constraintDistance
,
const
mixed4
*
__restrict__
atomPositions
,
const
mixed
*
__restrict__
reducedMass
,
mixed
*
__restrict__
delta1
,
int
*
__restrict__
converged
,
mixed
tol
,
int
iteration
)
{
const
mixed
*
__restrict__
reducedMass
,
mixed
*
__restrict__
delta1
,
int
*
__restrict__
converged
,
int
*
__restrict__
hostConvergedFlag
,
mixed
tol
,
int
iteration
)
{
__shared__
int
groupConverged
;
if
(
converged
[
1
-
iteration
%
2
])
{
if
(
blockIdx
.
x
==
0
&&
threadIdx
.
x
==
0
)
if
(
blockIdx
.
x
==
0
&&
threadIdx
.
x
==
0
)
{
converged
[
iteration
%
2
]
=
1
;
hostConvergedFlag
[
0
]
=
1
;
}
return
;
// The constraint iteration has already converged.
}
if
(
threadIdx
.
x
==
0
)
...
...
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
c0a43bfc
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors.
*
* Portions copyright (c) 2009
-2013
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -98,7 +98,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
posDelta
(
NULL
),
settleAtoms
(
NULL
),
settleParams
(
NULL
),
shakeAtoms
(
NULL
),
shakeParams
(
NULL
),
random
(
NULL
),
randomSeed
(
NULL
),
randomPos
(
0
),
stepSize
(
NULL
),
ccmaAtoms
(
NULL
),
ccmaDistance
(
NULL
),
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedHostBuffer
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
hasInitializedPosConstraintKernels
(
false
),
hasInitializedVelConstraintKernels
(
false
)
{
// Create workspace arrays.
...
...
@@ -479,6 +479,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaNumAtomConstraints
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaConstraintMatrixColumn
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConverged
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
2
,
"CcmaConverged"
);
ccmaConvergedHostBuffer
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
1
,
"CcmaConvergedHostBuffer"
,
CL_MEM_WRITE_ONLY
|
CL_MEM_ALLOC_HOST_PTR
);
ccmaConvergedHostMemory
=
(
int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
ccmaConvergedHostBuffer
->
getDeviceBuffer
(),
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
sizeof
(
cl_int
));
// Different communication mechanisms give optimal performance on AMD and on NVIDIA.
string
vendor
=
context
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
ccmaUseDirectBuffer
=
(
vendor
.
size
()
>=
28
&&
vendor
.
substr
(
0
,
28
)
==
"Advanced Micro Devices, Inc."
);
vector
<
mm_int2
>
atomsVec
(
ccmaAtoms
->
getSize
());
vector
<
cl_int
>
atomConstraintsVec
(
ccmaAtomConstraints
->
getSize
());
vector
<
cl_int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
->
getSize
());
...
...
@@ -720,6 +725,8 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
delete
ccmaDelta2
;
if
(
ccmaConverged
!=
NULL
)
delete
ccmaConverged
;
if
(
ccmaConvergedHostBuffer
!=
NULL
)
delete
ccmaConvergedHostBuffer
;
if
(
vsite2AvgAtoms
!=
NULL
)
delete
vsite2AvgAtoms
;
if
(
vsite2AvgWeights
!=
NULL
)
...
...
@@ -814,6 +821,7 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaReducedMass
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
ccmaConverged
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaConvergedHostBuffer
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaConstraintMatrixColumn
->
getDeviceBuffer
());
...
...
@@ -829,26 +837,37 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
ccmaConverged
->
getDeviceBuffer
());
}
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
ccmaForceKernel
.
setArg
<
cl_double
>
(
6
,
(
cl_double
)
tol
);
ccmaForceKernel
.
setArg
<
cl_double
>
(
7
,
(
cl_double
)
tol
);
else
ccmaForceKernel
.
setArg
<
cl_float
>
(
6
,
(
cl_float
)
tol
);
ccmaForceKernel
.
setArg
<
cl_float
>
(
7
,
(
cl_float
)
tol
);
context
.
executeKernel
(
ccmaDirectionsKernel
,
ccmaAtoms
->
getSize
());
const
int
checkInterval
=
4
;
cl
::
Event
event
;
int
*
converged
=
(
int
*
)
context
.
getPinnedBuffer
();
ccmaConvergedHostMemory
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
150
;
i
++
)
{
ccmaForceKernel
.
setArg
<
cl_int
>
(
7
,
i
);
ccmaForceKernel
.
setArg
<
cl_int
>
(
8
,
i
);
context
.
executeKernel
(
ccmaForceKernel
,
ccmaAtoms
->
getSize
());
if
((
i
+
1
)
%
checkInterval
==
0
)
context
.
getQueue
().
enqueueReadBuffer
(
ccmaConverged
->
getDeviceBuffer
(),
CL_FALSE
,
0
,
2
*
sizeof
(
cl_int
),
converged
,
NULL
,
&
event
);
if
((
i
+
1
)
%
checkInterval
==
0
)
{
if
(
ccmaUseDirectBuffer
)
context
.
getQueue
().
enqueueMarker
(
&
event
);
else
context
.
getQueue
().
enqueueReadBuffer
(
ccmaConverged
->
getDeviceBuffer
(),
CL_FALSE
,
0
,
2
*
sizeof
(
cl_int
),
converged
,
NULL
,
&
event
);
}
ccmaMultiplyKernel
.
setArg
<
cl_int
>
(
5
,
i
);
context
.
executeKernel
(
ccmaMultiplyKernel
,
ccmaAtoms
->
getSize
());
ccmaUpdateKernel
.
setArg
<
cl_int
>
(
8
,
i
);
context
.
executeKernel
(
ccmaUpdateKernel
,
context
.
getNumAtoms
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
event
.
wait
();
if
(
converged
[
i
%
2
])
break
;
if
(
ccmaUseDirectBuffer
)
{
if
(
ccmaConvergedHostMemory
[
0
])
break
;
}
else
{
if
(
converged
[
i
%
2
])
break
;
}
}
}
}
...
...
platforms/opencl/src/OpenCLIntegrationUtilities.h
View file @
c0a43bfc
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors.
*
* Portions copyright (c) 2009
-2013
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -141,6 +141,8 @@ private:
OpenCLArray
*
ccmaDelta1
;
OpenCLArray
*
ccmaDelta2
;
OpenCLArray
*
ccmaConverged
;
int
*
ccmaConvergedHostMemory
;
OpenCLArray
*
ccmaConvergedHostBuffer
;
OpenCLArray
*
vsite2AvgAtoms
;
OpenCLArray
*
vsite2AvgWeights
;
OpenCLArray
*
vsite3AvgAtoms
;
...
...
@@ -149,7 +151,7 @@ private:
OpenCLArray
*
vsiteOutOfPlaneWeights
;
int
randomPos
;
int
lastSeed
,
numVsites
;
bool
hasInitializedPosConstraintKernels
,
hasInitializedVelConstraintKernels
;
bool
hasInitializedPosConstraintKernels
,
hasInitializedVelConstraintKernels
,
ccmaUseDirectBuffer
;
struct
ShakeCluster
;
struct
ConstraintOrderer
;
};
...
...
platforms/opencl/src/kernels/ccma.cl
View file @
c0a43bfc
...
...
@@ -34,11 +34,13 @@ __kernel void computeConstraintDirections(__global const int2* restrict constrai
*
Compute
the
force
applied
by
each
constraint.
*/
__kernel
void
computeConstraintForce
(
__global
const
int2*
restrict
constraintAtoms,
__global
const
mixed4*
restrict
constraintDistance,
__global
const
mixed4*
restrict
atomPositions,
__global
const
mixed*
restrict
reducedMass,
__global
mixed*
restrict
delta1,
__global
int*
restrict
converged,
mixed
tol,
int
iteration
)
{
__global
const
mixed*
restrict
reducedMass,
__global
mixed*
restrict
delta1,
__global
int*
restrict
converged,
__global
int*
restrict
hostConvergedFlag,
mixed
tol,
int
iteration
)
{
__local
int
groupConverged
;
if
(
converged[1-iteration%2]
)
{
if
(
get_global_id
(
0
)
==
0
)
if
(
get_global_id
(
0
)
==
0
)
{
converged[iteration%2]
=
1
;
hostConvergedFlag[0]
=
1
;
}
return
; // The constraint iteration has already converged.
}
if
(
get_local_id
(
0
)
==
0
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment