Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c0a43bfc
"platforms/cuda/include/CudaQueue.h" did not exist on "05198df6b07d5f7f543fde46212c07147f1c3b11"
Commit
c0a43bfc
authored
Apr 05, 2013
by
Peter Eastman
Browse files
Improved performance of CCMA
parent
022a0190
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
55 additions
and
24 deletions
+55
-24
platforms/cuda/src/CudaIntegrationUtilities.cpp
platforms/cuda/src/CudaIntegrationUtilities.cpp
+9
-7
platforms/cuda/src/CudaIntegrationUtilities.h
platforms/cuda/src/CudaIntegrationUtilities.h
+2
-0
platforms/cuda/src/kernels/integrationUtilities.cu
platforms/cuda/src/kernels/integrationUtilities.cu
+8
-4
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+28
-9
platforms/opencl/src/OpenCLIntegrationUtilities.h
platforms/opencl/src/OpenCLIntegrationUtilities.h
+4
-2
platforms/opencl/src/kernels/ccma.cl
platforms/opencl/src/kernels/ccma.cl
+4
-2
No files found.
platforms/cuda/src/CudaIntegrationUtilities.cpp
View file @
c0a43bfc
...
...
@@ -99,7 +99,7 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
posDelta
(
NULL
),
settleAtoms
(
NULL
),
settleParams
(
NULL
),
shakeAtoms
(
NULL
),
shakeParams
(
NULL
),
random
(
NULL
),
randomSeed
(
NULL
),
randomPos
(
0
),
stepSize
(
NULL
),
ccmaAtoms
(
NULL
),
ccmaDistance
(
NULL
),
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedMemory
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
)
{
// Create workspace arrays.
...
...
@@ -468,6 +468,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
ccmaNumAtomConstraints
=
CudaArray
::
create
<
int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaConstraintMatrixColumn
=
CudaArray
::
create
<
int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConverged
=
CudaArray
::
create
<
int
>
(
context
,
2
,
"ccmaConverged"
);
CHECK_RESULT2
(
cuMemHostAlloc
((
void
**
)
&
ccmaConvergedMemory
,
sizeof
(
int
),
CU_MEMHOSTALLOC_DEVICEMAP
),
"Error allocating pinned memory"
);
CHECK_RESULT2
(
cuMemHostGetDevicePointer
(
&
ccmaConvergedDeviceMemory
,
ccmaConvergedMemory
,
0
),
"Error getting device address for pinned memory"
);
vector
<
int2
>
atomsVec
(
ccmaAtoms
->
getSize
());
vector
<
int
>
atomConstraintsVec
(
ccmaAtomConstraints
->
getSize
());
vector
<
int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
->
getSize
());
...
...
@@ -681,6 +683,8 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
delete
ccmaDelta2
;
if
(
ccmaConverged
!=
NULL
)
delete
ccmaConverged
;
if
(
ccmaConvergedMemory
!=
NULL
)
cuMemFreeHost
(
ccmaConvergedMemory
);
if
(
vsite2AvgAtoms
!=
NULL
)
delete
vsite2AvgAtoms
;
if
(
vsite2AvgWeights
!=
NULL
)
...
...
@@ -739,7 +743,7 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
void
*
forceArgs
[]
=
{
&
ccmaAtoms
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
&
ccmaReducedMass
->
getDevicePointer
(),
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaConverged
->
getDevicePointer
(),
tolPointer
,
&
i
};
&
ccmaConvergedDeviceMemory
,
tolPointer
,
&
i
};
void
*
multiplyArgs
[]
=
{
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaDelta2
->
getDevicePointer
(),
&
ccmaConstraintMatrixColumn
->
getDevicePointer
(),
&
ccmaConstraintMatrixValue
->
getDevicePointer
(),
&
ccmaConverged
->
getDevicePointer
(),
&
i
};
void
*
updateArgs
[]
=
{
&
ccmaNumAtomConstraints
->
getDevicePointer
(),
&
ccmaAtomConstraints
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
...
...
@@ -747,18 +751,16 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
&
context
.
getVelm
().
getDevicePointer
(),
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaDelta2
->
getDevicePointer
(),
&
ccmaConverged
->
getDevicePointer
(),
&
i
};
const
int
checkInterval
=
4
;
int
*
c
onverged
=
(
int
*
)
context
.
getPinnedBuffer
()
;
ccmaC
onverged
Memory
[
0
]
=
0
;
for
(
i
=
0
;
i
<
150
;
i
++
)
{
context
.
executeKernel
(
ccmaForceKernel
,
forceArgs
,
ccmaAtoms
->
getSize
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
ccmaConverged
->
download
(
converged
,
false
);
if
((
i
+
1
)
%
checkInterval
==
0
)
CHECK_RESULT2
(
cuEventRecord
(
ccmaEvent
,
0
),
"Error recording event for CCMA"
);
}
context
.
executeKernel
(
ccmaMultiplyKernel
,
multiplyArgs
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaUpdateKernel
,
updateArgs
,
context
.
getNumAtoms
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
CHECK_RESULT2
(
cuEventSynchronize
(
ccmaEvent
),
"Error synchronizing on event for CCMA"
);
if
(
converged
[
i
%
2
])
if
(
c
cmaC
onverged
Memory
[
0
])
break
;
}
}
...
...
platforms/cuda/src/CudaIntegrationUtilities.h
View file @
c0a43bfc
...
...
@@ -141,6 +141,8 @@ private:
CudaArray
*
ccmaDelta1
;
CudaArray
*
ccmaDelta2
;
CudaArray
*
ccmaConverged
;
int
*
ccmaConvergedMemory
;
CUdeviceptr
ccmaConvergedDeviceMemory
;
CUevent
ccmaEvent
;
CudaArray
*
vsite2AvgAtoms
;
CudaArray
*
vsite2AvgWeights
;
...
...
platforms/cuda/src/kernels/integrationUtilities.cu
View file @
c0a43bfc
...
...
@@ -598,11 +598,13 @@ extern "C" __global__ void computeCCMAConstraintDirections(const int2* __restric
* Compute the force applied by each CCMA position constraint.
*/
extern
"C"
__global__
void
computeCCMAPositionConstraintForce
(
const
int2
*
__restrict__
constraintAtoms
,
const
mixed4
*
__restrict__
constraintDistance
,
const
mixed4
*
__restrict__
atomPositions
,
const
mixed
*
__restrict__
reducedMass
,
mixed
*
__restrict__
delta1
,
int
*
__restrict__
converged
,
mixed
tol
,
int
iteration
)
{
const
mixed
*
__restrict__
reducedMass
,
mixed
*
__restrict__
delta1
,
int
*
__restrict__
converged
,
int
*
__restrict__
hostConvergedFlag
,
mixed
tol
,
int
iteration
)
{
__shared__
int
groupConverged
;
if
(
converged
[
1
-
iteration
%
2
])
{
if
(
blockIdx
.
x
==
0
&&
threadIdx
.
x
==
0
)
if
(
blockIdx
.
x
==
0
&&
threadIdx
.
x
==
0
)
{
converged
[
iteration
%
2
]
=
1
;
hostConvergedFlag
[
0
]
=
1
;
}
return
;
// The constraint iteration has already converged.
}
if
(
threadIdx
.
x
==
0
)
...
...
@@ -639,11 +641,13 @@ extern "C" __global__ void computeCCMAPositionConstraintForce(const int2* __rest
* Compute the force applied by each CCMA velocity constraint.
*/
extern
"C"
__global__
void
computeCCMAVelocityConstraintForce
(
const
int2
*
__restrict__
constraintAtoms
,
const
mixed4
*
__restrict__
constraintDistance
,
const
mixed4
*
__restrict__
atomPositions
,
const
mixed
*
__restrict__
reducedMass
,
mixed
*
__restrict__
delta1
,
int
*
__restrict__
converged
,
mixed
tol
,
int
iteration
)
{
const
mixed
*
__restrict__
reducedMass
,
mixed
*
__restrict__
delta1
,
int
*
__restrict__
converged
,
int
*
__restrict__
hostConvergedFlag
,
mixed
tol
,
int
iteration
)
{
__shared__
int
groupConverged
;
if
(
converged
[
1
-
iteration
%
2
])
{
if
(
blockIdx
.
x
==
0
&&
threadIdx
.
x
==
0
)
if
(
blockIdx
.
x
==
0
&&
threadIdx
.
x
==
0
)
{
converged
[
iteration
%
2
]
=
1
;
hostConvergedFlag
[
0
]
=
1
;
}
return
;
// The constraint iteration has already converged.
}
if
(
threadIdx
.
x
==
0
)
...
...
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
c0a43bfc
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors.
*
* Portions copyright (c) 2009
-2013
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -98,7 +98,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
posDelta
(
NULL
),
settleAtoms
(
NULL
),
settleParams
(
NULL
),
shakeAtoms
(
NULL
),
shakeParams
(
NULL
),
random
(
NULL
),
randomSeed
(
NULL
),
randomPos
(
0
),
stepSize
(
NULL
),
ccmaAtoms
(
NULL
),
ccmaDistance
(
NULL
),
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedHostBuffer
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
hasInitializedPosConstraintKernels
(
false
),
hasInitializedVelConstraintKernels
(
false
)
{
// Create workspace arrays.
...
...
@@ -479,6 +479,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaNumAtomConstraints
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaConstraintMatrixColumn
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConverged
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
2
,
"CcmaConverged"
);
ccmaConvergedHostBuffer
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
1
,
"CcmaConvergedHostBuffer"
,
CL_MEM_WRITE_ONLY
|
CL_MEM_ALLOC_HOST_PTR
);
ccmaConvergedHostMemory
=
(
int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
ccmaConvergedHostBuffer
->
getDeviceBuffer
(),
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
sizeof
(
cl_int
));
// Different communication mechanisms give optimal performance on AMD and on NVIDIA.
string
vendor
=
context
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
ccmaUseDirectBuffer
=
(
vendor
.
size
()
>=
28
&&
vendor
.
substr
(
0
,
28
)
==
"Advanced Micro Devices, Inc."
);
vector
<
mm_int2
>
atomsVec
(
ccmaAtoms
->
getSize
());
vector
<
cl_int
>
atomConstraintsVec
(
ccmaAtomConstraints
->
getSize
());
vector
<
cl_int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
->
getSize
());
...
...
@@ -720,6 +725,8 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
delete
ccmaDelta2
;
if
(
ccmaConverged
!=
NULL
)
delete
ccmaConverged
;
if
(
ccmaConvergedHostBuffer
!=
NULL
)
delete
ccmaConvergedHostBuffer
;
if
(
vsite2AvgAtoms
!=
NULL
)
delete
vsite2AvgAtoms
;
if
(
vsite2AvgWeights
!=
NULL
)
...
...
@@ -814,6 +821,7 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
ccmaReducedMass
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
ccmaConverged
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaConvergedHostBuffer
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaDelta1
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaMultiplyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
ccmaConstraintMatrixColumn
->
getDeviceBuffer
());
...
...
@@ -829,29 +837,40 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
ccmaConverged
->
getDeviceBuffer
());
}
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
ccmaForceKernel
.
setArg
<
cl_double
>
(
6
,
(
cl_double
)
tol
);
ccmaForceKernel
.
setArg
<
cl_double
>
(
7
,
(
cl_double
)
tol
);
else
ccmaForceKernel
.
setArg
<
cl_float
>
(
6
,
(
cl_float
)
tol
);
ccmaForceKernel
.
setArg
<
cl_float
>
(
7
,
(
cl_float
)
tol
);
context
.
executeKernel
(
ccmaDirectionsKernel
,
ccmaAtoms
->
getSize
());
const
int
checkInterval
=
4
;
cl
::
Event
event
;
int
*
converged
=
(
int
*
)
context
.
getPinnedBuffer
();
ccmaConvergedHostMemory
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
150
;
i
++
)
{
ccmaForceKernel
.
setArg
<
cl_int
>
(
7
,
i
);
ccmaForceKernel
.
setArg
<
cl_int
>
(
8
,
i
);
context
.
executeKernel
(
ccmaForceKernel
,
ccmaAtoms
->
getSize
());
if
((
i
+
1
)
%
checkInterval
==
0
)
if
((
i
+
1
)
%
checkInterval
==
0
)
{
if
(
ccmaUseDirectBuffer
)
context
.
getQueue
().
enqueueMarker
(
&
event
);
else
context
.
getQueue
().
enqueueReadBuffer
(
ccmaConverged
->
getDeviceBuffer
(),
CL_FALSE
,
0
,
2
*
sizeof
(
cl_int
),
converged
,
NULL
,
&
event
);
}
ccmaMultiplyKernel
.
setArg
<
cl_int
>
(
5
,
i
);
context
.
executeKernel
(
ccmaMultiplyKernel
,
ccmaAtoms
->
getSize
());
ccmaUpdateKernel
.
setArg
<
cl_int
>
(
8
,
i
);
context
.
executeKernel
(
ccmaUpdateKernel
,
context
.
getNumAtoms
());
if
((
i
+
1
)
%
checkInterval
==
0
)
{
event
.
wait
();
if
(
ccmaUseDirectBuffer
)
{
if
(
ccmaConvergedHostMemory
[
0
])
break
;
}
else
{
if
(
converged
[
i
%
2
])
break
;
}
}
}
}
}
void
OpenCLIntegrationUtilities
::
computeVirtualSites
()
{
...
...
platforms/opencl/src/OpenCLIntegrationUtilities.h
View file @
c0a43bfc
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors.
*
* Portions copyright (c) 2009
-2013
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -141,6 +141,8 @@ private:
OpenCLArray
*
ccmaDelta1
;
OpenCLArray
*
ccmaDelta2
;
OpenCLArray
*
ccmaConverged
;
int
*
ccmaConvergedHostMemory
;
OpenCLArray
*
ccmaConvergedHostBuffer
;
OpenCLArray
*
vsite2AvgAtoms
;
OpenCLArray
*
vsite2AvgWeights
;
OpenCLArray
*
vsite3AvgAtoms
;
...
...
@@ -149,7 +151,7 @@ private:
OpenCLArray
*
vsiteOutOfPlaneWeights
;
int
randomPos
;
int
lastSeed
,
numVsites
;
bool
hasInitializedPosConstraintKernels
,
hasInitializedVelConstraintKernels
;
bool
hasInitializedPosConstraintKernels
,
hasInitializedVelConstraintKernels
,
ccmaUseDirectBuffer
;
struct
ShakeCluster
;
struct
ConstraintOrderer
;
};
...
...
platforms/opencl/src/kernels/ccma.cl
View file @
c0a43bfc
...
...
@@ -34,11 +34,13 @@ __kernel void computeConstraintDirections(__global const int2* restrict constrai
*
Compute
the
force
applied
by
each
constraint.
*/
__kernel
void
computeConstraintForce
(
__global
const
int2*
restrict
constraintAtoms,
__global
const
mixed4*
restrict
constraintDistance,
__global
const
mixed4*
restrict
atomPositions,
__global
const
mixed*
restrict
reducedMass,
__global
mixed*
restrict
delta1,
__global
int*
restrict
converged,
mixed
tol,
int
iteration
)
{
__global
const
mixed*
restrict
reducedMass,
__global
mixed*
restrict
delta1,
__global
int*
restrict
converged,
__global
int*
restrict
hostConvergedFlag,
mixed
tol,
int
iteration
)
{
__local
int
groupConverged
;
if
(
converged[1-iteration%2]
)
{
if
(
get_global_id
(
0
)
==
0
)
if
(
get_global_id
(
0
)
==
0
)
{
converged[iteration%2]
=
1
;
hostConvergedFlag[0]
=
1
;
}
return
; // The constraint iteration has already converged.
}
if
(
get_local_id
(
0
)
==
0
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment