Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
59ab6fc9
Commit
59ab6fc9
authored
Nov 14, 2014
by
peastman
Browse files
Merge pull request #724 from peastman/opt
Minor optimization to sorting
parents
a8df200d
4c0621ed
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
19 additions
and
7 deletions
+19
-7
platforms/cuda/src/CudaSort.cpp
platforms/cuda/src/CudaSort.cpp
+2
-3
platforms/cuda/src/kernels/sort.cu
platforms/cuda/src/kernels/sort.cu
+7
-1
platforms/opencl/src/OpenCLSort.cpp
platforms/opencl/src/OpenCLSort.cpp
+3
-2
platforms/opencl/src/kernels/sort.cl
platforms/opencl/src/kernels/sort.cl
+7
-1
No files found.
platforms/cuda/src/CudaSort.cpp
View file @
59ab6fc9
...
@@ -112,13 +112,12 @@ void CudaSort::sort(CudaArray& data) {
...
@@ -112,13 +112,12 @@ void CudaSort::sort(CudaArray& data) {
else
{
else
{
// Compute the range of data values.
// Compute the range of data values.
void
*
rangeArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
dataRange
->
getDevicePointer
()};
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
void
*
rangeArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
dataRange
->
getDevicePointer
(),
&
numBuckets
,
&
bucketOffset
->
getDevicePointer
()};
context
.
executeKernel
(
computeRangeKernel
,
rangeArgs
,
rangeKernelSize
,
rangeKernelSize
,
rangeKernelSize
*
trait
->
getKeySize
());
context
.
executeKernel
(
computeRangeKernel
,
rangeArgs
,
rangeKernelSize
,
rangeKernelSize
,
rangeKernelSize
*
trait
->
getKeySize
());
// Assign array elements to buckets.
// Assign array elements to buckets.
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
context
.
clearBuffer
(
*
bucketOffset
);
void
*
elementsArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
numBuckets
,
&
dataRange
->
getDevicePointer
(),
void
*
elementsArgs
[]
=
{
&
data
.
getDevicePointer
(),
&
dataLength
,
&
numBuckets
,
&
dataRange
->
getDevicePointer
(),
&
bucketOffset
->
getDevicePointer
(),
&
bucketOfElement
->
getDevicePointer
(),
&
offsetInBucket
->
getDevicePointer
()};
&
bucketOffset
->
getDevicePointer
(),
&
bucketOfElement
->
getDevicePointer
(),
&
offsetInBucket
->
getDevicePointer
()};
context
.
executeKernel
(
assignElementsKernel
,
elementsArgs
,
data
.
getSize
());
context
.
executeKernel
(
assignElementsKernel
,
elementsArgs
,
data
.
getSize
());
...
...
platforms/cuda/src/kernels/sort.cu
View file @
59ab6fc9
...
@@ -50,7 +50,8 @@ __global__ void sortShortList(DATA_TYPE* __restrict__ data, unsigned int length)
...
@@ -50,7 +50,8 @@ __global__ void sortShortList(DATA_TYPE* __restrict__ data, unsigned int length)
* Calculate the minimum and maximum value in the array to be sorted. This kernel
* Calculate the minimum and maximum value in the array to be sorted. This kernel
* is executed as a single work group.
* is executed as a single work group.
*/
*/
__global__
void
computeRange
(
const
DATA_TYPE
*
__restrict__
data
,
unsigned
int
length
,
KEY_TYPE
*
__restrict__
range
)
{
__global__
void
computeRange
(
const
DATA_TYPE
*
__restrict__
data
,
unsigned
int
length
,
KEY_TYPE
*
__restrict__
range
,
unsigned
int
numBuckets
,
unsigned
int
*
__restrict__
bucketOffset
)
{
extern
__shared__
KEY_TYPE
rangeBuffer
[];
extern
__shared__
KEY_TYPE
rangeBuffer
[];
KEY_TYPE
minimum
=
MAX_KEY
;
KEY_TYPE
minimum
=
MAX_KEY
;
KEY_TYPE
maximum
=
MIN_KEY
;
KEY_TYPE
maximum
=
MIN_KEY
;
...
@@ -86,6 +87,11 @@ __global__ void computeRange(const DATA_TYPE* __restrict__ data, unsigned int le
...
@@ -86,6 +87,11 @@ __global__ void computeRange(const DATA_TYPE* __restrict__ data, unsigned int le
range
[
0
]
=
minimum
;
range
[
0
]
=
minimum
;
range
[
1
]
=
maximum
;
range
[
1
]
=
maximum
;
}
}
// Clear the bucket counters in preparation for the next kernel.
for
(
unsigned
int
index
=
threadIdx
.
x
;
index
<
numBuckets
;
index
+=
blockDim
.
x
)
bucketOffset
[
index
]
=
0
;
}
}
/**
/**
...
...
platforms/opencl/src/OpenCLSort.cpp
View file @
59ab6fc9
...
@@ -116,16 +116,17 @@ void OpenCLSort::sort(OpenCLArray& data) {
...
@@ -116,16 +116,17 @@ void OpenCLSort::sort(OpenCLArray& data) {
else
{
else
{
// Compute the range of data values.
// Compute the range of data values.
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
computeRangeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
computeRangeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
computeRangeKernel
.
setArg
<
cl_uint
>
(
1
,
data
.
getSize
());
computeRangeKernel
.
setArg
<
cl_uint
>
(
1
,
data
.
getSize
());
computeRangeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
dataRange
->
getDeviceBuffer
());
computeRangeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
dataRange
->
getDeviceBuffer
());
computeRangeKernel
.
setArg
(
3
,
rangeKernelSize
*
trait
->
getKeySize
(),
NULL
);
computeRangeKernel
.
setArg
(
3
,
rangeKernelSize
*
trait
->
getKeySize
(),
NULL
);
computeRangeKernel
.
setArg
<
cl_int
>
(
4
,
numBuckets
);
computeRangeKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
bucketOffset
->
getDeviceBuffer
());
context
.
executeKernel
(
computeRangeKernel
,
rangeKernelSize
,
rangeKernelSize
);
context
.
executeKernel
(
computeRangeKernel
,
rangeKernelSize
,
rangeKernelSize
);
// Assign array elements to buckets.
// Assign array elements to buckets.
unsigned
int
numBuckets
=
bucketOffset
->
getSize
();
context
.
clearBuffer
(
*
bucketOffset
);
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
assignElementsKernel
.
setArg
<
cl_int
>
(
1
,
data
.
getSize
());
assignElementsKernel
.
setArg
<
cl_int
>
(
1
,
data
.
getSize
());
assignElementsKernel
.
setArg
<
cl_int
>
(
2
,
numBuckets
);
assignElementsKernel
.
setArg
<
cl_int
>
(
2
,
numBuckets
);
...
...
platforms/opencl/src/kernels/sort.cl
View file @
59ab6fc9
...
@@ -49,7 +49,8 @@ __kernel void sortShortList(__global DATA_TYPE* __restrict__ data, uint length,
...
@@ -49,7 +49,8 @@ __kernel void sortShortList(__global DATA_TYPE* __restrict__ data, uint length,
*
Calculate
the
minimum
and
maximum
value
in
the
array
to
be
sorted.
This
kernel
*
Calculate
the
minimum
and
maximum
value
in
the
array
to
be
sorted.
This
kernel
*
is
executed
as
a
single
work
group.
*
is
executed
as
a
single
work
group.
*/
*/
__kernel
void
computeRange
(
__global
const
DATA_TYPE*
restrict
data,
uint
length,
__global
KEY_TYPE*
restrict
range,
__local
KEY_TYPE*
restrict
buffer
)
{
__kernel
void
computeRange
(
__global
const
DATA_TYPE*
restrict
data,
uint
length,
__global
KEY_TYPE*
restrict
range,
__local
KEY_TYPE*
restrict
buffer,
uint
numBuckets,
__global
uint*
restrict
bucketOffset
)
{
KEY_TYPE
minimum
=
MAX_KEY
;
KEY_TYPE
minimum
=
MAX_KEY
;
KEY_TYPE
maximum
=
MIN_KEY
;
KEY_TYPE
maximum
=
MIN_KEY
;
...
@@ -84,6 +85,11 @@ __kernel void computeRange(__global const DATA_TYPE* restrict data, uint length,
...
@@ -84,6 +85,11 @@ __kernel void computeRange(__global const DATA_TYPE* restrict data, uint length,
range[0]
=
minimum
;
range[0]
=
minimum
;
range[1]
=
maximum
;
range[1]
=
maximum
;
}
}
//
Clear
the
bucket
counters
in
preparation
for
the
next
kernel.
for
(
uint
index
=
get_local_id
(
0
)
; index < numBuckets; index += get_local_size(0))
bucketOffset[index]
=
0
;
}
}
/**
/**
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment