Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
e7e6eebc
Commit
e7e6eebc
authored
Apr 29, 2013
by
Yutong Zhao
Browse files
fixes fft thread divergence, makes hd 6xxx 5xxx cards work
parent
d96606e2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
3 deletions
+9
-3
platforms/opencl/src/OpenCLFFT3D.cpp
platforms/opencl/src/OpenCLFFT3D.cpp
+3
-1
platforms/opencl/src/kernels/fft.cl
platforms/opencl/src/kernels/fft.cl
+6
-2
No files found.
platforms/opencl/src/OpenCLFFT3D.cpp
View file @
e7e6eebc
...
@@ -227,8 +227,10 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threa
...
@@ -227,8 +227,10 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threa
source
<<
"for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0))
\n
"
;
source
<<
"for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0))
\n
"
;
source
<<
"out[y*(ZSIZE*XSIZE)+z*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[z];
\n
"
;
source
<<
"out[y*(ZSIZE*XSIZE)+z*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[z];
\n
"
;
}
}
else
else
{
source
<<
"if (index < XSIZE*ZSIZE)
\n
"
;
source
<<
"out[y*(ZSIZE*XSIZE)+(get_local_id(0)%ZSIZE)*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[get_local_id(0)];
\n
"
;
source
<<
"out[y*(ZSIZE*XSIZE)+(get_local_id(0)%ZSIZE)*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[get_local_id(0)];
\n
"
;
}
source
<<
"barrier(CLK_GLOBAL_MEM_FENCE);"
;
source
<<
"barrier(CLK_GLOBAL_MEM_FENCE);"
;
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"XSIZE"
]
=
context
.
intToString
(
xsize
);
replacements
[
"XSIZE"
]
=
context
.
intToString
(
xsize
);
...
...
platforms/opencl/src/kernels/fft.cl
View file @
e7e6eebc
...
@@ -11,16 +11,20 @@ __kernel void execFFT(__global const real2* restrict in, __global real2* restric
...
@@ -11,16 +11,20 @@ __kernel void execFFT(__global const real2* restrict in, __global real2* restric
for
(
int
i
=
get_local_id
(
0
)
; i < ZSIZE; i += get_local_size(0))
for
(
int
i
=
get_local_id
(
0
)
; i < ZSIZE; i += get_local_size(0))
w[i]
=
(
real2
)
(
cos
(
-sign*i*2*M_PI/ZSIZE
)
,
sin
(
-sign*i*2*M_PI/ZSIZE
))
;
w[i]
=
(
real2
)
(
cos
(
-sign*i*2*M_PI/ZSIZE
)
,
sin
(
-sign*i*2*M_PI/ZSIZE
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
index
=
get_group_id
(
0
)
*BLOCKS_PER_GROUP+get_local_id
(
0
)
/ZSIZE
; index < XSIZE*YSIZE; index += get_num_groups(0)*BLOCKS_PER_GROUP) {
for
(
int
baseIndex
=
get_group_id
(
0
)
*BLOCKS_PER_GROUP
; baseIndex < XSIZE*YSIZE; baseIndex += get_num_groups(0)*BLOCKS_PER_GROUP) {
int
index
=
baseIndex+get_local_id
(
0
)
/ZSIZE
;
int
x
=
index/YSIZE
;
int
x
=
index/YSIZE
;
int
y
=
index-x*YSIZE
;
int
y
=
index-x*YSIZE
;
#
if
LOOP_REQUIRED
#
if
LOOP_REQUIRED
for
(
int
z
=
get_local_id
(
0
)
; z < ZSIZE; z += get_local_size(0))
for
(
int
z
=
get_local_id
(
0
)
; z < ZSIZE; z += get_local_size(0))
data0[z]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+z]
;
data0[z]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+z]
;
#
else
#
else
data0[get_local_id
(
0
)
]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+get_local_id
(
0
)
%ZSIZE]
;
if
(
index
<
XSIZE*ZSIZE
)
data0[get_local_id
(
0
)
]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+get_local_id
(
0
)
%ZSIZE]
;
#
endif
#
endif
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
COMPUTE_FFT
COMPUTE_FFT
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment