Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
1330af5c
Commit
1330af5c
authored
Sep 09, 2013
by
peastman
Browse files
Minor code cleanup
parent
cfd815ec
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
52 additions
and
87 deletions
+52
-87
platforms/opencl/src/OpenCLFFT3D.cpp
platforms/opencl/src/OpenCLFFT3D.cpp
+3
-3
platforms/opencl/src/kernels/fft.cl
platforms/opencl/src/kernels/fft.cl
+3
-4
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
+23
-40
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+23
-40
No files found.
platforms/opencl/src/OpenCLFFT3D.cpp
View file @
1330af5c
...
@@ -227,10 +227,10 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threa
...
@@ -227,10 +227,10 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threa
source
<<
"for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0))
\n
"
;
source
<<
"for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0))
\n
"
;
source
<<
"out[y*(ZSIZE*XSIZE)+z*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[z];
\n
"
;
source
<<
"out[y*(ZSIZE*XSIZE)+z*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[z];
\n
"
;
}
}
else
{
else
{
source
<<
"if (index < XSIZE*YSIZE)
\n
"
;
source
<<
"if (index < XSIZE*YSIZE)
\n
"
;
source
<<
"out[y*(ZSIZE*XSIZE)+(get_local_id(0)%ZSIZE)*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[get_local_id(0)];
\n
"
;
source
<<
"out[y*(ZSIZE*XSIZE)+(get_local_id(0)%ZSIZE)*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[get_local_id(0)];
\n
"
;
}
}
source
<<
"barrier(CLK_GLOBAL_MEM_FENCE);"
;
source
<<
"barrier(CLK_GLOBAL_MEM_FENCE);"
;
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"XSIZE"
]
=
context
.
intToString
(
xsize
);
replacements
[
"XSIZE"
]
=
context
.
intToString
(
xsize
);
...
...
platforms/opencl/src/kernels/fft.cl
View file @
1330af5c
...
@@ -12,19 +12,18 @@ __kernel void execFFT(__global const real2* restrict in, __global real2* restric
...
@@ -12,19 +12,18 @@ __kernel void execFFT(__global const real2* restrict in, __global real2* restric
w[i]
=
(
real2
)
(
cos
(
-sign*i*2*M_PI/ZSIZE
)
,
sin
(
-sign*i*2*M_PI/ZSIZE
))
;
w[i]
=
(
real2
)
(
cos
(
-sign*i*2*M_PI/ZSIZE
)
,
sin
(
-sign*i*2*M_PI/ZSIZE
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
baseIndex
=
get_group_id
(
0
)
*BLOCKS_PER_GROUP
; baseIndex < XSIZE*YSIZE; baseIndex += get_num_groups(0)*BLOCKS_PER_GROUP) {
for
(
int
baseIndex
=
get_group_id
(
0
)
*BLOCKS_PER_GROUP
; baseIndex < XSIZE*YSIZE; baseIndex += get_num_groups(0)*BLOCKS_PER_GROUP) {
int
index
=
baseIndex+get_local_id
(
0
)
/ZSIZE
;
int
index
=
baseIndex+get_local_id
(
0
)
/ZSIZE
;
int
x
=
index/YSIZE
;
int
x
=
index/YSIZE
;
int
y
=
index-x*YSIZE
;
int
y
=
index-x*YSIZE
;
#
if
LOOP_REQUIRED
#
if
LOOP_REQUIRED
for
(
int
z
=
get_local_id
(
0
)
; z < ZSIZE; z += get_local_size(0))
for
(
int
z
=
get_local_id
(
0
)
; z < ZSIZE; z += get_local_size(0))
data0[z]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+z]
;
data0[z]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+z]
;
#
else
#
else
if
(
index
<
XSIZE*YSIZE
)
if
(
index
<
XSIZE*YSIZE
)
data0[get_local_id
(
0
)
]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+get_local_id
(
0
)
%ZSIZE]
;
data0[get_local_id
(
0
)
]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+get_local_id
(
0
)
%ZSIZE]
;
#
endif
#
endif
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
COMPUTE_FFT
COMPUTE_FFT
}
}
}
}
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
View file @
1330af5c
...
@@ -382,7 +382,6 @@ void CudaIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context)
...
@@ -382,7 +382,6 @@ void CudaIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context)
string
CudaIntegrateRPMDStepKernel
::
createFFT
(
int
size
,
const
string
&
variable
,
bool
forward
)
{
string
CudaIntegrateRPMDStepKernel
::
createFFT
(
int
size
,
const
string
&
variable
,
bool
forward
)
{
stringstream
source
;
stringstream
source
;
int
unfactored
=
size
;
int
stage
=
0
;
int
stage
=
0
;
int
L
=
size
;
int
L
=
size
;
int
m
=
1
;
int
m
=
1
;
...
@@ -398,16 +397,27 @@ string CudaIntegrateRPMDStepKernel::createFFT(int size, const string& variable,
...
@@ -398,16 +397,27 @@ string CudaIntegrateRPMDStepKernel::createFFT(int size, const string& variable,
// Factor size, generating an appropriate block of code for each factor.
// Factor size, generating an appropriate block of code for each factor.
while
(
unfactored
>
1
)
{
while
(
L
>
1
)
{
int
input
=
stage
%
2
;
int
input
=
stage
%
2
;
int
output
=
1
-
input
;
int
output
=
1
-
input
;
int
radix
;
if
(
L
%
5
==
0
)
radix
=
5
;
else
if
(
L
%
4
==
0
)
radix
=
4
;
else
if
(
L
%
3
==
0
)
radix
=
3
;
else
if
(
L
%
2
==
0
)
radix
=
2
;
else
throw
OpenMMException
(
"Illegal size for FFT: "
+
cu
.
intToString
(
size
));
source
<<
"{
\n
"
;
source
<<
"{
\n
"
;
if
(
unfactored
%
5
==
0
)
{
L
=
L
/
radix
;
L
=
L
/
5
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix "
<<
radix
<<
")
\n
"
;
source
<<
"
// Pass "
<<
(
stage
+
1
)
<<
" (radix 5)
\n
"
;
source
<<
"
if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"i
f (
indexInBlock
< "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"i
nt i =
indexInBlock
;
\n
"
;
source
<<
"int
i
= i
ndexInBlock
;
\n
"
;
source
<<
"int
j
= i
/"
<<
m
<<
"
;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
if
(
radix
==
5
)
{
source
<<
"mixed3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"mixed3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
@@ -451,16 +461,8 @@ string CudaIntegrateRPMDStepKernel::createFFT(int size, const string& variable,
...
@@ -451,16 +461,8 @@ string CudaIntegrateRPMDStepKernel::createFFT(int size, const string& variable,
source
<<
"imag"
<<
output
<<
"[i+(4*j+3)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8r-d10r, d8i-d10i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(4*j+3)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8r-d10r, d8i-d10i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
4
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7r-d9r, d7i-d9i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
4
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7r-d9r, d7i-d9i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
4
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7r-d9r, d7i-d9i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
4
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7r-d9r, d7i-d9i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
5
;
unfactored
/=
5
;
}
}
else
if
(
unfactored
%
4
==
0
)
{
else
if
(
radix
==
4
)
{
L
=
L
/
4
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 4)
\n
"
;
source
<<
"if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = indexInBlock;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"mixed3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"mixed3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
@@ -485,16 +487,8 @@ string CudaIntegrateRPMDStepKernel::createFFT(int size, const string& variable,
...
@@ -485,16 +487,8 @@ string CudaIntegrateRPMDStepKernel::createFFT(int size, const string& variable,
source
<<
"imag"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0r-d2r, d0i-d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0r-d2r, d0i-d2i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1r-d3r, d1i-d3i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1r-d3r, d1i-d3i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1r-d3r, d1i-d3i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1r-d3r, d1i-d3i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
4
;
unfactored
/=
4
;
}
}
else
if
(
unfactored
%
3
==
0
)
{
else
if
(
radix
==
3
)
{
L
=
L
/
3
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 3)
\n
"
;
source
<<
"if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = indexInBlock;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"mixed3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"mixed3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
@@ -513,16 +507,8 @@ string CudaIntegrateRPMDStepKernel::createFFT(int size, const string& variable,
...
@@ -513,16 +507,8 @@ string CudaIntegrateRPMDStepKernel::createFFT(int size, const string& variable,
source
<<
"imag"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
3
*
L
)
<<
"], d1r+d2r, d1i+d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
3
*
L
)
<<
"], d1r+d2r, d1i+d2i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1r-d2r, d1i-d2i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1r-d2r, d1i-d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1r-d2r, d1i-d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1r-d2r, d1i-d2i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
3
;
unfactored
/=
3
;
}
}
else
if
(
unfactored
%
2
==
0
)
{
else
if
(
radix
==
2
)
{
L
=
L
/
2
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 2)
\n
"
;
source
<<
"if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = indexInBlock;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"mixed3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"mixed3 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
@@ -531,12 +517,9 @@ string CudaIntegrateRPMDStepKernel::createFFT(int size, const string& variable,
...
@@ -531,12 +517,9 @@ string CudaIntegrateRPMDStepKernel::createFFT(int size, const string& variable,
source
<<
"imag"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0i+c1i;
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0i+c1i;
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0r-c1r, c0i-c1i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0r-c1r, c0i-c1i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0r-c1r, c0i-c1i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0r-c1r, c0i-c1i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
2
;
unfactored
/=
2
;
}
}
else
source
<<
"}
\n
"
;
throw
OpenMMException
(
"Illegal size for FFT: "
+
cu
.
intToString
(
size
))
;
m
=
m
*
radix
;
source
<<
"__syncthreads();
\n
"
;
source
<<
"__syncthreads();
\n
"
;
source
<<
"}
\n
"
;
source
<<
"}
\n
"
;
++
stage
;
++
stage
;
...
...
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
View file @
1330af5c
...
@@ -387,7 +387,6 @@ void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context
...
@@ -387,7 +387,6 @@ void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context
string
OpenCLIntegrateRPMDStepKernel
::
createFFT
(
int
size
,
const
string
&
variable
,
bool
forward
)
{
string
OpenCLIntegrateRPMDStepKernel
::
createFFT
(
int
size
,
const
string
&
variable
,
bool
forward
)
{
stringstream
source
;
stringstream
source
;
int
unfactored
=
size
;
int
stage
=
0
;
int
stage
=
0
;
int
L
=
size
;
int
L
=
size
;
int
m
=
1
;
int
m
=
1
;
...
@@ -403,16 +402,27 @@ string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable
...
@@ -403,16 +402,27 @@ string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable
// Factor size, generating an appropriate block of code for each factor.
// Factor size, generating an appropriate block of code for each factor.
while
(
unfactored
>
1
)
{
while
(
L
>
1
)
{
int
input
=
stage
%
2
;
int
input
=
stage
%
2
;
int
output
=
1
-
input
;
int
output
=
1
-
input
;
int
radix
;
if
(
L
%
5
==
0
)
radix
=
5
;
else
if
(
L
%
4
==
0
)
radix
=
4
;
else
if
(
L
%
3
==
0
)
radix
=
3
;
else
if
(
L
%
2
==
0
)
radix
=
2
;
else
throw
OpenMMException
(
"Illegal size for FFT: "
+
cl
.
intToString
(
size
));
source
<<
"{
\n
"
;
source
<<
"{
\n
"
;
if
(
unfactored
%
5
==
0
)
{
L
=
L
/
radix
;
L
=
L
/
5
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix "
<<
radix
<<
")
\n
"
;
source
<<
"
// Pass "
<<
(
stage
+
1
)
<<
" (radix 5)
\n
"
;
source
<<
"
if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"i
f (
indexInBlock
< "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"i
nt i =
indexInBlock
;
\n
"
;
source
<<
"int
i
= i
ndexInBlock
;
\n
"
;
source
<<
"int
j
= i
/"
<<
m
<<
"
;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
if
(
radix
==
5
)
{
source
<<
"mixed4 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"mixed4 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
@@ -456,16 +466,8 @@ string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable
...
@@ -456,16 +466,8 @@ string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable
source
<<
"imag"
<<
output
<<
"[i+(4*j+3)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8r-d10r, d8i-d10i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(4*j+3)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8r-d10r, d8i-d10i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
4
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7r-d9r, d7i-d9i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
4
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7r-d9r, d7i-d9i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
4
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7r-d9r, d7i-d9i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
4
*
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7r-d9r, d7i-d9i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
5
;
unfactored
/=
5
;
}
}
else
if
(
unfactored
%
4
==
0
)
{
else
if
(
radix
==
4
)
{
L
=
L
/
4
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 4)
\n
"
;
source
<<
"if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = indexInBlock;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"mixed4 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"mixed4 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
@@ -490,16 +492,8 @@ string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable
...
@@ -490,16 +492,8 @@ string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable
source
<<
"imag"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0r-d2r, d0i-d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0r-d2r, d0i-d2i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1r-d3r, d1i-d3i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1r-d3r, d1i-d3i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1r-d3r, d1i-d3i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
3
*
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1r-d3r, d1i-d3i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
4
;
unfactored
/=
4
;
}
}
else
if
(
unfactored
%
3
==
0
)
{
else
if
(
radix
==
3
)
{
L
=
L
/
3
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 3)
\n
"
;
source
<<
"if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = indexInBlock;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"mixed4 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"mixed4 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
@@ -518,16 +512,8 @@ string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable
...
@@ -518,16 +512,8 @@ string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable
source
<<
"imag"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
3
*
L
)
<<
"], d1r+d2r, d1i+d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
3
*
L
)
<<
"], d1r+d2r, d1i+d2i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1r-d2r, d1i-d2i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1r-d2r, d1i-d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1r-d2r, d1i-d2i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
(
2
*
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1r-d2r, d1i-d2i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
3
;
unfactored
/=
3
;
}
}
else
if
(
unfactored
%
2
==
0
)
{
else
if
(
radix
==
2
)
{
L
=
L
/
2
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 2)
\n
"
;
source
<<
"if (indexInBlock < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = indexInBlock;
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"mixed4 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0r = real"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c0i = imag"
<<
input
<<
"[i];
\n
"
;
source
<<
"mixed4 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"mixed4 c1r = real"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
@@ -536,12 +522,9 @@ string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable
...
@@ -536,12 +522,9 @@ string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable
source
<<
"imag"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0i+c1i;
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0i+c1i;
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0r-c1r, c0i-c1i);
\n
"
;
source
<<
"real"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = "
<<
multReal
<<
"(w[j*"
<<
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0r-c1r, c0i-c1i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0r-c1r, c0i-c1i);
\n
"
;
source
<<
"imag"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = "
<<
multImag
<<
"(w[j*"
<<
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0r-c1r, c0i-c1i);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
2
;
unfactored
/=
2
;
}
}
else
source
<<
"}
\n
"
;
throw
OpenMMException
(
"Illegal size for FFT: "
+
cl
.
intToString
(
size
))
;
m
=
m
*
radix
;
source
<<
"barrier(CLK_LOCAL_MEM_FENCE);
\n
"
;
source
<<
"barrier(CLK_LOCAL_MEM_FENCE);
\n
"
;
source
<<
"}
\n
"
;
source
<<
"}
\n
"
;
++
stage
;
++
stage
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment