Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c08d8a53
"csrc/vscode:/vscode.git/clone" did not exist on "c8dceba110cb574065dbd8c0e97b8a2624a9d280"
Commit
c08d8a53
authored
Sep 07, 2011
by
Peter Eastman
Browse files
Optimizations to FFT
parent
e85c741b
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
203 additions
and
63 deletions
+203
-63
platforms/opencl/src/OpenCLFFT3D.cpp
platforms/opencl/src/OpenCLFFT3D.cpp
+69
-47
platforms/opencl/src/OpenCLFFT3D.h
platforms/opencl/src/OpenCLFFT3D.h
+7
-4
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+6
-3
platforms/opencl/src/OpenCLKernels.h
platforms/opencl/src/OpenCLKernels.h
+2
-1
platforms/opencl/src/kernels/fft.cl
platforms/opencl/src/kernels/fft.cl
+12
-8
platforms/opencl/tests/TestOpenCLFFT.cpp
platforms/opencl/tests/TestOpenCLFFT.cpp
+107
-0
No files found.
platforms/opencl/src/OpenCLFFT3D.cpp
View file @
c08d8a53
...
...
@@ -36,24 +36,27 @@ using namespace OpenMM;
using
namespace
std
;
OpenCLFFT3D
::
OpenCLFFT3D
(
OpenCLContext
&
context
,
int
xsize
,
int
ysize
,
int
zsize
)
:
context
(
context
),
xsize
(
xsize
),
ysize
(
ysize
),
zsize
(
zsize
)
{
x
kernel
=
createKernel
(
xsize
,
ysize
,
zsize
,
ysize
*
zsize
,
zsize
,
1
);
y
kernel
=
createKernel
(
ysize
,
zsize
,
xsize
,
zsize
,
1
,
ysize
*
zsize
);
z
kernel
=
createKernel
(
zsize
,
xsize
,
ysize
,
1
,
ysize
*
zsize
,
zsize
);
z
kernel
=
createKernel
(
xsize
,
ysize
,
zsize
);
x
kernel
=
createKernel
(
ysize
,
zsize
,
xsize
);
y
kernel
=
createKernel
(
zsize
,
xsize
,
ysize
);
}
void
OpenCLFFT3D
::
execFFT
(
OpenCLArray
<
mm_float2
>&
data
,
bool
forward
)
{
void
OpenCLFFT3D
::
execFFT
(
OpenCLArray
<
mm_float2
>&
in
,
OpenCLArray
<
mm_float2
>&
out
,
bool
forward
)
{
int
maxSize
=
xkernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
());
if
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
)
maxSize
=
1
;
xkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl_float
>
(
1
,
forward
?
1.0
f
:
-
1.0
f
);
zkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
in
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl
::
Buffer
>
(
1
,
out
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl_float
>
(
2
,
forward
?
1.0
f
:
-
1.0
f
);
context
.
executeKernel
(
zkernel
,
xsize
*
ysize
*
zsize
,
min
(
zsize
,
(
int
)
maxSize
));
xkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
out
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl
::
Buffer
>
(
1
,
in
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl_float
>
(
2
,
forward
?
1.0
f
:
-
1.0
f
);
context
.
executeKernel
(
xkernel
,
xsize
*
ysize
*
zsize
,
min
(
xsize
,
(
int
)
maxSize
));
ykernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl_float
>
(
1
,
forward
?
1.0
f
:
-
1.0
f
);
ykernel
.
setArg
<
cl
::
Buffer
>
(
0
,
in
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl
::
Buffer
>
(
1
,
out
.
getDeviceBuffer
());
ykernel
.
setArg
<
cl_float
>
(
2
,
forward
?
1.0
f
:
-
1.0
f
);
context
.
executeKernel
(
ykernel
,
xsize
*
ysize
*
zsize
,
min
(
ysize
,
(
int
)
maxSize
));
zkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
zkernel
.
setArg
<
cl_float
>
(
1
,
forward
?
1.0
f
:
-
1.0
f
);
context
.
executeKernel
(
zkernel
,
xsize
*
ysize
*
zsize
,
min
(
zsize
,
(
int
)
maxSize
));
}
int
OpenCLFFT3D
::
findLegalDimension
(
int
minimum
)
{
...
...
@@ -73,23 +76,28 @@ int OpenCLFFT3D::findLegalDimension(int minimum) {
}
}
cl
::
Kernel
OpenCLFFT3D
::
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
,
int
xmult
,
int
ymult
,
int
zmult
)
{
cl
::
Kernel
OpenCLFFT3D
::
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
)
{
bool
loopRequired
=
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
stringstream
source
;
int
unfactored
=
xsize
;
int
stage
=
0
;
int
L
=
x
size
;
int
L
=
z
size
;
int
m
=
1
;
// Factor
x
size, generating an appropriate block of code for each factor.
// Factor
z
size, generating an appropriate block of code for each factor.
while
(
unfactored
>
1
)
{
while
(
L
>
1
)
{
int
input
=
stage
%
2
;
int
output
=
1
-
input
;
source
<<
"{
\n
"
;
if
(
unfactored
%
5
==
0
)
{
if
(
L
%
5
==
0
)
{
L
=
L
/
5
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 5)
\n
"
;
source
<<
"for (int i = get_local_id(0); i < "
<<
(
L
*
m
)
<<
"; i += get_local_size(0)) {
\n
"
;
if
(
loopRequired
)
source
<<
"for (int i = get_local_id(0); i < "
<<
(
L
*
m
)
<<
"; i += get_local_size(0)) {
\n
"
;
else
{
source
<<
"if (get_local_id(0) < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"float2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
...
@@ -109,18 +117,22 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int xmult,
source
<<
"float2 d9 = sign*(float2) (d2.y+"
<<
coeff
<<
"*d3.y, -d2.x-"
<<
coeff
<<
"*d3.x);
\n
"
;
source
<<
"float2 d10 = sign*(float2) ("
<<
coeff
<<
"*d2.y-d3.y, d3.x-"
<<
coeff
<<
"*d2.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+4*j*"
<<
m
<<
"] = c0+d4;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
x
size
<<
"/"
<<
(
5
*
L
)
<<
"], d7+d9);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
x
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8+d10);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+3)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
3
*
x
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8-d10);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
4
*
x
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7-d9);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
z
size
<<
"/"
<<
(
5
*
L
)
<<
"], d7+d9);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
z
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8+d10);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+3)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
3
*
z
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8-d10);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
4
*
z
size
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7-d9);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
5
;
unfactored
/=
5
;
}
else
if
(
unfactored
%
4
==
0
)
{
else
if
(
L
%
4
==
0
)
{
L
=
L
/
4
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 4)
\n
"
;
source
<<
"for (int i = get_local_id(0); i < "
<<
(
L
*
m
)
<<
"; i += get_local_size(0)) {
\n
"
;
if
(
loopRequired
)
source
<<
"for (int i = get_local_id(0); i < "
<<
(
L
*
m
)
<<
"; i += get_local_size(0)) {
\n
"
;
else
{
source
<<
"if (get_local_id(0) < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"float2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
...
@@ -131,17 +143,21 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int xmult,
source
<<
"float2 d2 = c1+c3;
\n
"
;
source
<<
"float2 d3 = sign*(float2) (c1.y-c3.y, c3.x-c1.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+3*j*"
<<
m
<<
"] = d0+d2;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
x
size
<<
"/"
<<
(
4
*
L
)
<<
"], d1+d3);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
x
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0-d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
3
*
x
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1-d3);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
z
size
<<
"/"
<<
(
4
*
L
)
<<
"], d1+d3);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
z
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0-d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
3
*
z
size
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1-d3);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
4
;
unfactored
/=
4
;
}
else
if
(
unfactored
%
3
==
0
)
{
else
if
(
L
%
3
==
0
)
{
L
=
L
/
3
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 3)
\n
"
;
source
<<
"for (int i = get_local_id(0); i < "
<<
(
L
*
m
)
<<
"; i += get_local_size(0)) {
\n
"
;
if
(
loopRequired
)
source
<<
"for (int i = get_local_id(0); i < "
<<
(
L
*
m
)
<<
"; i += get_local_size(0)) {
\n
"
;
else
{
source
<<
"if (get_local_id(0) < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"float2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
...
@@ -150,27 +166,30 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int xmult,
source
<<
"float2 d1 = c0-0.5f*d0;
\n
"
;
source
<<
"float2 d2 = sign*"
<<
OpenCLExpressionUtilities
::
doubleToString
(
sin
(
M_PI
/
3.0
))
<<
"*(float2) (c1.y-c2.y, c2.x-c1.x);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+2*j*"
<<
m
<<
"] = c0+d0;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
x
size
<<
"/"
<<
(
3
*
L
)
<<
"], d1+d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
x
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1-d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
z
size
<<
"/"
<<
(
3
*
L
)
<<
"], d1+d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
z
size
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1-d2);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
3
;
unfactored
/=
3
;
}
else
if
(
unfactored
%
2
==
0
)
{
else
if
(
L
%
2
==
0
)
{
L
=
L
/
2
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 2)
\n
"
;
source
<<
"for (int i = get_local_id(0); i < "
<<
(
L
*
m
)
<<
"; i += get_local_size(0)) {
\n
"
;
if
(
loopRequired
)
source
<<
"for (int i = get_local_id(0); i < "
<<
(
L
*
m
)
<<
"; i += get_local_size(0)) {
\n
"
;
else
{
source
<<
"if (get_local_id(0) < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int i = get_local_id(0);
\n
"
;
}
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"float2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"data"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0+c1;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
x
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0-c1);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
z
size
<<
"/"
<<
(
2
*
L
)
<<
"], c0-c1);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
2
;
unfactored
/=
2
;
}
else
throw
OpenMMException
(
"Illegal size for FFT: "
+
OpenCLExpressionUtilities
::
intToString
(
x
size
));
throw
OpenMMException
(
"Illegal size for FFT: "
+
OpenCLExpressionUtilities
::
intToString
(
z
size
));
source
<<
"barrier(CLK_LOCAL_MEM_FENCE);
\n
"
;
source
<<
"}
\n
"
;
++
stage
;
...
...
@@ -178,22 +197,25 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int xmult,
// Create the kernel.
source
<<
"for (int i = get_local_id(0); i < XSIZE; i += get_local_size(0))
\n
"
;
source
<<
"matrix[i*XMULT+y*YMULT+z*ZMULT] = data"
<<
(
stage
%
2
)
<<
"[i];
\n
"
;
if
(
loopRequired
)
{
source
<<
"for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0))
\n
"
;
source
<<
"out[y*(ZSIZE*XSIZE)+z*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[z];
\n
"
;
}
else
source
<<
"out[y*(ZSIZE*XSIZE)+get_local_id(0)*XSIZE+x] = data"
<<
(
stage
%
2
)
<<
"[get_local_id(0)];
\n
"
;
source
<<
"barrier(CLK_GLOBAL_MEM_FENCE);"
;
map
<
string
,
string
>
replacements
;
replacements
[
"XSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
xsize
);
replacements
[
"YSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
ysize
);
replacements
[
"ZSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
zsize
);
replacements
[
"XMULT"
]
=
OpenCLExpressionUtilities
::
intToString
(
xmult
);
replacements
[
"YMULT"
]
=
OpenCLExpressionUtilities
::
intToString
(
ymult
);
replacements
[
"ZMULT"
]
=
OpenCLExpressionUtilities
::
intToString
(
zmult
);
replacements
[
"M_PI"
]
=
OpenCLExpressionUtilities
::
doubleToString
(
M_PI
);
replacements
[
"COMPUTE_FFT"
]
=
source
.
str
();
if
(
loopRequired
)
replacements
[
"LOOP_REQUIRED"
]
=
"1"
;
cl
::
Program
program
=
context
.
createProgram
(
context
.
replaceStrings
(
OpenCLKernelSources
::
fft
,
replacements
));
cl
::
Kernel
kernel
(
program
,
"execFFT"
);
kernel
.
setArg
(
2
,
x
size
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
3
,
x
size
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
4
,
x
size
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
3
,
z
size
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
4
,
z
size
*
sizeof
(
mm_float2
),
NULL
);
kernel
.
setArg
(
5
,
z
size
*
sizeof
(
mm_float2
),
NULL
);
return
kernel
;
}
platforms/opencl/src/OpenCLFFT3D.h
View file @
c08d8a53
...
...
@@ -64,12 +64,15 @@ public:
*/
OpenCLFFT3D
(
OpenCLContext
&
context
,
int
xsize
,
int
ysize
,
int
zsize
);
/**
* Perform an in-place Fourier transform.
* Perform a Fourier transform. The transform cannot be done in-place: the input and output
* arrays must be different. Also, the input array is used as workspace, so its contents
* are destroyed.
*
* @param data the data to transform, ordered such that data[x*ysize*zsize + y*zsize + z] contains element (x, y, z)
* @param in the data to transform, ordered such that in[x*ysize*zsize + y*zsize + z] contains element (x, y, z)
* @param out on exit, this contains the transformed data
* @param forward true to perform a forward transform, false to perform an inverse transform
*/
void
execFFT
(
OpenCLArray
<
mm_float2
>&
data
,
bool
forward
=
true
);
void
execFFT
(
OpenCLArray
<
mm_float2
>&
in
,
OpenCLArray
<
mm_float2
>&
out
,
bool
forward
=
true
);
/**
* Get the smallest legal size for a dimension of the grid (that is, a size with no prime
* factors other than 2, 3, and 5).
...
...
@@ -78,7 +81,7 @@ public:
*/
static
int
findLegalDimension
(
int
minimum
);
private:
cl
::
Kernel
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
,
int
xmult
,
int
ymult
,
int
zmult
);
cl
::
Kernel
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
);
int
xsize
,
ysize
,
zsize
;
OpenCLContext
&
context
;
cl
::
Kernel
xkernel
,
ykernel
,
zkernel
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
c08d8a53
...
...
@@ -1139,6 +1139,8 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
delete
cosSinSums
;
if
(
pmeGrid
!=
NULL
)
delete
pmeGrid
;
if
(
pmeGrid2
!=
NULL
)
delete
pmeGrid2
;
if
(
pmeBsplineModuliX
!=
NULL
)
delete
pmeBsplineModuliX
;
if
(
pmeBsplineModuliY
!=
NULL
)
...
...
@@ -1266,6 +1268,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Create required data structures.
pmeGrid
=
new
OpenCLArray
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid"
);
pmeGrid2
=
new
OpenCLArray
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid2"
);
pmeBsplineModuliX
=
new
OpenCLArray
<
cl_float
>
(
cl
,
gridSizeX
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
OpenCLArray
<
cl_float
>
(
cl
,
gridSizeY
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
OpenCLArray
<
cl_float
>
(
cl
,
gridSizeZ
,
"pmeBsplineModuliZ"
);
...
...
@@ -1419,7 +1422,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeAtomRange
->
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeGrid
->
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
pmeBsplineTheta
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid
2
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineModuliX
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineModuliY
->
getDeviceBuffer
());
...
...
@@ -1474,11 +1477,11 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
else
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
}
fft
->
execFFT
(
*
pmeGrid
,
true
);
fft
->
execFFT
(
*
pmeGrid
,
*
pmeGrid2
,
true
);
pmeConvolutionKernel
.
setArg
<
mm_float4
>
(
5
,
invBoxSize
);
pmeConvolutionKernel
.
setArg
<
cl_float
>
(
6
,
(
float
)
(
1.0
/
(
M_PI
*
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
)));
cl
.
executeKernel
(
pmeConvolutionKernel
,
cl
.
getNumAtoms
());
fft
->
execFFT
(
*
pmeGrid
,
false
);
fft
->
execFFT
(
*
pmeGrid2
,
*
pmeGrid
,
false
);
pmeInterpolateForceKernel
.
setArg
<
mm_float4
>
(
5
,
boxSize
);
pmeInterpolateForceKernel
.
setArg
<
mm_float4
>
(
6
,
invBoxSize
);
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
cl
.
getNumAtoms
());
...
...
platforms/opencl/src/OpenCLKernels.h
View file @
c08d8a53
...
...
@@ -475,7 +475,7 @@ private:
class
OpenCLCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
exceptionIndices
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
exceptionIndices
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeBsplineDtheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
),
fft
(
NULL
)
{
}
...
...
@@ -504,6 +504,7 @@ private:
OpenCLArray
<
mm_int4
>*
exceptionIndices
;
OpenCLArray
<
mm_float2
>*
cosSinSums
;
OpenCLArray
<
mm_float2
>*
pmeGrid
;
OpenCLArray
<
mm_float2
>*
pmeGrid2
;
OpenCLArray
<
cl_float
>*
pmeBsplineModuliX
;
OpenCLArray
<
cl_float
>*
pmeBsplineModuliY
;
OpenCLArray
<
cl_float
>*
pmeBsplineModuliZ
;
...
...
platforms/opencl/src/kernels/fft.cl
View file @
c08d8a53
...
...
@@ -6,15 +6,19 @@ float2 multiplyComplex(float2 c1, float2 c2) {
*
Perform
a
1D
FFT
on
each
row
along
one
axis.
*/
__kernel
void
execFFT
(
__global
float2*
matrix
,
float
sign,
__local
float2*
w,
__local
float2*
data0,
__local
float2*
data1
)
{
for
(
int
i
=
get_local_id
(
0
)
; i <
X
SIZE; i += get_local_size(0))
w[i]
=
(
float2
)
(
cos
(
-sign*i*2*M_PI/
X
SIZE
)
,
sin
(
-sign*i*2*M_PI/
X
SIZE
))
;
__kernel
void
execFFT
(
__global
float2*
in,
__global
float2*
out
,
float
sign,
__local
float2*
w,
__local
float2*
data0,
__local
float2*
data1
)
{
for
(
int
i
=
get_local_id
(
0
)
; i <
Z
SIZE; i += get_local_size(0))
w[i]
=
(
float2
)
(
cos
(
-sign*i*2*M_PI/
Z
SIZE
)
,
sin
(
-sign*i*2*M_PI/
Z
SIZE
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
index
=
get_group_id
(
0
)
; index < YSIZE*ZSIZE; index += get_num_groups(0)) {
int
z
=
index/YSIZE
;
int
y
=
index-z*YSIZE
;
for
(
int
i
=
get_local_id
(
0
)
; i < XSIZE; i += get_local_size(0))
data0[i]
=
matrix[i*XMULT+y*YMULT+z*ZMULT]
;
for
(
int
index
=
get_group_id
(
0
)
; index < XSIZE*YSIZE; index += get_num_groups(0)) {
int
x
=
index/YSIZE
;
int
y
=
index-x*YSIZE
;
#
ifdef
LOOP_REQUIRED
for
(
int
z
=
get_local_id
(
0
)
; z < ZSIZE; z += get_local_size(0))
data0[z]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+z]
;
#
else
data0[get_local_id
(
0
)
]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+get_local_id
(
0
)
]
;
#
endif
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
COMPUTE_FFT
}
...
...
platforms/opencl/tests/TestOpenCLFFT.cpp
0 → 100644
View file @
c08d8a53
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the OpenCL implementation of sorting.
*/
#include "../../../tests/AssertionUtilities.h"
#include "../src/OpenCLArray.h"
#include "../src/OpenCLContext.h"
#include "../src/OpenCLFFT3D.h"
#include "../src/OpenCLSort.h"
#include "../src/SimTKReference/fftpack.h"
#include "sfmt/SFMT.h"
#include "openmm/System.h"
#include <iostream>
#include <cmath>
#include <set>
using
namespace
OpenMM
;
using
namespace
std
;
void
testTransform
()
{
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
1
,
""
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
(
system
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
int
xsize
=
32
,
ysize
=
25
,
zsize
=
30
;
vector
<
mm_float2
>
original
(
xsize
*
ysize
*
zsize
);
vector
<
t_complex
>
reference
(
original
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
original
.
size
();
i
++
)
{
mm_float2
value
=
mm_float2
(
genrand_real2
(
sfmt
),
genrand_real2
(
sfmt
));
original
[
i
]
=
value
;
reference
[
i
]
=
t_complex
(
value
.
x
,
value
.
y
);
}
OpenCLArray
<
mm_float2
>
grid1
(
context
,
original
.
size
(),
"grid1"
);
OpenCLArray
<
mm_float2
>
grid2
(
context
,
original
.
size
(),
"grid2"
);
grid1
.
upload
(
original
);
OpenCLFFT3D
fft
(
context
,
xsize
,
ysize
,
zsize
);
// Perform a forward FFT, then verify the result is correct.
fft
.
execFFT
(
grid1
,
grid2
,
true
);
vector
<
mm_float2
>
result
;
grid2
.
download
(
result
);
fftpack_t
plan
;
fftpack_init_3d
(
&
plan
,
xsize
,
ysize
,
zsize
);
fftpack_exec_3d
(
plan
,
FFTPACK_FORWARD
,
&
reference
[
0
],
&
reference
[
0
]);
for
(
int
i
=
0
;
i
<
(
int
)
result
.
size
();
++
i
)
{
ASSERT_EQUAL_TOL
(
reference
[
i
].
re
,
result
[
i
].
x
,
1e-4
);
ASSERT_EQUAL_TOL
(
reference
[
i
].
im
,
result
[
i
].
y
,
1e-4
);
}
fftpack_destroy
(
plan
);
// Perform a backward transform and see if we get the original values.
fft
.
execFFT
(
grid2
,
grid1
,
false
);
grid1
.
download
(
result
);
double
scale
=
1.0
/
(
xsize
*
ysize
*
zsize
);
for
(
int
i
=
0
;
i
<
(
int
)
result
.
size
();
++
i
)
{
ASSERT_EQUAL_TOL
(
original
[
i
].
x
,
scale
*
result
[
i
].
x
,
1e-4
);
ASSERT_EQUAL_TOL
(
original
[
i
].
y
,
scale
*
result
[
i
].
y
,
1e-4
);
}
}
int
main
()
{
try
{
testTransform
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment