OpenCLFFT3D.cpp 10.5 KB
Newer Older
Peter Eastman's avatar
Peter Eastman committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "OpenCLFFT3D.h"
#include "OpenCLExpressionUtilities.h"
29
#include "OpenCLKernelSources.h"
Peter Eastman's avatar
Peter Eastman committed
30
31
32
33
34
35
36
37
38
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include <map>
#include <sstream>
#include <string>

using namespace OpenMM;
using namespace std;

OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize) : context(context), xsize(xsize), ysize(ysize), zsize(zsize) {
Peter Eastman's avatar
Peter Eastman committed
39
40
41
    xkernel = createKernel(xsize, ysize, zsize, ysize*zsize, zsize, 1);
    ykernel = createKernel(ysize, zsize, xsize, zsize, 1, ysize*zsize);
    zkernel = createKernel(zsize, xsize, ysize, 1, ysize*zsize, zsize);
Peter Eastman's avatar
Peter Eastman committed
42
43
44
}

void OpenCLFFT3D::execFFT(OpenCLArray<mm_float2>& data, bool forward) {
45
    int maxSize = xkernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice());
Peter Eastman's avatar
Peter Eastman committed
46
47
    xkernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
    xkernel.setArg<cl_float>(1, forward ? 1.0f : -1.0f);
48
    context.executeKernel(xkernel, xsize*ysize*zsize, min(xsize, (int) maxSize));
Peter Eastman's avatar
Peter Eastman committed
49
50
    ykernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
    ykernel.setArg<cl_float>(1, forward ? 1.0f : -1.0f);
51
    context.executeKernel(ykernel, xsize*ysize*zsize, min(ysize, (int) maxSize));
Peter Eastman's avatar
Peter Eastman committed
52
53
    zkernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
    zkernel.setArg<cl_float>(1, forward ? 1.0f : -1.0f);
54
    context.executeKernel(zkernel, xsize*ysize*zsize, min(zsize, (int) maxSize));
Peter Eastman's avatar
Peter Eastman committed
55
56
}

Peter Eastman's avatar
Peter Eastman committed
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
int OpenCLFFT3D::findLegalDimension(int minimum) {
    if (minimum < 1)
        return 1;
    while (true) {
        // Attempt to factor the current value.

        int unfactored = minimum;
        for (int factor = 2; factor < 6; factor++) {
            while (unfactored > 1 && unfactored%factor == 0)
                unfactored /= factor;
        }
        if (unfactored == 1)
            return minimum;
        minimum++;
    }
}

cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int xmult, int ymult, int zmult) {
Peter Eastman's avatar
Peter Eastman committed
75
    stringstream source;
Peter Eastman's avatar
Peter Eastman committed
76
    int unfactored = xsize;
Peter Eastman's avatar
Peter Eastman committed
77
    int stage = 0;
Peter Eastman's avatar
Peter Eastman committed
78
    int L = xsize;
Peter Eastman's avatar
Peter Eastman committed
79
    int m = 1;
Peter Eastman's avatar
Peter Eastman committed
80
81
82

    // Factor xsize, generating an appropriate block of code for each factor.

Peter Eastman's avatar
Peter Eastman committed
83
84
85
86
87
88
89
    while (unfactored > 1) {
        int input = stage%2;
        int output = 1-input;
        source<<"{\n";
        if (unfactored%5 == 0) {
            L = L/5;
            source<<"// Pass "<<(stage+1)<<" (radix 5)\n";
90
            source<<"for (int i = get_local_id(0); i < "<<(L*m)<<"; i += get_local_size(0)) {\n";
Peter Eastman's avatar
Peter Eastman committed
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
            source<<"int j = i/"<<m<<";\n";
            source<<"float2 c0 = data"<<input<<"[i];\n";
            source<<"float2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
            source<<"float2 c2 = data"<<input<<"[i+"<<(2*L*m)<<"];\n";
            source<<"float2 c3 = data"<<input<<"[i+"<<(3*L*m)<<"];\n";
            source<<"float2 c4 = data"<<input<<"[i+"<<(4*L*m)<<"];\n";
            source<<"float2 d0 = c1+c4;\n";
            source<<"float2 d1 = c2+c3;\n";
            source<<"float2 d2 = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c1-c4);\n";
            source<<"float2 d3 = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c2-c3);\n";
            source<<"float2 d4 = d0+d1;\n";
            source<<"float2 d5 = "<<OpenCLExpressionUtilities::doubleToString(0.25*sqrt(5.0))<<"*(d0-d1);\n";
            source<<"float2 d6 = c0-0.25f*d4;\n";
            source<<"float2 d7 = d6+d5;\n";
            source<<"float2 d8 = d6-d5;\n";
            string coeff = OpenCLExpressionUtilities::doubleToString(sin(0.2*M_PI)/sin(0.4*M_PI));
            source<<"float2 d9 = sign*(float2) (d2.y+"<<coeff<<"*d3.y, -d2.x-"<<coeff<<"*d3.x);\n";
            source<<"float2 d10 = sign*(float2) ("<<coeff<<"*d2.y-d3.y, d3.x-"<<coeff<<"*d2.x);\n";
            source<<"data"<<output<<"[i+4*j*"<<m<<"] = c0+d4;\n";
Peter Eastman's avatar
Peter Eastman committed
110
111
112
113
            source<<"data"<<output<<"[i+(4*j+1)*"<<m<<"] = multiplyComplex(w[j*"<<xsize<<"/"<<(5*L)<<"], d7+d9);\n";
            source<<"data"<<output<<"[i+(4*j+2)*"<<m<<"] = multiplyComplex(w[j*"<<(2*xsize)<<"/"<<(5*L)<<"], d8+d10);\n";
            source<<"data"<<output<<"[i+(4*j+3)*"<<m<<"] = multiplyComplex(w[j*"<<(3*xsize)<<"/"<<(5*L)<<"], d8-d10);\n";
            source<<"data"<<output<<"[i+(4*j+4)*"<<m<<"] = multiplyComplex(w[j*"<<(4*xsize)<<"/"<<(5*L)<<"], d7-d9);\n";
114
            source<<"}\n";
Peter Eastman's avatar
Peter Eastman committed
115
116
117
118
119
120
            m = m*5;
            unfactored /= 5;
        }
        else if (unfactored%4 == 0) {
            L = L/4;
            source<<"// Pass "<<(stage+1)<<" (radix 4)\n";
121
            source<<"for (int i = get_local_id(0); i < "<<(L*m)<<"; i += get_local_size(0)) {\n";
Peter Eastman's avatar
Peter Eastman committed
122
123
124
125
126
127
128
129
130
131
            source<<"int j = i/"<<m<<";\n";
            source<<"float2 c0 = data"<<input<<"[i];\n";
            source<<"float2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
            source<<"float2 c2 = data"<<input<<"[i+"<<(2*L*m)<<"];\n";
            source<<"float2 c3 = data"<<input<<"[i+"<<(3*L*m)<<"];\n";
            source<<"float2 d0 = c0+c2;\n";
            source<<"float2 d1 = c0-c2;\n";
            source<<"float2 d2 = c1+c3;\n";
            source<<"float2 d3 = sign*(float2) (c1.y-c3.y, c3.x-c1.x);\n";
            source<<"data"<<output<<"[i+3*j*"<<m<<"] = d0+d2;\n";
Peter Eastman's avatar
Peter Eastman committed
132
133
134
            source<<"data"<<output<<"[i+(3*j+1)*"<<m<<"] = multiplyComplex(w[j*"<<xsize<<"/"<<(4*L)<<"], d1+d3);\n";
            source<<"data"<<output<<"[i+(3*j+2)*"<<m<<"] = multiplyComplex(w[j*"<<(2*xsize)<<"/"<<(4*L)<<"], d0-d2);\n";
            source<<"data"<<output<<"[i+(3*j+3)*"<<m<<"] = multiplyComplex(w[j*"<<(3*xsize)<<"/"<<(4*L)<<"], d1-d3);\n";
135
            source<<"}\n";
Peter Eastman's avatar
Peter Eastman committed
136
137
138
139
140
141
            m = m*4;
            unfactored /= 4;
        }
        else if (unfactored%3 == 0) {
            L = L/3;
            source<<"// Pass "<<(stage+1)<<" (radix 3)\n";
142
            source<<"for (int i = get_local_id(0); i < "<<(L*m)<<"; i += get_local_size(0)) {\n";
Peter Eastman's avatar
Peter Eastman committed
143
144
145
146
147
148
149
150
            source<<"int j = i/"<<m<<";\n";
            source<<"float2 c0 = data"<<input<<"[i];\n";
            source<<"float2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
            source<<"float2 c2 = data"<<input<<"[i+"<<(2*L*m)<<"];\n";
            source<<"float2 d0 = c1+c2;\n";
            source<<"float2 d1 = c0-0.5f*d0;\n";
            source<<"float2 d2 = sign*"<<OpenCLExpressionUtilities::doubleToString(sin(M_PI/3.0))<<"*(float2) (c1.y-c2.y, c2.x-c1.x);\n";
            source<<"data"<<output<<"[i+2*j*"<<m<<"] = c0+d0;\n";
Peter Eastman's avatar
Peter Eastman committed
151
152
            source<<"data"<<output<<"[i+(2*j+1)*"<<m<<"] = multiplyComplex(w[j*"<<xsize<<"/"<<(3*L)<<"], d1+d2);\n";
            source<<"data"<<output<<"[i+(2*j+2)*"<<m<<"] = multiplyComplex(w[j*"<<(2*xsize)<<"/"<<(3*L)<<"], d1-d2);\n";
153
            source<<"}\n";
Peter Eastman's avatar
Peter Eastman committed
154
155
156
157
158
159
            m = m*3;
            unfactored /= 3;
        }
        else if (unfactored%2 == 0) {
            L = L/2;
            source<<"// Pass "<<(stage+1)<<" (radix 2)\n";
160
            source<<"for (int i = get_local_id(0); i < "<<(L*m)<<"; i += get_local_size(0)) {\n";
Peter Eastman's avatar
Peter Eastman committed
161
162
163
164
            source<<"int j = i/"<<m<<";\n";
            source<<"float2 c0 = data"<<input<<"[i];\n";
            source<<"float2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
            source<<"data"<<output<<"[i+j*"<<m<<"] = c0+c1;\n";
Peter Eastman's avatar
Peter Eastman committed
165
            source<<"data"<<output<<"[i+(j+1)*"<<m<<"] = multiplyComplex(w[j*"<<xsize<<"/"<<(2*L)<<"], c0-c1);\n";
166
            source<<"}\n";
Peter Eastman's avatar
Peter Eastman committed
167
168
169
170
            m = m*2;
            unfactored /= 2;
        }
        else
Peter Eastman's avatar
Peter Eastman committed
171
            throw OpenMMException("Illegal size for FFT: "+OpenCLExpressionUtilities::intToString(xsize));
Peter Eastman's avatar
Peter Eastman committed
172
173
174
175
        source<<"barrier(CLK_LOCAL_MEM_FENCE);\n";
        source<<"}\n";
        ++stage;
    }
Peter Eastman's avatar
Peter Eastman committed
176
177
178

    // Create the kernel.

179
180
    source<<"for (int i = get_local_id(0); i < XSIZE; i += get_local_size(0))\n";
    source<<"matrix[i*XMULT+y*YMULT+z*ZMULT] = data"<<(stage%2)<<"[i];\n";
181
    source<<"barrier(CLK_GLOBAL_MEM_FENCE);";
Peter Eastman's avatar
Peter Eastman committed
182
183
184
185
186
187
188
189
    map<string, string> replacements;
    replacements["XSIZE"] = OpenCLExpressionUtilities::intToString(xsize);
    replacements["YSIZE"] = OpenCLExpressionUtilities::intToString(ysize);
    replacements["ZSIZE"] = OpenCLExpressionUtilities::intToString(zsize);
    replacements["XMULT"] = OpenCLExpressionUtilities::intToString(xmult);
    replacements["YMULT"] = OpenCLExpressionUtilities::intToString(ymult);
    replacements["ZMULT"] = OpenCLExpressionUtilities::intToString(zmult);
    replacements["M_PI"] = OpenCLExpressionUtilities::doubleToString(M_PI);
Peter Eastman's avatar
Peter Eastman committed
190
    replacements["COMPUTE_FFT"] = source.str();
191
    cl::Program program = context.createProgram(context.replaceStrings(OpenCLKernelSources::fft, replacements));
Peter Eastman's avatar
Peter Eastman committed
192
    cl::Kernel kernel(program, "execFFT");
Peter Eastman's avatar
Peter Eastman committed
193
194
195
    kernel.setArg(2, xsize*sizeof(mm_float2), NULL);
    kernel.setArg(3, xsize*sizeof(mm_float2), NULL);
    kernel.setArg(4, xsize*sizeof(mm_float2), NULL);
Peter Eastman's avatar
Peter Eastman committed
196
197
    return kernel;
}