OpenCLContext.h 11.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#ifndef OPENMM_OPENCLCONTEXT_H_
#define OPENMM_OPENCLCONTEXT_H_

/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

30
31
#include <map>
#include <string>
#include <vector>
#define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER
    // Prevent Windows from defining macros that interfere with other code.
    #define NOMINMAX
#endif
#include <cl.hpp>
38
39
40
41
42

namespace OpenMM {

// Forward declarations.  This header only refers to these types through
// pointers and references, so their full definitions are not required here.
template <class T>
class OpenCLArray;
class OpenCLForceInfo;
class OpenCLIntegrationUtilities;
class OpenCLNonbondedUtilities;
class System;
47

48
/**
 * We can't use predefined vector types like cl_float4, since different OpenCL implementations currently define
 * them in incompatible ways.  Hopefully that will be fixed in the future.  In the meantime, we define our own
 * types to represent them on the host.
 */

54
struct mm_float2 {
55
    cl_float x, y;
56
57
58
59
60
61
    mm_float2() {
    }
    mm_float2(cl_float x, cl_float y) : x(x), y(y) {
    }
};
 struct mm_float4 {
62
    cl_float x, y, z, w;
63
64
65
66
67
68
    mm_float4() {
    }
    mm_float4(cl_float x, cl_float y, cl_float z, cl_float w) : x(x), y(y), z(z), w(w) {
    }
};
struct mm_float8 {
69
    cl_float s0, s1, s2, s3, s4, s5, s6, s7;
70
71
72
73
74
75
76
    mm_float8() {
    }
    mm_float8(cl_float s0, cl_float s1, cl_float s2, cl_float s3, cl_float s4, cl_float s5, cl_float s6, cl_float s7) :
        s0(s0), s1(s1), s2(s2), s3(s3), s4(s4), s5(s5), s6(s6), s7(s7) {
    }
};
struct mm_int2 {
77
    cl_int x, y;
78
79
80
81
82
83
    mm_int2() {
    }
    mm_int2(cl_int x, cl_int y) : x(x), y(y) {
    }
};
struct mm_int4 {
84
    cl_int x, y, z, w;
85
86
87
88
89
90
    mm_int4() {
    }
    mm_int4(cl_int x, cl_int y, cl_int z, cl_int w) : x(x), y(y), z(z), w(w) {
    }
};
struct mm_int8 {
91
    cl_int s0, s1, s2, s3, s4, s5, s6, s7;
92
93
94
95
96
97
    mm_int8() {
    }
    mm_int8(cl_int s0, cl_int s1, cl_int s2, cl_int s3, cl_int s4, cl_int s5, cl_int s6, cl_int s7) :
        s0(s0), s1(s1), s2(s2), s3(s3), s4(s4), s5(s5), s6(s6), s7(s7) {
    }
};
98

99
100
101
102
103
104
/**
 * This class contains the information associated with a Context by the OpenCL Platform.
 */

class OpenCLContext {
public:
105
106
    static const int ThreadBlockSize = 64;
    static const int TileSize = 32;
107
    OpenCLContext(int numParticles, int deviceIndex);
108
    ~OpenCLContext();
109
110
111
112
113
114
115
116
117
    /**
     * This is called to initialize internal data structures after all Forces in the system
     * have been initialized.
     */
    void initialize(const System& system);
    /**
     * Add an OpenCLForce to this context.
     */
    void addForce(OpenCLForceInfo* force);
118
119
120
121
    /**
     * Get the cl::Context associated with this object.
     */
    cl::Context& getContext() {
122
        return context;
123
    }
124
125
126
127
128
129
    /**
     * Get the cl::Device associated with this object.
     */
    cl::Device& getDevice() {
        return device;
    }
Peter Eastman's avatar
Peter Eastman committed
130
131
132
133
134
135
    /**
     * Get the index of the cl::Device associated with this object.
     */
    int getDeviceIndex() {
        return deviceIndex;
    }
136
137
138
139
    /**
     * Get the cl::CommandQueue associated with this object.
     */
    cl::CommandQueue& getQueue() {
140
        return queue;
141
142
143
144
    }
    /**
     * Get the array which contains the position and charge of each atom.
     */
145
    OpenCLArray<mm_float4>& getPosq() {
146
147
148
        return *posq;
    }
    /**
149
     * Get the array which contains the velocity and inverse mass of each atom.
150
     */
151
    OpenCLArray<mm_float4>& getVelm() {
152
153
154
155
156
        return *velm;
    }
    /**
     * Get the array which contains the force on each atom.
     */
157
    OpenCLArray<mm_float4>& getForce() {
158
159
        return *force;
    }
160
161
162
    /**
     * Get the array which contains the buffers in which forces are computed.
     */
163
    OpenCLArray<mm_float4>& getForceBuffers() {
164
165
        return *forceBuffers;
    }
166
167
168
169
170
171
    /**
     * Get the array which contains the buffer in which energy is computed.
     */
    OpenCLArray<cl_float>& getEnergyBuffer() {
        return *energyBuffer;
    }
172
173
174
175
176
177
    /**
     * Get the array which contains the index of each atom.
     */
    OpenCLArray<cl_int>& getAtomIndex() {
        return *atomIndex;
    }
178
179
180
181
182
183
    /**
     * Get the number of cells by which the positions are offset.
     */
    std::vector<mm_int4>& getPosCellOffsets() {
        return posCellOffsets;
    }
184
185
186
187
    /**
     * Load OpenCL source code from a file in the kernels directory.
     */
    std::string loadSourceFromFile(const std::string& filename) const;
188
189
190
191
192
193
194
    /**
     * Load OpenCL source code from a file in the kernels directory.
     *
     * @param filename     the file to load
     * @param replacements a set of strings that should be replaced with new strings wherever they appear in the
     */
    std::string loadSourceFromFile(const std::string& filename, const std::map<std::string, std::string>& replacements) const;
195
196
197
198
    /**
     * Create an OpenCL Program from source code.
     */
    cl::Program createProgram(const std::string source);
199
200
201
202
203
204
    /**
     * Create an OpenCL Program from source code.
     *
     * @param defines    a set of preprocessor definitions (name, value) to define when compiling the program
     */
    cl::Program createProgram(const std::string source, const std::map<std::string, std::string>& defines);
205
206
207
    /**
     * Execute a kernel.
     *
208
209
     * @param kernel       the kernel to execute
     * @param workUnits    the maximum number of work units that should be used
210
     * @param blockSize    the size of each thread block to use
211
     */
212
    void executeKernel(cl::Kernel& kernel, int workUnits, int blockSize = -1);
213
214
215
216
217
218
219
    /**
     * Set all elements of an array to 0.
     */
    void clearBuffer(OpenCLArray<float>& array);
    /**
     * Set all elements of an array to 0.
     */
220
    void clearBuffer(OpenCLArray<mm_float4>& array);
221
222
223
224
225
226
227
    /**
     * Set all elements of an array to 0.
     *
     * @param buffer     the Buffer to clear
     * @param size       the number of float elements in the buffer
     */
    void clearBuffer(cl::Buffer& buffer, int size);
228
229
230
231
232
233
234
235
    /**
     * Given a collection of buffers packed into an array, sum them and store
     * the sum in the first buffer.
     *
     * @param array       the array containing the buffers to reduce
     * @param numBuffers  the number of buffers packed into the array
     */
    void reduceBuffer(OpenCLArray<mm_float4>& array, int numBuffers);
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
    /**
     * Get the current simulation time.
     */
    double getTime() {
        return time;
    }
    /**
     * Set the current simulation time.
     */
    void setTime(double t) {
        time = t;
    }
    /**
     * Get the number of integration steps that have been taken.
     */
    int getStepCount() {
        return stepCount;
    }
    /**
     * Set the number of integration steps that have been taken.
     */
    void setStepCount(int steps) {
258
259
260
261
262
263
264
265
266
267
268
269
270
        stepCount = steps;
    }
    /**
     * Get the number of times forces or energy has been computed.
     */
    int getComputeForceCount() {
        return computeForceCount;
    }
    /**
     * Set the number of times forces or energy has been computed.
     */
    void setComputeForceCount(int count) {
        computeForceCount = count;
271
    }
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
    /**
     * Get the number of atoms.
     */
    int getNumAtoms() const {
        return numAtoms;
    }
    /**
     * Get the number of atoms, rounded up to a multiple of TileSize.  This is the actual size of
     * most arrays with one element per atom.
     */
    int getPaddedNumAtoms() const {
        return paddedNumAtoms;
    }
    /**
     * Get the number of blocks of TileSize atoms.
     */
    int getNumAtomBlocks() const {
        return numAtomBlocks;
    }
    /**
     * Get the standard number of thread blocks to use when executing kernels.
     */
    int getNumThreadBlocks() const {
        return numThreadBlocks;
    }
    /**
     * Get the number of force buffers.
     */
    int getNumForceBuffers() const {
        return numForceBuffers;
    }
303
304
305
306
307
308
    /**
     * Get the SIMD width of the device being used.
     */
    int getSIMDWidth() const {
        return simdWidth;
    }
309
310
311
    /**
     * Get the OpenCLIntegrationUtilities for this context.
     */
312
    OpenCLIntegrationUtilities& getIntegrationUtilities() {
313
314
        return *integration;
    }
315
316
317
    /**
     * Get the OpenCLNonbondedUtilities for this context.
     */
318
    OpenCLNonbondedUtilities& getNonbondedUtilities() {
319
320
        return *nonbonded;
    }
321
322
323
324
325
    /**
     * Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
     * together in the arrays.
     */
    void reorderAtoms();
326
private:
327
328
329
330
    struct Molecule;
    struct MoleculeGroup;
    void findMoleculeGroups(const System& system);
    static void tagAtomsInMolecule(int atom, int molecule, std::vector<int>& atomMolecule, std::vector<std::vector<int> >& atomBonds);
331
    double time;
Peter Eastman's avatar
Peter Eastman committed
332
    int deviceIndex;
333
    int stepCount;
334
    int computeForceCount;
335
336
337
338
339
    int numAtoms;
    int paddedNumAtoms;
    int numAtomBlocks;
    int numThreadBlocks;
    int numForceBuffers;
340
    int simdWidth;
341
    std::string compilationOptions;
342
343
344
345
346
    cl::Context context;
    cl::Device device;
    cl::CommandQueue queue;
    cl::Program utilities;
    cl::Kernel clearBufferKernel;
347
348
    cl::Kernel reduceFloat4Kernel;
    std::vector<OpenCLForceInfo*> forces;
349
350
    std::vector<MoleculeGroup> moleculeGroups;
    std::vector<mm_int4> posCellOffsets;
351
352
353
354
    OpenCLArray<mm_float4>* posq;
    OpenCLArray<mm_float4>* velm;
    OpenCLArray<mm_float4>* force;
    OpenCLArray<mm_float4>* forceBuffers;
355
    OpenCLArray<cl_float>* energyBuffer;
356
    OpenCLArray<cl_int>* atomIndex;
357
    OpenCLIntegrationUtilities* integration;
358
    OpenCLNonbondedUtilities* nonbonded;
359
360
};

361
362
363
364
365
/**
 * Definition of the private nested type that is forward-declared inside OpenCLContext
 * and stored in its moleculeGroups member.
 *
 * NOTE(review): presumably `atoms` holds the atom indices making up one molecule of the
 * group and `instances` identifies the identical copies of that molecule -- confirm
 * against findMoleculeGroups() in the implementation file.
 */
struct OpenCLContext::MoleculeGroup {
    std::vector<int> atoms;
    std::vector<int> instances;
};

366
367
368
} // namespace OpenMM

#endif /*OPENMM_OPENCLCONTEXT_H_*/