OpenCLNonbondedUtilities.cpp 21.8 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
9
 * Portions copyright (c) 2009-2010 Stanford University and the Authors.      *
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "OpenCLNonbondedUtilities.h"
#include "OpenCLArray.h"
29
#include "OpenCLKernelSources.h"
30
#include "OpenCLExpressionUtilities.h"
31
#include <map>
32
33
#include <set>
#include <utility>
34
35
36
37
38

using namespace OpenMM;
using namespace std;

OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), cutoff(-1.0), useCutoff(false),
39
40
        numForceBuffers(0), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusions(NULL), interactingTiles(NULL), interactionFlags(NULL),
        interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL) {
41
42
43
44
45
46
47
48
49
50
    // Decide how many force buffers to use.

    forceBufferPerAtomBlock = false;
    numForceBuffers = context.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize/OpenCLContext::TileSize;
    if (numForceBuffers >= context.getNumAtomBlocks()) {
        // For small systems, it is more efficient to have one force buffer per block of 32 atoms instead of one per warp.

        forceBufferPerAtomBlock = true;
        numForceBuffers = context.getNumAtomBlocks();
    }
51
52
53
}

OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
54
55
56
57
    if (exclusionIndices != NULL)
        delete exclusionIndices;
    if (exclusionRowIndices != NULL)
        delete exclusionRowIndices;
58
59
    if (exclusions != NULL)
        delete exclusions;
60
61
62
63
64
65
66
67
68
69
    if (interactingTiles != NULL)
        delete interactingTiles;
    if (interactionFlags != NULL)
        delete interactionFlags;
    if (interactionCount != NULL)
        delete interactionCount;
    if (blockCenter != NULL)
        delete blockCenter;
    if (blockBoundingBox != NULL)
        delete blockBoundingBox;
70
71
}

72
void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel) {
73
74
75
76
77
78
79
    if (cutoff != -1.0) {
        if (usesCutoff != useCutoff)
            throw OpenMMException("All Forces must agree on whether to use a cutoff");
        if (usesPeriodic != usePeriodic)
            throw OpenMMException("All Forces must agree on whether to use periodic boundary conditions");
        if (cutoffDistance != cutoff)
            throw OpenMMException("All Forces must use the same cutoff distance");
80
81
    }
    if (usesExclusions && atomExclusions.size() != 0) {
82
        bool sameExclusions = (exclusionList.size() == atomExclusions.size());
83
        for (int i = 0; i < (int) exclusionList.size() && sameExclusions; i++) {
84
85
            if (exclusionList[i].size() != atomExclusions[i].size())
                sameExclusions = false;
86
            for (int j = 0; j < (int) exclusionList[i].size(); j++)
87
88
89
90
91
92
                if (exclusionList[i][j] != atomExclusions[i][j])
                    sameExclusions = false;
        }
        if (!sameExclusions)
            throw OpenMMException("All Forces must have identical exceptions");
    }
93
94
95
96
97
98
    useCutoff = usesCutoff;
    usePeriodic = usesPeriodic;
    cutoff = cutoffDistance;
    kernelSource += kernel+"\n";
    if (usesExclusions)
        atomExclusions = exclusionList;
99
100
}

101
102
void OpenCLNonbondedUtilities::addParameter(const ParameterInfo& parameter) {
    parameters.push_back(parameter);
103
104
}

105
106
107
108
void OpenCLNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
    arguments.push_back(parameter);
}

109
110
111
112
void OpenCLNonbondedUtilities::initialize(const System& system) {
    if (cutoff == -1.0)
        return; // There are no nonbonded interactions in the System.
    
113
114
115
116
    if (atomExclusions.size() == 0) {
        // No exclusions were specifically requested, so just mark every atom as not interacting with itself.
        
        atomExclusions.resize(context.getNumAtoms());
117
        for (int i = 0; i < (int) atomExclusions.size(); i++)
118
119
120
            atomExclusions[i].push_back(i);
    }

121
122
123
124
125
126
127
    // Create the list of tiles.

    int numAtomBlocks = context.getNumAtomBlocks();
    int numTiles = numAtomBlocks*(numAtomBlocks+1)/2;

    // Build a list of indices for the tiles with exclusions.

128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
    set<pair<int, int> > tilesWithExclusions;
    for (int atom1 = 0; atom1 < (int) atomExclusions.size(); ++atom1) {
        int x = atom1/OpenCLContext::TileSize;
        for (int j = 0; j < (int) atomExclusions[atom1].size(); ++j) {
            int atom2 = atomExclusions[atom1][j];
            int y = atom2/OpenCLContext::TileSize;
            tilesWithExclusions.insert(make_pair(max(x, y), min(x, y)));
        }
    }
    if (context.getPaddedNumAtoms() > context.getNumAtoms()) {
        for (int i = 0; i < numAtomBlocks; ++i)
            tilesWithExclusions.insert(make_pair(numAtomBlocks-1, i));
    }
    vector<cl_uint> exclusionRowIndicesVec(numAtomBlocks+1, 0);
    vector<cl_uint> exclusionIndicesVec;
    int currentRow = 0;
    for (set<pair<int, int> >::const_iterator iter = tilesWithExclusions.begin(); iter != tilesWithExclusions.end(); ++iter) {
        while (iter->first != currentRow)
            exclusionRowIndicesVec[++currentRow] = exclusionIndicesVec.size();
        exclusionIndicesVec.push_back(iter->second);
    }
    exclusionRowIndicesVec[++currentRow] = exclusionIndicesVec.size();
    exclusionIndices = new OpenCLArray<cl_uint>(context, exclusionIndicesVec.size(), "exclusionIndices");
    exclusionRowIndices = new OpenCLArray<cl_uint>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices");
    exclusionIndices->upload(exclusionIndicesVec);
    exclusionRowIndices->upload(exclusionRowIndicesVec);
154
155
156

    // Record the exclusion data.

157
    exclusions = new OpenCLArray<cl_uint>(context, tilesWithExclusions.size()*OpenCLContext::TileSize, "exclusions");
158
159
160
161
162
163
164
165
166
167
168
    vector<cl_uint> exclusionVec(exclusions->getSize());
    for (int i = 0; i < exclusions->getSize(); ++i)
        exclusionVec[i] = 0xFFFFFFFF;
    for (int atom1 = 0; atom1 < (int) atomExclusions.size(); ++atom1) {
        int x = atom1/OpenCLContext::TileSize;
        int offset1 = atom1-x*OpenCLContext::TileSize;
        for (int j = 0; j < (int) atomExclusions[atom1].size(); ++j) {
            int atom2 = atomExclusions[atom1][j];
            int y = atom2/OpenCLContext::TileSize;
            int offset2 = atom2-y*OpenCLContext::TileSize;
            if (x > y) {
169
170
                int index = findExclusionIndex(x, y, exclusionIndicesVec, exclusionRowIndicesVec);
                exclusionVec[index+offset1] &= 0xFFFFFFFF-(1<<offset2);
171
172
            }
            else {
173
174
                int index = findExclusionIndex(y, x, exclusionIndicesVec, exclusionRowIndicesVec);
                exclusionVec[index+offset2] &= 0xFFFFFFFF-(1<<offset1);
175
176
177
178
179
180
181
182
183
184
185
186
187
            }
        }
    }

    // Mark all interactions that involve a padding atom as being excluded.

    for (int atom1 = context.getNumAtoms(); atom1 < context.getPaddedNumAtoms(); ++atom1) {
        int x = atom1/OpenCLContext::TileSize;
        int offset1 = atom1-x*OpenCLContext::TileSize;
        for (int atom2 = 0; atom2 < context.getPaddedNumAtoms(); ++atom2) {
            int y = atom2/OpenCLContext::TileSize;
            int offset2 = atom2-y*OpenCLContext::TileSize;
            if (x >= y) {
188
189
                int index = findExclusionIndex(x, y, exclusionIndicesVec, exclusionRowIndicesVec);
                exclusionVec[index+offset1] &= 0xFFFFFFFF-(1<<offset2);
190
191
            }
            if (y >= x) {
192
193
                int index = findExclusionIndex(y, x, exclusionIndicesVec, exclusionRowIndicesVec);
                exclusionVec[index+offset2] &= 0xFFFFFFFF-(1<<offset1);
194
195
196
197
198
            }
        }
    }
    atomExclusions.clear(); // We won't use this again, so free the memory it used
    exclusions->upload(exclusionVec);
199
200
201
202

    // Create data structures for the neighbor list.

    if (useCutoff) {
203
204
205
206
207
208
209
210
211
212
213
214
        // Select a size for the arrays that hold the neighbor list.  This estimate is intentionally very
        // high, because if it ever is too small, we have to fall back to the N^2 algorithm.

        mm_float4 boxSize = context.getPeriodicBoxSize();
        int maxInteractingTiles = (int) (numTiles*(cutoff/boxSize.x+cutoff/boxSize.y+cutoff/boxSize.z));
        if (maxInteractingTiles < 1)
            maxInteractingTiles = 1;
        if (maxInteractingTiles > numTiles)
            maxInteractingTiles = numTiles;
        interactingTiles = new OpenCLArray<mm_ushort2>(context, maxInteractingTiles, "interactingTiles");
        if (context.getSIMDWidth() == 32)
            interactionFlags = new OpenCLArray<cl_uint>(context, maxInteractingTiles, "interactionFlags");
215
216
217
218
        interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount");
        blockCenter = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockCenter");
        blockBoundingBox = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockBoundingBox");
    }
219
220
221

    // Create kernels.

222
    forceKernel = createInteractionKernel(kernelSource, parameters, arguments, true, true);
223
224
    if (useCutoff) {
        map<string, string> defines;
225
        defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks());
226
        defines["MAX_TILES"] = OpenCLExpressionUtilities::intToString(interactingTiles->getSize());
227
228
229
230
        if (forceBufferPerAtomBlock)
            defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
        if (usePeriodic)
            defines["USE_PERIODIC"] = "1";
231
        cl::Program interactingBlocksProgram = context.createProgram(OpenCLKernelSources::findInteractingBlocks, defines);
232
233
        findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds");
        findBlockBoundsKernel.setArg<cl_int>(0, context.getNumAtoms());
234
235
236
        findBlockBoundsKernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer());
        findBlockBoundsKernel.setArg<cl::Buffer>(4, blockCenter->getDeviceBuffer());
        findBlockBoundsKernel.setArg<cl::Buffer>(5, blockBoundingBox->getDeviceBuffer());
237
        findBlockBoundsKernel.setArg<cl::Buffer>(6, interactionCount->getDeviceBuffer());
238
        findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions");
239
240
241
242
243
        findInteractingBlocksKernel.setArg<cl_float>(0, (cl_float) (cutoff*cutoff));
        findInteractingBlocksKernel.setArg<cl::Buffer>(3, blockCenter->getDeviceBuffer());
        findInteractingBlocksKernel.setArg<cl::Buffer>(4, blockBoundingBox->getDeviceBuffer());
        findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer());
        findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
244
        findInteractingBlocksKernel.setArg<cl::Buffer>(7, context.getPosq().getDeviceBuffer());
245
246
247
248
249
250
251
252
253
254
255
        if (context.getSIMDWidth() == 32) {
            findInteractionsWithinBlocksKernel = cl::Kernel(interactingBlocksProgram, "findInteractionsWithinBlocks");
            findInteractionsWithinBlocksKernel.setArg<cl_float>(0, (cl_float) (cutoff*cutoff));
            findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer());
            findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(4, interactingTiles->getDeviceBuffer());
            findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(5, blockCenter->getDeviceBuffer());
            findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(6, blockBoundingBox->getDeviceBuffer());
            findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(7, interactionFlags->getDeviceBuffer());
            findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(8, interactionCount->getDeviceBuffer());
            findInteractionsWithinBlocksKernel.setArg(9, OpenCLContext::ThreadBlockSize*sizeof(cl_uint), NULL);
        }
256
    }
257
258
}

259
260
261
262
263
264
265
266
267
int OpenCLNonbondedUtilities::findExclusionIndex(int x, int y, const vector<cl_uint>& exclusionIndices, const vector<cl_uint>& exclusionRowIndices) {
    int start = exclusionRowIndices[x];
    int end = exclusionRowIndices[x+1];
    for (int i = start; i < end; i++)
        if (exclusionIndices[i] == y)
            return i*OpenCLContext::TileSize;
    throw OpenMMException("Internal error: exclusion in unexpected tile");
}

268
269
270
void OpenCLNonbondedUtilities::prepareInteractions() {
    if (!useCutoff)
        return;
271
272
273

    // Compute the neighbor list.

274
275
    findBlockBoundsKernel.setArg<mm_float4>(1, context.getPeriodicBoxSize());
    findBlockBoundsKernel.setArg<mm_float4>(2, context.getInvPeriodicBoxSize());
276
    context.executeKernel(findBlockBoundsKernel, context.getNumAtoms());
277
278
    findInteractingBlocksKernel.setArg<mm_float4>(1, context.getPeriodicBoxSize());
    findInteractingBlocksKernel.setArg<mm_float4>(2, context.getInvPeriodicBoxSize());
279
    context.executeKernel(findInteractingBlocksKernel, context.getNumAtoms());
280
281
    vector<cl_uint> count;
    interactionCount->download(count);
282
283
284
    if (context.getSIMDWidth() == 32) {
        findInteractionsWithinBlocksKernel.setArg<mm_float4>(1, context.getPeriodicBoxSize());
        findInteractionsWithinBlocksKernel.setArg<mm_float4>(2, context.getInvPeriodicBoxSize());
Peter Eastman's avatar
Peter Eastman committed
285
        context.executeKernel(findInteractionsWithinBlocksKernel, context.getNumAtoms());
286
    }
287
288
289
}

void OpenCLNonbondedUtilities::computeInteractions() {
290
    if (cutoff != -1.0) {
291
        if (useCutoff) {
292
293
            forceKernel.setArg<mm_float4>(10, context.getPeriodicBoxSize());
            forceKernel.setArg<mm_float4>(11, context.getInvPeriodicBoxSize());
294
        }
295
        context.executeKernel(forceKernel, (context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2)*OpenCLContext::TileSize);
296
    }
297
298
}

299
cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo>& params, const vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) const {
300
301
    map<string, string> replacements;
    replacements["COMPUTE_INTERACTION"] = source;
302
    int localDataSize = 7*sizeof(cl_float);
303
    const string suffixes[] = {"x", "y", "z", "w"};
304
305
    stringstream localData;
    for (int i = 0; i < (int) params.size(); i++) {
306
307
308
309
310
311
        if (params[i].getNumComponents() == 1)
            localData<<params[i].getType()<<" "<<params[i].getName()<<";\n";
        else {
            for (int j = 0; j < params[i].getNumComponents(); ++j)
                localData<<params[i].getComponentType()<<" "<<params[i].getName()<<"_"<<suffixes[j]<<";\n";
        }
312
313
314
315
316
317
318
        localDataSize += params[i].getSize();
    }
    if ((localDataSize/4)%2 == 0) {
        localData << "float padding;\n";
        localDataSize += 4;
    }
    replacements["ATOM_PARAMETER_DATA"] = localData.str();
319
    stringstream args;
320
    for (int i = 0; i < (int) params.size(); i++) {
321
322
323
324
325
        args << ", __global ";
        args << params[i].getType();
        args << "* global_";
        args << params[i].getName();
    }
326
    for (int i = 0; i < (int) arguments.size(); i++) {
327
328
329
330
331
332
333
334
335
336
337
338
339
        if (arguments[i].getMemory().getInfo<CL_MEM_TYPE>() == CL_MEM_OBJECT_IMAGE2D) {
            args << ", __read_only image2d_t ";
            args << arguments[i].getName();
        }
        else {
            if ((arguments[i].getMemory().getInfo<CL_MEM_FLAGS>() & CL_MEM_READ_ONLY) == 0)
                args << ", __global ";
            else
                args << ", __constant ";
            args << arguments[i].getType();
            args << "* ";
            args << arguments[i].getName();
        }
340
    }
341
342
    replacements["PARAMETER_ARGUMENTS"] = args.str();
    stringstream loadLocal1;
343
    for (int i = 0; i < (int) params.size(); i++) {
344
        if (params[i].getNumComponents() == 1) {
345
            loadLocal1<<"localData[get_local_id(0)]."<<params[i].getName()<<" = "<<params[i].getName()<<"1;\n";
346
347
348
        }
        else {
            for (int j = 0; j < params[i].getNumComponents(); ++j)
349
                loadLocal1<<"localData[get_local_id(0)]."<<params[i].getName()<<"_"<<suffixes[j]<<" = "<<params[i].getName()<<"1."<<suffixes[j]<<";\n";
350
        }
351
352
353
    }
    replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
    stringstream loadLocal2;
354
    for (int i = 0; i < (int) params.size(); i++) {
355
        if (params[i].getNumComponents() == 1) {
356
            loadLocal2<<"localData[get_local_id(0)]."<<params[i].getName()<<" = global_"<<params[i].getName()<<"[j];\n";
357
358
359
360
        }
        else {
            loadLocal2<<params[i].getType()<<" temp_"<<params[i].getName()<<" = global_"<<params[i].getName()<<"[j];\n";
            for (int j = 0; j < params[i].getNumComponents(); ++j)
361
                loadLocal2<<"localData[get_local_id(0)]."<<params[i].getName()<<"_"<<suffixes[j]<<" = temp_"<<params[i].getName()<<"."<<suffixes[j]<<";\n";
362
        }
363
364
365
    }
    replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
    stringstream load1;
366
    for (int i = 0; i < (int) params.size(); i++) {
367
368
369
370
371
        load1 << params[i].getType();
        load1 << " ";
        load1 << params[i].getName();
        load1 << "1 = global_";
        load1 << params[i].getName();
372
        load1 << "[atom1];\n";
373
374
375
    }
    replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
    stringstream load2j;
376
    for (int i = 0; i < (int) params.size(); i++) {
377
        if (params[i].getNumComponents() == 1) {
378
            load2j<<params[i].getType()<<" "<<params[i].getName()<<"2 = localData[atom2]."<<params[i].getName()<<";\n";
379
380
381
382
383
384
        }
        else {
            load2j<<params[i].getType()<<" "<<params[i].getName()<<"2 = ("<<params[i].getType()<<") (";
            for (int j = 0; j < params[i].getNumComponents(); ++j) {
                if (j > 0)
                    load2j<<", ";
385
                load2j<<"localData[atom2]."<<params[i].getName()<<"_"<<suffixes[j];
386
387
388
            }
            load2j<<");\n";
        }
389
    }
390
    replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str();
391
392
393
394
395
396
397
398
399
    map<string, string> defines;
    if (forceBufferPerAtomBlock)
        defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
    if (useCutoff)
        defines["USE_CUTOFF"] = "1";
    if (usePeriodic)
        defines["USE_PERIODIC"] = "1";
    if (useExclusions)
        defines["USE_EXCLUSIONS"] = "1";
400
401
    if (isSymmetric)
        defines["USE_SYMMETRIC"] = "1";
402
    defines["CUTOFF_SQUARED"] = OpenCLExpressionUtilities::doubleToString(cutoff*cutoff);
403
404
    defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getNumAtoms());
    defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getPaddedNumAtoms());
405
    defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks());
406
407
    if (useCutoff)
        defines["MAX_TILES"] = OpenCLExpressionUtilities::intToString(interactingTiles->getSize());
408
409
    string file = (context.getSIMDWidth() == 32 ? OpenCLKernelSources::nonbonded_nvidia : OpenCLKernelSources::nonbonded_default);
    cl::Program program = context.createProgram(context.replaceStrings(file, replacements), defines);
410
411
412
    cl::Kernel kernel(program, "computeNonbonded");

    // Set arguments to the Kernel.
413

414
415
416
417
418
    int index = 0;
    kernel.setArg<cl::Buffer>(index++, context.getForceBuffers().getDeviceBuffer());
    kernel.setArg<cl::Buffer>(index++, context.getEnergyBuffer().getDeviceBuffer());
    kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
    kernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer());
419
420
    kernel.setArg<cl::Buffer>(index++, exclusionIndices->getDeviceBuffer());
    kernel.setArg<cl::Buffer>(index++, exclusionRowIndices->getDeviceBuffer());
421
    kernel.setArg(index++, OpenCLContext::ThreadBlockSize*localDataSize, NULL);
422
    kernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
423
    if (useCutoff) {
424
425
        kernel.setArg<cl::Buffer>(index++, interactingTiles->getDeviceBuffer());
        kernel.setArg<cl::Buffer>(index++, interactionCount->getDeviceBuffer());
426
        index += 2; // The periodic box size arguments are set when the kernel is executed.
427
428
        if (context.getSIMDWidth() == 32)
            kernel.setArg<cl::Buffer>(index++, interactionFlags->getDeviceBuffer());
429
    }
430
    else {
431
        kernel.setArg<cl_uint>(index++, context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2);
432
    }
433
    for (int i = 0; i < (int) params.size(); i++) {
434
        kernel.setArg<cl::Memory>(index++, params[i].getMemory());
435
    }
436
    for (int i = 0; i < (int) arguments.size(); i++) {
437
        kernel.setArg<cl::Memory>(index++, arguments[i].getMemory());
438
    }
439
    return kernel;
440
}