nonbonded_cpu.cl 18.2 KB
Newer Older
1
2
3
#ifdef SUPPORTS_64_BIT_ATOMICS
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#endif
4
5

typedef struct {
6
7
8
    real x, y, z;
    real q;
    real fx, fy, fz;
9
10
11
12
13
14
15
    ATOM_PARAMETER_DATA
} AtomData;

/**
 * Compute nonbonded interactions.
 */

16
17
18
__kernel void computeNonbonded(
#ifdef SUPPORTS_64_BIT_ATOMICS
        __global long* restrict forceBuffers,
19
#else
20
        __global real4* restrict forceBuffers,
21
#endif
22
        __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
23
        __global const int2* restrict exclusionTiles, unsigned int startTileIndex, unsigned long numTileIndices
24
#ifdef USE_CUTOFF
25
        , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
26
27
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
        __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
28
#endif
29
        PARAMETER_ARGUMENTS) {
30
    mixed energy = 0;
31
    INIT_DERIVATIVES
32
    __local AtomData localData[TILE_SIZE];
33

34
    // First loop: process tiles that contain exclusions.
35

36
37
38
    const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+get_group_id(0)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
    const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(get_group_id(0)+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
    for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
39
        const int2 tileIndices = exclusionTiles[pos];
40
41
        const unsigned int x = tileIndices.x;
        const unsigned int y = tileIndices.y;
42

43
        // Load the data for this tile.
44

45
46
47
48
49
50
51
52
        for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
            unsigned int j = y*TILE_SIZE + localAtomIndex;
            real4 tempPosq = posq[j];
            localData[localAtomIndex].x = tempPosq.x;
            localData[localAtomIndex].y = tempPosq.y;
            localData[localAtomIndex].z = tempPosq.z;
            localData[localAtomIndex].q = tempPosq.w;
            LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
53
        }
54
        const bool hasExclusions = true;
55
56
57
58
59
        if (x == y) {
            // This tile is on the diagonal.

            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef USE_EXCLUSIONS
60
                unsigned int excl = exclusions[pos*TILE_SIZE+tgx];
61
62
#endif
                unsigned int atom1 = x*TILE_SIZE+tgx;
63
64
                real4 force = 0;
                real4 posq1 = posq[atom1];
65
66
                LOAD_ATOM1_PARAMETERS
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
67
68
                    real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                    real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
69
#ifdef USE_PERIODIC
70
                    APPLY_PERIODIC_TO_DELTA(delta)
71
#endif
72
                    real r2 = dot(delta.xyz, delta.xyz);
73
#ifdef USE_CUTOFF
74
                    if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
75
#endif
76
                        real invR = RSQRT(r2);
peastman's avatar
peastman committed
77
                        real r = r2*invR;
78
79
80
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
81
#ifdef USE_SYMMETRIC
82
                        real dEdR = 0;
83
#else
84
85
                        real4 dEdR1 = (real4) 0;
                        real4 dEdR2 = (real4) 0;
86
#endif
87
88
89
90
#ifdef USE_EXCLUSIONS
                        bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif
                        real tempEnergy = 0;
91
                        const real interactionScale = 0.5f;
92
93
                        COMPUTE_INTERACTION
                        energy += 0.5f*tempEnergy;
94
#ifdef USE_SYMMETRIC
95
                        force.xyz -= delta.xyz*dEdR;
96
#else
97
                        force.xyz -= dEdR1.xyz;
98
99
100
101
#endif
#ifdef USE_CUTOFF
                    }
#endif
102
#ifdef USE_EXCLUSIONS
103
                    excl >>= 1;
104
#endif
105
106
107
108
                }

                // Write results.

109
#ifdef SUPPORTS_64_BIT_ATOMICS
110
111
112
                atom_add(&forceBuffers[atom1], realToFixedPoint(force.x));
                atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
                atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
113
114
#else
                unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
115
                forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
116
#endif
117
118
119
120
121
122
            }
        }
        else {
            // This is an off-diagonal tile.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
123
124
125
                localData[tgx].fx = 0;
                localData[tgx].fy = 0;
                localData[tgx].fz = 0;
126
            }
127
128
129
130
131
132
133
134
135
136
137
138
            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                unsigned int atom1 = x*TILE_SIZE+tgx;
                real4 force = 0;
                real4 posq1 = posq[atom1];
                LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
                unsigned int excl = exclusions[pos*TILE_SIZE+tgx];
#endif
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
                    real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                    real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
139
                    APPLY_PERIODIC_TO_DELTA(delta)
140
141
#endif
                    real r2 = dot(delta.xyz, delta.xyz);
142
#ifdef USE_CUTOFF
143
                    if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
144
145
#endif
                        real invR = RSQRT(r2);
peastman's avatar
peastman committed
146
                        real r = r2*invR;
147
148
149
150
151
152
153
154
155
156
157
158
159
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC
                        real dEdR = 0;
#else
                        real4 dEdR1 = (real4) 0;
                        real4 dEdR2 = (real4) 0;
#endif
#ifdef USE_EXCLUSIONS
                        bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif
                        real tempEnergy = 0;
160
                        const real interactionScale = 1.0f;
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
                        COMPUTE_INTERACTION
                        energy += tempEnergy;
#ifdef USE_SYMMETRIC
                        delta.xyz *= dEdR;
                        force.xyz -= delta.xyz;
                        localData[j].fx += delta.x;
                        localData[j].fy += delta.y;
                        localData[j].fz += delta.z;
#else
                        force.xyz -= dEdR1.xyz;
                        localData[j].fx += dEdR2.x;
                        localData[j].fy += dEdR2.y;
                        localData[j].fz += dEdR2.z;
#endif
#ifdef USE_CUTOFF
                    }
#endif
#ifdef USE_EXCLUSIONS
                    excl >>= 1;
#endif
                }
182

183
184
185
               // Write results for atom1.

#ifdef SUPPORTS_64_BIT_ATOMICS
186
187
188
                atom_add(&forceBuffers[atom1], realToFixedPoint(force.x));
                atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
                atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
189
190
191
192
193
194
195
196
197
198
199
#else
                unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
                forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
#endif
            }

            // Write results.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS
                unsigned int offset = y*TILE_SIZE + tgx;
200
201
202
                atom_add(&forceBuffers[offset], realToFixedPoint(localData[tgx].fx));
                atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fy));
                atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fz));
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#else
                unsigned int offset = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
                real4 f = forceBuffers[offset];
                f.x += localData[tgx].fx;
                f.y += localData[tgx].fy;
                f.z += localData[tgx].fz;
                forceBuffers[offset] = f;
#endif
            }
        }
    }

    // Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
    // of them (no cutoff).

#ifdef USE_CUTOFF
    const unsigned int numTiles = interactionCount[0];
220
221
    if (numTiles > maxTiles)
        return; // There wasn't enough memory for the neighbor list.
222
223
    int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0));
    int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
224
225
#else
    const unsigned int numTiles = numTileIndices;
226
227
    int pos = (int) (startTileIndex+get_group_id(0)*(long)numTiles/get_num_groups(0));
    int end = (int) (startTileIndex+(get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
228
229
230
231
232
233
234
235
#endif
    int nextToSkip = -1;
    int currentSkipIndex = 0;
    __local int atomIndices[TILE_SIZE];

    while (pos < end) {
        const bool hasExclusions = false;
        bool includeTile = true;
236

237
        // Extract the coordinates of this tile.
238

239
        int x, y;
240
241
        bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
242
243
244
245
246
247
248
249
250
251
        x = tiles[pos];
        real4 blockSizeX = blockSize[x];
        singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
                              0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
                              0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
        y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
        x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
        if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
            y += (x < y ? -1 : 1);
252
            x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
253
        }
254

255
        // Skip over tiles that have exclusions, since they were already processed.
256

257
258
        while (nextToSkip < pos) {
            if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
259
                int2 tile = exclusionTiles[currentSkipIndex++];
260
                nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
261
            }
262
263
            else
                nextToSkip = end;
264
        }
265
266
        includeTile = (nextToSkip != pos);
#endif
267
268
269
270
271
        if (includeTile) {
            // Load the data for this tile.

            for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
#ifdef USE_CUTOFF
peastman's avatar
peastman committed
272
                unsigned int j = interactingAtoms[pos*TILE_SIZE+localAtomIndex];
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
#else
                unsigned int j = y*TILE_SIZE+localAtomIndex;
#endif
                atomIndices[localAtomIndex] = j;
                if (j < PADDED_NUM_ATOMS) {
                    real4 tempPosq = posq[j];
                    localData[localAtomIndex].x = tempPosq.x;
                    localData[localAtomIndex].y = tempPosq.y;
                    localData[localAtomIndex].z = tempPosq.z;
                    localData[localAtomIndex].q = tempPosq.w;
                    LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
                    localData[localAtomIndex].fx = 0;
                    localData[localAtomIndex].fy = 0;
                    localData[localAtomIndex].fz = 0;
                }
            }
289
#ifdef USE_PERIODIC
290
291
292
293
294
295
            if (singlePeriodicCopy) {
                // The box is small enough that we can just translate all the atoms into a single periodic
                // box, then skip having to apply periodic boundary conditions later.

                real4 blockCenterX = blockCenter[x];
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
296
                    APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[tgx], blockCenterX)
297
298
299
300
301
                }
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
                    real4 force = 0;
                    real4 posq1 = posq[atom1];
302
                    APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
303
304
305
306
307
                    LOAD_ATOM1_PARAMETERS
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
                        real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                        real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
                        real r2 = dot(delta.xyz, delta.xyz);
308
                        if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
309
                            real invR = RSQRT(r2);
peastman's avatar
peastman committed
310
                            real r = r2*invR;
311
312
313
                            unsigned int atom2 = j;
                            LOAD_ATOM2_PARAMETERS
                            atom2 = atomIndices[j];
314
#ifdef USE_SYMMETRIC
315
                            real dEdR = 0;
316
#else
317
318
319
320
321
                            real4 dEdR1 = (real4) 0;
                            real4 dEdR2 = (real4) 0;
#endif
#ifdef USE_EXCLUSIONS
                            bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
322
#endif
323
                            real tempEnergy = 0;
324
                            const real interactionScale = 1.0f;
325
326
                            COMPUTE_INTERACTION
                            energy += tempEnergy;
327
#ifdef USE_SYMMETRIC
328
329
330
331
332
                            delta.xyz *= dEdR;
                            force.xyz -= delta.xyz;
                            localData[j].fx += delta.x;
                            localData[j].fy += delta.y;
                            localData[j].fz += delta.z;
333
#else
334
335
336
337
                            force.xyz -= dEdR1.xyz;
                            localData[j].fx += dEdR2.x;
                            localData[j].fy += dEdR2.y;
                            localData[j].fz += dEdR2.z;
338
339
#endif
                        }
340
                    }
341

342
                   // Write results for atom1.
343

344
#ifdef SUPPORTS_64_BIT_ATOMICS
345
346
347
                    atom_add(&forceBuffers[atom1], realToFixedPoint(force.x));
                    atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
                    atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
348
349
350
351
#else
                    unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
                    forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
#endif
352
353
354
355
356
                }
            }
            else
#endif
            {
357
                // We need to apply periodic boundary conditions separately for each interaction.
358
359
360

                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
361
362
                    real4 force = 0;
                    real4 posq1 = posq[atom1];
363
364
                    LOAD_ATOM1_PARAMETERS
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
365
366
                        real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                        real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
367
#ifdef USE_PERIODIC
368
                        APPLY_PERIODIC_TO_DELTA(delta)
369
#endif
370
                        real r2 = dot(delta.xyz, delta.xyz);
371
#ifdef USE_CUTOFF
372
                        if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
373
#endif
374
                            real invR = RSQRT(r2);
peastman's avatar
peastman committed
375
                            real r = r2*invR;
376
377
378
                            unsigned int atom2 = j;
                            LOAD_ATOM2_PARAMETERS
                            atom2 = atomIndices[j];
379
#ifdef USE_SYMMETRIC
380
                            real dEdR = 0;
381
#else
382
383
                            real4 dEdR1 = (real4) 0;
                            real4 dEdR2 = (real4) 0;
384
#endif
385
386
387
388
#ifdef USE_EXCLUSIONS
                            bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif
                            real tempEnergy = 0;
389
                            const real interactionScale = 1.0f;
390
391
                            COMPUTE_INTERACTION
                            energy += tempEnergy;
392
#ifdef USE_SYMMETRIC
393
394
395
396
397
                            delta.xyz *= dEdR;
                            force.xyz -= delta.xyz;
                            localData[j].fx += delta.x;
                            localData[j].fy += delta.y;
                            localData[j].fz += delta.z;
398
#else
399
400
401
402
                            force.xyz -= dEdR1.xyz;
                            localData[j].fx += dEdR2.x;
                            localData[j].fy += dEdR2.y;
                            localData[j].fz += dEdR2.z;
403
404
405
406
407
408
#endif
#ifdef USE_CUTOFF
                        }
#endif
                    }

409
                    // Write results for atom1.
410

411
#ifdef SUPPORTS_64_BIT_ATOMICS
412
413
414
                    atom_add(&forceBuffers[atom1], realToFixedPoint(force.x));
                    atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
                    atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
415
#else
416
417
                    unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
                    forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
418
#endif
419
420
421
422
423
424
                }
            }

            // Write results.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
425
426
427
428
429
430
431
#ifdef USE_CUTOFF
                unsigned int atom2 = atomIndices[tgx];
#else
                unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
                if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS
432
433
434
                    atom_add(&forceBuffers[atom2], realToFixedPoint(localData[tgx].fx));
                    atom_add(&forceBuffers[atom2+PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fy));
                    atom_add(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fz));
435
436
437
438
439
440
441
442
443
#else
                    unsigned int offset = atom2 + get_group_id(0)*PADDED_NUM_ATOMS;
                    real4 f = forceBuffers[offset];
                    f.x += localData[tgx].fx;
                    f.y += localData[tgx].fy;
                    f.z += localData[tgx].fz;
                    forceBuffers[offset] = f;
#endif
                }
444
445
446
447
448
            }
        }
        pos++;
    }
    energyBuffer[get_global_id(0)] += energy;
449
    SAVE_DERIVATIVES
450
}