nonbonded_cpu.cl 18.3 KB
Newer Older
1
typedef struct {
2
3
4
    real x, y, z;
    real q;
    real fx, fy, fz;
5
6
7
8
9
10
11
    ATOM_PARAMETER_DATA
} AtomData;

/**
 * Compute nonbonded interactions.
 */

12
13
14
__kernel void computeNonbonded(
#ifdef SUPPORTS_64_BIT_ATOMICS
        __global long* restrict forceBuffers,
15
#else
16
        __global real4* restrict forceBuffers,
17
#endif
18
        __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
19
        __global const int2* restrict exclusionTiles, unsigned int startTileIndex, unsigned long numTileIndices
20
#ifdef USE_CUTOFF
21
        , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
22
23
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
        __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
24
#endif
25
        PARAMETER_ARGUMENTS) {
26
    mixed energy = 0;
27
    INIT_DERIVATIVES
28
    __local AtomData localData[TILE_SIZE];
29

30
    // First loop: process tiles that contain exclusions.
31

32
33
34
    const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+get_group_id(0)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
    const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(get_group_id(0)+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
    for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
35
        const int2 tileIndices = exclusionTiles[pos];
36
37
        const unsigned int x = tileIndices.x;
        const unsigned int y = tileIndices.y;
38

39
        // Load the data for this tile.
40

41
42
43
44
45
46
47
48
        for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
            unsigned int j = y*TILE_SIZE + localAtomIndex;
            real4 tempPosq = posq[j];
            localData[localAtomIndex].x = tempPosq.x;
            localData[localAtomIndex].y = tempPosq.y;
            localData[localAtomIndex].z = tempPosq.z;
            localData[localAtomIndex].q = tempPosq.w;
            LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
49
        }
50
        const bool hasExclusions = true;
51
52
53
54
55
        if (x == y) {
            // This tile is on the diagonal.

            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef USE_EXCLUSIONS
56
                unsigned int excl = exclusions[pos*TILE_SIZE+tgx];
57
58
#endif
                unsigned int atom1 = x*TILE_SIZE+tgx;
59
60
                real4 force = 0;
                real4 posq1 = posq[atom1];
61
62
                LOAD_ATOM1_PARAMETERS
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
63
64
                    real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                    real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
65
#ifdef USE_PERIODIC
66
                    APPLY_PERIODIC_TO_DELTA(delta)
67
#endif
68
                    real r2 = dot(delta.xyz, delta.xyz);
69
#ifdef USE_CUTOFF
70
                    if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
71
#endif
72
                        real invR = RSQRT(r2);
peastman's avatar
peastman committed
73
                        real r = r2*invR;
74
75
76
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
77
#ifdef USE_SYMMETRIC
78
                        real dEdR = 0;
79
#else
80
81
                        real4 dEdR1 = (real4) 0;
                        real4 dEdR2 = (real4) 0;
82
#endif
83
84
85
86
#ifdef USE_EXCLUSIONS
                        bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif
                        real tempEnergy = 0;
87
                        const real interactionScale = 0.5f;
88
89
                        COMPUTE_INTERACTION
                        energy += 0.5f*tempEnergy;
90
#ifdef USE_SYMMETRIC
91
                        force.xyz -= delta.xyz*dEdR;
92
#else
93
                        force.xyz -= dEdR1.xyz;
94
95
96
97
#endif
#ifdef USE_CUTOFF
                    }
#endif
98
#ifdef USE_EXCLUSIONS
99
                    excl >>= 1;
100
#endif
101
102
103
104
                }

                // Write results.

105
#ifdef SUPPORTS_64_BIT_ATOMICS
106
107
108
                ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
                ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
109
110
#else
                unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
111
                forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
112
#endif
113
114
115
116
117
118
            }
        }
        else {
            // This is an off-diagonal tile.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
119
120
121
                localData[tgx].fx = 0;
                localData[tgx].fy = 0;
                localData[tgx].fz = 0;
122
            }
123
124
125
126
127
128
129
130
131
132
133
134
            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                unsigned int atom1 = x*TILE_SIZE+tgx;
                real4 force = 0;
                real4 posq1 = posq[atom1];
                LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
                unsigned int excl = exclusions[pos*TILE_SIZE+tgx];
#endif
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
                    real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                    real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
135
                    APPLY_PERIODIC_TO_DELTA(delta)
136
137
#endif
                    real r2 = dot(delta.xyz, delta.xyz);
138
#ifdef USE_CUTOFF
139
                    if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
140
141
#endif
                        real invR = RSQRT(r2);
peastman's avatar
peastman committed
142
                        real r = r2*invR;
143
144
145
146
147
148
149
150
151
152
153
154
155
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC
                        real dEdR = 0;
#else
                        real4 dEdR1 = (real4) 0;
                        real4 dEdR2 = (real4) 0;
#endif
#ifdef USE_EXCLUSIONS
                        bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif
                        real tempEnergy = 0;
156
                        const real interactionScale = 1.0f;
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
                        COMPUTE_INTERACTION
                        energy += tempEnergy;
#ifdef USE_SYMMETRIC
                        delta.xyz *= dEdR;
                        force.xyz -= delta.xyz;
                        localData[j].fx += delta.x;
                        localData[j].fy += delta.y;
                        localData[j].fz += delta.z;
#else
                        force.xyz -= dEdR1.xyz;
                        localData[j].fx += dEdR2.x;
                        localData[j].fy += dEdR2.y;
                        localData[j].fz += dEdR2.z;
#endif
#ifdef USE_CUTOFF
                    }
#endif
#ifdef USE_EXCLUSIONS
                    excl >>= 1;
#endif
                }
178

179
180
181
               // Write results for atom1.

#ifdef SUPPORTS_64_BIT_ATOMICS
182
183
184
                ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
                ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
185
186
187
188
189
190
191
192
193
194
195
#else
                unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
                forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
#endif
            }

            // Write results.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS
                unsigned int offset = y*TILE_SIZE + tgx;
196
197
198
                ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[tgx].fx));
                ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fy));
                ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fz));
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#else
                unsigned int offset = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
                real4 f = forceBuffers[offset];
                f.x += localData[tgx].fx;
                f.y += localData[tgx].fy;
                f.z += localData[tgx].fz;
                forceBuffers[offset] = f;
#endif
            }
        }
    }

    // Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
    // of them (no cutoff).

#ifdef USE_CUTOFF
    const unsigned int numTiles = interactionCount[0];
216
217
    if (numTiles > maxTiles)
        return; // There wasn't enough memory for the neighbor list.
218
219
    int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0));
    int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
220
221
#else
    const unsigned int numTiles = numTileIndices;
222
223
    int pos = (int) (startTileIndex+get_group_id(0)*(long)numTiles/get_num_groups(0));
    int end = (int) (startTileIndex+(get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
224
225
226
227
228
229
230
231
#endif
    int nextToSkip = -1;
    int currentSkipIndex = 0;
    __local int atomIndices[TILE_SIZE];

    while (pos < end) {
        const bool hasExclusions = false;
        bool includeTile = true;
232

233
        // Extract the coordinates of this tile.
234

235
        int x, y;
236
237
        bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
238
239
240
241
242
243
244
245
246
247
        x = tiles[pos];
        real4 blockSizeX = blockSize[x];
        singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
                              0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
                              0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
        y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
        x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
        if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
            y += (x < y ? -1 : 1);
248
            x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
249
        }
250

251
        // Skip over tiles that have exclusions, since they were already processed.
252

253
254
        while (nextToSkip < pos) {
            if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
255
                int2 tile = exclusionTiles[currentSkipIndex++];
256
                nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
257
            }
258
259
            else
                nextToSkip = end;
260
        }
261
262
        includeTile = (nextToSkip != pos);
#endif
263
264
265
266
267
        if (includeTile) {
            // Load the data for this tile.

            for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
#ifdef USE_CUTOFF
peastman's avatar
peastman committed
268
                unsigned int j = interactingAtoms[pos*TILE_SIZE+localAtomIndex];
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#else
                unsigned int j = y*TILE_SIZE+localAtomIndex;
#endif
                atomIndices[localAtomIndex] = j;
                if (j < PADDED_NUM_ATOMS) {
                    real4 tempPosq = posq[j];
                    localData[localAtomIndex].x = tempPosq.x;
                    localData[localAtomIndex].y = tempPosq.y;
                    localData[localAtomIndex].z = tempPosq.z;
                    localData[localAtomIndex].q = tempPosq.w;
                    LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
                    localData[localAtomIndex].fx = 0;
                    localData[localAtomIndex].fy = 0;
                    localData[localAtomIndex].fz = 0;
                }
            }
285
#ifdef USE_PERIODIC
286
287
288
289
290
291
            if (singlePeriodicCopy) {
                // The box is small enough that we can just translate all the atoms into a single periodic
                // box, then skip having to apply periodic boundary conditions later.

                real4 blockCenterX = blockCenter[x];
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
292
                    APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[tgx], blockCenterX)
293
294
295
296
297
                }
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
                    real4 force = 0;
                    real4 posq1 = posq[atom1];
298
                    APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
299
300
301
302
303
                    LOAD_ATOM1_PARAMETERS
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
                        real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                        real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
                        real r2 = dot(delta.xyz, delta.xyz);
304
                        if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
305
                            real invR = RSQRT(r2);
peastman's avatar
peastman committed
306
                            real r = r2*invR;
307
308
309
                            unsigned int atom2 = j;
                            LOAD_ATOM2_PARAMETERS
                            atom2 = atomIndices[j];
310
#ifdef USE_SYMMETRIC
311
                            real dEdR = 0;
312
#else
313
314
315
316
317
                            real4 dEdR1 = (real4) 0;
                            real4 dEdR2 = (real4) 0;
#endif
#ifdef USE_EXCLUSIONS
                            bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
318
#endif
319
                            real tempEnergy = 0;
320
                            const real interactionScale = 1.0f;
321
322
                            COMPUTE_INTERACTION
                            energy += tempEnergy;
323
#ifdef USE_SYMMETRIC
324
325
326
327
328
                            delta.xyz *= dEdR;
                            force.xyz -= delta.xyz;
                            localData[j].fx += delta.x;
                            localData[j].fy += delta.y;
                            localData[j].fz += delta.z;
329
#else
330
331
332
333
                            force.xyz -= dEdR1.xyz;
                            localData[j].fx += dEdR2.x;
                            localData[j].fy += dEdR2.y;
                            localData[j].fz += dEdR2.z;
334
335
#endif
                        }
336
                    }
337

338
                   // Write results for atom1.
339

340
#ifdef SUPPORTS_64_BIT_ATOMICS
341
342
343
                    ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
                    ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                    ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
344
345
346
347
#else
                    unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
                    forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
#endif
348
349
350
351
352
                }
            }
            else
#endif
            {
353
                // We need to apply periodic boundary conditions separately for each interaction.
354
355
356

                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
357
358
                    real4 force = 0;
                    real4 posq1 = posq[atom1];
359
360
                    LOAD_ATOM1_PARAMETERS
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
361
362
                        real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                        real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
363
#ifdef USE_PERIODIC
364
                        APPLY_PERIODIC_TO_DELTA(delta)
365
#endif
366
                        real r2 = dot(delta.xyz, delta.xyz);
367
#ifdef USE_CUTOFF
368
                        if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
369
#endif
370
                            real invR = RSQRT(r2);
peastman's avatar
peastman committed
371
                            real r = r2*invR;
372
373
374
                            unsigned int atom2 = j;
                            LOAD_ATOM2_PARAMETERS
                            atom2 = atomIndices[j];
375
#ifdef USE_SYMMETRIC
376
                            real dEdR = 0;
377
#else
378
379
                            real4 dEdR1 = (real4) 0;
                            real4 dEdR2 = (real4) 0;
380
#endif
381
382
383
384
#ifdef USE_EXCLUSIONS
                            bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif
                            real tempEnergy = 0;
385
                            const real interactionScale = 1.0f;
386
387
                            COMPUTE_INTERACTION
                            energy += tempEnergy;
388
#ifdef USE_SYMMETRIC
389
390
391
392
393
                            delta.xyz *= dEdR;
                            force.xyz -= delta.xyz;
                            localData[j].fx += delta.x;
                            localData[j].fy += delta.y;
                            localData[j].fz += delta.z;
394
#else
395
396
397
398
                            force.xyz -= dEdR1.xyz;
                            localData[j].fx += dEdR2.x;
                            localData[j].fy += dEdR2.y;
                            localData[j].fz += dEdR2.z;
399
400
401
402
403
404
#endif
#ifdef USE_CUTOFF
                        }
#endif
                    }

405
                    // Write results for atom1.
406

407
#ifdef SUPPORTS_64_BIT_ATOMICS
408
409
410
                    ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
                    ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                    ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
411
#else
412
413
                    unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
                    forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
414
#endif
415
416
417
418
419
420
                }
            }

            // Write results.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
421
422
423
424
425
426
427
#ifdef USE_CUTOFF
                unsigned int atom2 = atomIndices[tgx];
#else
                unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
                if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS
428
429
430
                    ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(localData[tgx].fx));
                    ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fy));
                    ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fz));
431
432
433
434
435
436
437
438
439
#else
                    unsigned int offset = atom2 + get_group_id(0)*PADDED_NUM_ATOMS;
                    real4 f = forceBuffers[offset];
                    f.x += localData[tgx].fx;
                    f.y += localData[tgx].fy;
                    f.z += localData[tgx].fz;
                    forceBuffers[offset] = f;
#endif
                }
440
441
442
443
444
            }
        }
        pos++;
    }
    energyBuffer[get_global_id(0)] += energy;
445
    SAVE_DERIVATIVES
446
}