nonbonded_cpu.cl 16.7 KB
Newer Older
1
typedef struct {
2
3
4
    real x, y, z;
    real q;
    real fx, fy, fz;
5
6
7
8
9
10
11
    ATOM_PARAMETER_DATA
} AtomData;

/**
 * Compute nonbonded interactions.
 */

12
13
__kernel void computeNonbonded(
        __global long* restrict forceBuffers,
14
        __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
15
        __global const int2* restrict exclusionTiles, unsigned int startTileIndex, unsigned long numTileIndices
16
#ifdef USE_CUTOFF
17
        , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
18
19
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
        __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
20
#endif
21
        PARAMETER_ARGUMENTS) {
22
    mixed energy = 0;
23
    INIT_DERIVATIVES
24
    __local AtomData localData[TILE_SIZE];
25

26
    // First loop: process tiles that contain exclusions.
27

28
29
30
    const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+get_group_id(0)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
    const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(get_group_id(0)+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
    for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
31
        const int2 tileIndices = exclusionTiles[pos];
32
33
        const unsigned int x = tileIndices.x;
        const unsigned int y = tileIndices.y;
34

35
        // Load the data for this tile.
36

37
38
39
40
41
42
43
44
        for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
            unsigned int j = y*TILE_SIZE + localAtomIndex;
            real4 tempPosq = posq[j];
            localData[localAtomIndex].x = tempPosq.x;
            localData[localAtomIndex].y = tempPosq.y;
            localData[localAtomIndex].z = tempPosq.z;
            localData[localAtomIndex].q = tempPosq.w;
            LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
45
        }
46
        const bool hasExclusions = true;
47
48
49
50
51
        if (x == y) {
            // This tile is on the diagonal.

            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef USE_EXCLUSIONS
52
                unsigned int excl = exclusions[pos*TILE_SIZE+tgx];
53
54
#endif
                unsigned int atom1 = x*TILE_SIZE+tgx;
55
56
                real4 force = 0;
                real4 posq1 = posq[atom1];
57
58
                LOAD_ATOM1_PARAMETERS
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
59
60
                    real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                    real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
61
#ifdef USE_PERIODIC
62
                    APPLY_PERIODIC_TO_DELTA(delta)
63
#endif
64
                    real r2 = dot(delta.xyz, delta.xyz);
65
#ifdef USE_CUTOFF
66
                    if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
67
#endif
68
                        real invR = RSQRT(r2);
peastman's avatar
peastman committed
69
                        real r = r2*invR;
70
71
72
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
73
#ifdef USE_SYMMETRIC
74
                        real dEdR = 0;
75
#else
76
77
                        real4 dEdR1 = (real4) 0;
                        real4 dEdR2 = (real4) 0;
78
#endif
79
80
81
82
#ifdef USE_EXCLUSIONS
                        bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif
                        real tempEnergy = 0;
83
                        const real interactionScale = 0.5f;
84
85
                        COMPUTE_INTERACTION
                        energy += 0.5f*tempEnergy;
86
#ifdef USE_SYMMETRIC
87
                        force.xyz -= delta.xyz*dEdR;
88
#else
89
                        force.xyz -= dEdR1.xyz;
90
91
92
93
#endif
#ifdef USE_CUTOFF
                    }
#endif
94
#ifdef USE_EXCLUSIONS
95
                    excl >>= 1;
96
#endif
97
98
99
100
                }

                // Write results.

101
102
103
                ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
                ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
104
105
106
107
108
109
            }
        }
        else {
            // This is an off-diagonal tile.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
110
111
112
                localData[tgx].fx = 0;
                localData[tgx].fy = 0;
                localData[tgx].fz = 0;
113
            }
114
115
116
117
118
119
120
121
122
123
124
125
            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                unsigned int atom1 = x*TILE_SIZE+tgx;
                real4 force = 0;
                real4 posq1 = posq[atom1];
                LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
                unsigned int excl = exclusions[pos*TILE_SIZE+tgx];
#endif
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
                    real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                    real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
126
                    APPLY_PERIODIC_TO_DELTA(delta)
127
128
#endif
                    real r2 = dot(delta.xyz, delta.xyz);
129
#ifdef USE_CUTOFF
130
                    if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
131
132
#endif
                        real invR = RSQRT(r2);
peastman's avatar
peastman committed
133
                        real r = r2*invR;
134
135
136
137
138
139
140
141
142
143
144
145
146
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC
                        real dEdR = 0;
#else
                        real4 dEdR1 = (real4) 0;
                        real4 dEdR2 = (real4) 0;
#endif
#ifdef USE_EXCLUSIONS
                        bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif
                        real tempEnergy = 0;
147
                        const real interactionScale = 1.0f;
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
                        COMPUTE_INTERACTION
                        energy += tempEnergy;
#ifdef USE_SYMMETRIC
                        delta.xyz *= dEdR;
                        force.xyz -= delta.xyz;
                        localData[j].fx += delta.x;
                        localData[j].fy += delta.y;
                        localData[j].fz += delta.z;
#else
                        force.xyz -= dEdR1.xyz;
                        localData[j].fx += dEdR2.x;
                        localData[j].fy += dEdR2.y;
                        localData[j].fz += dEdR2.z;
#endif
#ifdef USE_CUTOFF
                    }
#endif
#ifdef USE_EXCLUSIONS
                    excl >>= 1;
#endif
                }
169

170
171
               // Write results for atom1.

172
173
174
                ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
                ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
175
176
177
178
179
180
            }

            // Write results.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
                unsigned int offset = y*TILE_SIZE + tgx;
181
182
183
                ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[tgx].fx));
                ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fy));
                ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fz));
184
185
186
187
188
189
190
191
192
            }
        }
    }

    // Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
    // of them (no cutoff).

#ifdef USE_CUTOFF
    const unsigned int numTiles = interactionCount[0];
193
194
    if (numTiles > maxTiles)
        return; // There wasn't enough memory for the neighbor list.
195
196
    int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0));
    int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
197
198
#else
    const unsigned int numTiles = numTileIndices;
199
200
    int pos = (int) (startTileIndex+get_group_id(0)*(long)numTiles/get_num_groups(0));
    int end = (int) (startTileIndex+(get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
201
202
203
204
205
206
207
208
#endif
    int nextToSkip = -1;
    int currentSkipIndex = 0;
    __local int atomIndices[TILE_SIZE];

    while (pos < end) {
        const bool hasExclusions = false;
        bool includeTile = true;
209

210
        // Extract the coordinates of this tile.
211

212
        int x, y;
213
214
        bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
215
216
217
218
219
220
221
222
223
224
        x = tiles[pos];
        real4 blockSizeX = blockSize[x];
        singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
                              0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
                              0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
        y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
        x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
        if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
            y += (x < y ? -1 : 1);
225
            x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
226
        }
227

228
        // Skip over tiles that have exclusions, since they were already processed.
229

230
231
        while (nextToSkip < pos) {
            if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
232
                int2 tile = exclusionTiles[currentSkipIndex++];
233
                nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
234
            }
235
236
            else
                nextToSkip = end;
237
        }
238
239
        includeTile = (nextToSkip != pos);
#endif
240
241
242
243
244
        if (includeTile) {
            // Load the data for this tile.

            for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
#ifdef USE_CUTOFF
peastman's avatar
peastman committed
245
                unsigned int j = interactingAtoms[pos*TILE_SIZE+localAtomIndex];
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#else
                unsigned int j = y*TILE_SIZE+localAtomIndex;
#endif
                atomIndices[localAtomIndex] = j;
                if (j < PADDED_NUM_ATOMS) {
                    real4 tempPosq = posq[j];
                    localData[localAtomIndex].x = tempPosq.x;
                    localData[localAtomIndex].y = tempPosq.y;
                    localData[localAtomIndex].z = tempPosq.z;
                    localData[localAtomIndex].q = tempPosq.w;
                    LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
                    localData[localAtomIndex].fx = 0;
                    localData[localAtomIndex].fy = 0;
                    localData[localAtomIndex].fz = 0;
                }
            }
262
#ifdef USE_PERIODIC
263
264
265
266
267
268
            if (singlePeriodicCopy) {
                // The box is small enough that we can just translate all the atoms into a single periodic
                // box, then skip having to apply periodic boundary conditions later.

                real4 blockCenterX = blockCenter[x];
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
269
                    APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[tgx], blockCenterX)
270
271
272
273
274
                }
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
                    real4 force = 0;
                    real4 posq1 = posq[atom1];
275
                    APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
276
277
278
279
280
                    LOAD_ATOM1_PARAMETERS
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
                        real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                        real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
                        real r2 = dot(delta.xyz, delta.xyz);
281
                        if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
282
                            real invR = RSQRT(r2);
peastman's avatar
peastman committed
283
                            real r = r2*invR;
284
285
286
                            unsigned int atom2 = j;
                            LOAD_ATOM2_PARAMETERS
                            atom2 = atomIndices[j];
287
#ifdef USE_SYMMETRIC
288
                            real dEdR = 0;
289
#else
290
291
292
293
294
                            real4 dEdR1 = (real4) 0;
                            real4 dEdR2 = (real4) 0;
#endif
#ifdef USE_EXCLUSIONS
                            bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
295
#endif
296
                            real tempEnergy = 0;
297
                            const real interactionScale = 1.0f;
298
299
                            COMPUTE_INTERACTION
                            energy += tempEnergy;
300
#ifdef USE_SYMMETRIC
301
302
303
304
305
                            delta.xyz *= dEdR;
                            force.xyz -= delta.xyz;
                            localData[j].fx += delta.x;
                            localData[j].fy += delta.y;
                            localData[j].fz += delta.z;
306
#else
307
308
309
310
                            force.xyz -= dEdR1.xyz;
                            localData[j].fx += dEdR2.x;
                            localData[j].fy += dEdR2.y;
                            localData[j].fz += dEdR2.z;
311
312
#endif
                        }
313
                    }
314

315
                   // Write results for atom1.
316

317
318
319
                    ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
                    ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                    ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
320
321
322
323
324
                }
            }
            else
#endif
            {
325
                // We need to apply periodic boundary conditions separately for each interaction.
326
327
328

                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
329
330
                    real4 force = 0;
                    real4 posq1 = posq[atom1];
331
332
                    LOAD_ATOM1_PARAMETERS
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
333
334
                        real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
                        real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
335
#ifdef USE_PERIODIC
336
                        APPLY_PERIODIC_TO_DELTA(delta)
337
#endif
338
                        real r2 = dot(delta.xyz, delta.xyz);
339
#ifdef USE_CUTOFF
340
                        if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
341
#endif
342
                            real invR = RSQRT(r2);
peastman's avatar
peastman committed
343
                            real r = r2*invR;
344
345
346
                            unsigned int atom2 = j;
                            LOAD_ATOM2_PARAMETERS
                            atom2 = atomIndices[j];
347
#ifdef USE_SYMMETRIC
348
                            real dEdR = 0;
349
#else
350
351
                            real4 dEdR1 = (real4) 0;
                            real4 dEdR2 = (real4) 0;
352
#endif
353
354
355
356
#ifdef USE_EXCLUSIONS
                            bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif
                            real tempEnergy = 0;
357
                            const real interactionScale = 1.0f;
358
359
                            COMPUTE_INTERACTION
                            energy += tempEnergy;
360
#ifdef USE_SYMMETRIC
361
362
363
364
365
                            delta.xyz *= dEdR;
                            force.xyz -= delta.xyz;
                            localData[j].fx += delta.x;
                            localData[j].fy += delta.y;
                            localData[j].fz += delta.z;
366
#else
367
368
369
370
                            force.xyz -= dEdR1.xyz;
                            localData[j].fx += dEdR2.x;
                            localData[j].fy += dEdR2.y;
                            localData[j].fz += dEdR2.z;
371
372
373
374
375
376
#endif
#ifdef USE_CUTOFF
                        }
#endif
                    }

377
                    // Write results for atom1.
378

379
380
381
                    ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
                    ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                    ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
382
383
384
385
386
387
                }
            }

            // Write results.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
388
389
390
391
392
393
#ifdef USE_CUTOFF
                unsigned int atom2 = atomIndices[tgx];
#else
                unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
                if (atom2 < PADDED_NUM_ATOMS) {
394
395
396
                    ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(localData[tgx].fx));
                    ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fy));
                    ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fz));
397
                }
398
399
400
401
402
            }
        }
        pos++;
    }
    energyBuffer[get_global_id(0)] += energy;
403
    SAVE_DERIVATIVES
404
}