nonbonded.hip 20.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/**
 * Compute nonbonded interactions. The kernel is separated into two parts,
 * tiles with exclusions and tiles without exclusions. It relies heavily on
 * implicit warp-level synchronization. A tile is defined by two atom blocks
 * each of warpsize. Each warp computes a range of tiles.
 *
 * Tiles with exclusions compute the entire set of interactions across
 * atom blocks, equal to warpsize*warpsize. In order to avoid access conflicts
 * the forces are computed and accumulated diagonally in the manner shown below
 * where, suppose
 *
 * [a-h] comprise atom block 1, [i-p] comprise atom block 2
 *
 * 1 denotes the first set of calculations within the warp
 * 2 denotes the second set of calculations within the warp
 * ... etc.
 *
 *        threads
 *     0 1 2 3 4 5 6 7
 *         atom1
 * L    a b c d e f g h
 * o  i 1 2 3 4 5 6 7 8
 * c  j 8 1 2 3 4 5 6 7
 * a  k 7 8 1 2 3 4 5 6
 * l  l 6 7 8 1 2 3 4 5
 * D  m 5 6 7 8 1 2 3 4
 * a  n 4 5 6 7 8 1 2 3
 * t  o 3 4 5 6 7 8 1 2
 * a  p 2 3 4 5 6 7 8 1
 *
 * Tiles without exclusions read off directly from the neighbourlist interactingAtoms
 * and follows the same force accumulation method. If more there are more interactingTiles
 * than the size of the neighbourlist initially allocated, the neighbourlist is rebuilt
 * and the full tileset is computed. This should happen on the first step, and very rarely
 * afterwards.
 *
 * On diagonal exclusion tiles use __shfl to broadcast. For all other types of tiles __shfl
 * is used to pass around the forces, positions, and parameters when computing the forces.
 *
 * [out]forceBuffers    - forces on each atom to eventually be accumulated
 * [out]energyBuffer    - energyBuffer to eventually be accumulated
 * [in]posq             - x,y,z,charge
 * [in]exclusions       - 1024-bit flags denoting atom-atom exclusions for each tile
 * [in]exclusionTiles   - x,y denotes the indices of tiles that have an exclusion
 * [in]startTileIndex   - index into first tile to be processed
 * [in]numTileIndices   - number of tiles this context is responsible for processing
 * [in]int tiles        - the atom block for each tile
 * [in]interactionCount - total number of tiles that have an interaction
 * [in]maxTiles         - stores the size of the neighbourlist in case it needs
 *                      - to be expanded
 * [in]periodicBoxSize  - size of the Periodic Box, last dimension (w) not used
 * [in]invPeriodicBox   - inverse of the periodicBoxSize, pre-computed for speed
 * [in]blockCenter      - the center of each block in euclidean coordinates
 * [in]blockSize        - size of the each block, radiating from the center
 *                      - x is half the distance of total length
 *                      - y is half the distance of total width
 *                      - z is half the distance of total height
 *                      - w is not used
 * [in]interactingAtoms - a list of interactions within a given tile
 *
 */
Anton Gorenko's avatar
Anton Gorenko committed
62
extern "C" __launch_bounds__(THREAD_BLOCK_SIZE) __global__ void computeNonbonded(
63
64
65
66
67
68
69
70
71
72
        unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
        const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned long long numTileIndices
#ifdef USE_CUTOFF
        , const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
        const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms, unsigned int maxSinglePairs,
        const int2* __restrict__ singlePairs
#endif
        PARAMETER_ARGUMENTS) {
    const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
Anton Gorenko's avatar
Anton Gorenko committed
73
    const unsigned int warp = blockIdx.x*(THREAD_BLOCK_SIZE/TILE_SIZE) + threadIdx.x/TILE_SIZE; // global warpIndex
74
75
76
77
78
79
80
    const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
    const unsigned int tbx = threadIdx.x - tgx;           // block warpIndex
    mixed energy = 0;
    INIT_DERIVATIVES

    // First loop: process tiles that contain exclusions.

Anton Gorenko's avatar
Anton Gorenko committed
81
    for (int pos = FIRST_EXCLUSION_TILE+warp; pos < LAST_EXCLUSION_TILE; pos+=totalWarps) {
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
        const int2 tileIndices = exclusionTiles[pos];
        const unsigned int x = tileIndices.x;
        const unsigned int y = tileIndices.y;
        real3 force = make_real3(0);
        unsigned int atom1 = x*TILE_SIZE + tgx;
        real4 posq1 = posq[atom1];
        LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
        tileflags excl = exclusions[pos*TILE_SIZE+tgx];
#endif
        if (x == y) {
            // This tile is on the diagonal.
            real4 shflPosq = posq1;

            // we do not need to fetch parameters from global since this is a symmetric tile
            // instead we can broadcast the values using shuffle
Anton Gorenko's avatar
Anton Gorenko committed
98
            #pragma unroll 2
99
100
101
102
103
104
105
106
            for (unsigned int j = 0; j < TILE_SIZE; j++) {
                real4 posq2;
                BROADCAST_WARP_DATA
                real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
                APPLY_PERIODIC_TO_DELTA(delta)
#endif
                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
Anton Gorenko's avatar
Anton Gorenko committed
107
108
109
110
111
112
113
#ifdef USE_CUTOFF
                if (r2 < MAX_CUTOFF_SQUARED) {
#endif
                    real invR = RSQRT(r2);
                    real r = r2*invR;
                    LOAD_ATOM2_PARAMETERS
                    int atom2 = y*TILE_SIZE+j;
114
#ifdef USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
115
                    real dEdR = 0.0f;
116
#else
Anton Gorenko's avatar
Anton Gorenko committed
117
118
                    real3 dEdR1 = make_real3(0);
                    real3 dEdR2 = make_real3(0);
119
120
#endif
#ifdef USE_EXCLUSIONS
Anton Gorenko's avatar
Anton Gorenko committed
121
122
123
124
125
126
127
                    bool isExcluded = !(atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && (excl & 1));
#endif
                    real tempEnergy = 0.0f;
                    const real interactionScale = 0.5f;
                    COMPUTE_INTERACTION
#ifdef INCLUDE_ENERGY
                    energy += 0.5f*tempEnergy;
128
129
130
#endif
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
131
132
133
                    force.x -= delta.x*dEdR;
                    force.y -= delta.y*dEdR;
                    force.z -= delta.z*dEdR;
134
#else
Anton Gorenko's avatar
Anton Gorenko committed
135
136
137
                    force.x -= dEdR1.x;
                    force.y -= dEdR1.y;
                    force.z -= dEdR1.z;
138
139
#endif
#endif
Anton Gorenko's avatar
Anton Gorenko committed
140
141
142
#ifdef USE_CUTOFF
                }
#endif
143
144
145
146
147
148
149
150
#ifdef USE_EXCLUSIONS
                excl >>= 1;
#endif
            }
        }
        else {
            // This is an off-diagonal tile.
            unsigned int j = y*TILE_SIZE + tgx;
Anton Gorenko's avatar
Anton Gorenko committed
151
            int atomIndex = j;
152
153
154
155
156
157
158
159
160
161
            real4 shflPosq = posq[j];
            real3 shflForce;
            shflForce.x = 0.0f;
            shflForce.y = 0.0f;
            shflForce.z = 0.0f;
            DECLARE_LOCAL_PARAMETERS
            LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
#ifdef USE_EXCLUSIONS
            excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
#endif
Anton Gorenko's avatar
Anton Gorenko committed
162
            #pragma unroll 2
163
164
165
166
167
168
169
            for (j = 0; j < TILE_SIZE; j++) {
                real4 posq2 = shflPosq;
                real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
                APPLY_PERIODIC_TO_DELTA(delta)
#endif
                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
Anton Gorenko's avatar
Anton Gorenko committed
170
171
172
173
174
175
176
#ifdef USE_CUTOFF
                if (r2 < MAX_CUTOFF_SQUARED) {
#endif
                    real invR = RSQRT(r2);
                    real r = r2*invR;
                    LOAD_ATOM2_PARAMETERS
                    int atom2 = atomIndex;
177
#ifdef USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
178
                    real dEdR = 0.0f;
179
#else
Anton Gorenko's avatar
Anton Gorenko committed
180
181
                    real3 dEdR1 = make_real3(0);
                    real3 dEdR2 = make_real3(0);
182
183
#endif
#ifdef USE_EXCLUSIONS
Anton Gorenko's avatar
Anton Gorenko committed
184
185
186
187
188
189
190
                    bool isExcluded = !(atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && (excl & 1));
#endif
                    real tempEnergy = 0.0f;
                    const real interactionScale = 1.0f;
                    COMPUTE_INTERACTION
#ifdef INCLUDE_ENERGY
                    energy += tempEnergy;
191
192
193
#endif
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
194
195
196
197
198
199
200
                    delta *= dEdR;
                    force.x -= delta.x;
                    force.y -= delta.y;
                    force.z -= delta.z;
                    shflForce.x += delta.x;
                    shflForce.y += delta.y;
                    shflForce.z += delta.z;
201
#else // !USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
202
203
204
205
206
207
                    force.x -= dEdR1.x;
                    force.y -= dEdR1.y;
                    force.z -= dEdR1.z;
                    shflForce.x += dEdR2.x;
                    shflForce.y += dEdR2.y;
                    shflForce.z += dEdR2.z;
208
209
#endif // end USE_SYMMETRIC
#endif
Anton Gorenko's avatar
Anton Gorenko committed
210
211
#ifdef USE_CUTOFF
                }
212
213
214
215
#endif
#ifdef USE_EXCLUSIONS
                excl >>= 1;
#endif
Anton Gorenko's avatar
Anton Gorenko committed
216
217
                SHUFFLE_WARP_DATA
                atomIndex = warpRotateLeft<TILE_SIZE>(atomIndex);
218
            }
Anton Gorenko's avatar
Anton Gorenko committed
219
            const unsigned int offset = atomIndex;
220
221
            // write results for off diagonal tiles
#ifdef INCLUDE_FORCES
Anton Gorenko's avatar
Anton Gorenko committed
222
223
224
            atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>(realToFixedPoint(shflForce.x)));
            atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.y)));
            atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.z)));
225
226
227
228
229
#endif
        }
        // Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
        const unsigned int offset = x*TILE_SIZE + tgx;
Anton Gorenko's avatar
Anton Gorenko committed
230
231
232
        atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>(realToFixedPoint(force.x)));
        atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
        atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
233
234
235
236
237
238
239
240
241
242
#endif
    }

    // Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
    // of them (no cutoff).

#ifdef USE_CUTOFF
    const unsigned int numTiles = interactionCount[0];
    if (numTiles > maxTiles)
        return; // There wasn't enough memory for the neighbor list.
Anton Gorenko's avatar
Anton Gorenko committed
243
244
245
246
247
248
    for (unsigned int pos0 = warp; pos0 < LAST_EXCLUSION_TILE+numTiles; pos0+=totalWarps) {
        // Skip warps that may be still busy in the first loop
        if (pos0 < LAST_EXCLUSION_TILE) {
            continue;
        }
        const unsigned int pos = pos0-LAST_EXCLUSION_TILE;
249
250
251
252
253
#else
    int pos = (int) (startTileIndex+warp*numTileIndices/totalWarps);
    int end = (int) (startTileIndex+(warp+1)*numTileIndices/totalWarps);
    int skipBase = 0;
    int currentSkipIndex = tbx;
Anton Gorenko's avatar
Anton Gorenko committed
254
255
256
257
    int skipTiles;
    skipTiles = -1;
    for (; pos < end; pos++) {
#endif
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
        real3 force = make_real3(0);
        bool includeTile = true;

        // Extract the coordinates of this tile.
        int x, y;
        bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
        x = tiles[pos];
        real4 blockSizeX = blockSize[x];
        singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
                              0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
                              0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
        y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
        x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
        if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
            y += (x < y ? -1 : 1);
            x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
        }

        // Skip over tiles that have exclusions, since they were already processed.

Anton Gorenko's avatar
Anton Gorenko committed
280
        while (SHFL(skipTiles, TILE_SIZE-1) < pos) {
281
282
            if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
                int2 tile = exclusionTiles[skipBase+tgx];
Anton Gorenko's avatar
Anton Gorenko committed
283
                skipTiles = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
284
285
            }
            else
Anton Gorenko's avatar
Anton Gorenko committed
286
                skipTiles = end;
287
            skipBase += TILE_SIZE;
Anton Gorenko's avatar
Anton Gorenko committed
288
            currentSkipIndex = 0;
289
        }
Anton Gorenko's avatar
Anton Gorenko committed
290
        while (SHFL(skipTiles, currentSkipIndex) < pos)
291
            currentSkipIndex++;
Anton Gorenko's avatar
Anton Gorenko committed
292
        includeTile = (SHFL(skipTiles, currentSkipIndex) != pos);
293
294
295
296
297
298
299
300
301
302
303
#endif
        if (includeTile) {
            unsigned int atom1 = x*TILE_SIZE + tgx;
            // Load atom data for this tile.
            real4 posq1 = posq[atom1];
            LOAD_ATOM1_PARAMETERS
#ifdef USE_CUTOFF
            unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
#else
            unsigned int j = y*TILE_SIZE + tgx;
#endif
Anton Gorenko's avatar
Anton Gorenko committed
304
            int atomIndex = j;
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
            DECLARE_LOCAL_PARAMETERS
            real4 shflPosq;
            real3 shflForce;
            shflForce.x = 0.0f;
            shflForce.y = 0.0f;
            shflForce.z = 0.0f;
            if (j < PADDED_NUM_ATOMS) {
                // Load position of atom j from from global memory
                shflPosq = posq[j];
                LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
            }
            else {
                shflPosq = make_real4(0, 0, 0, 0);
                CLEAR_LOCAL_PARAMETERS
            }
#ifdef USE_PERIODIC
            if (singlePeriodicCopy) {
                // The box is small enough that we can just translate all the atoms into a single periodic
                // box, then skip having to apply periodic boundary conditions later.
                real4 blockCenterX = blockCenter[x];
                APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
                APPLY_PERIODIC_TO_POS_WITH_CENTER(shflPosq, blockCenterX)
Anton Gorenko's avatar
Anton Gorenko committed
327
                #pragma unroll 2
328
329
330
331
                for (j = 0; j < TILE_SIZE; j++) {
                    real4 posq2 = shflPosq;
                    real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
                    real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
Anton Gorenko's avatar
Anton Gorenko committed
332
333
334
335
336
337
338
#ifdef USE_CUTOFF
                    if (r2 < MAX_CUTOFF_SQUARED) {
#endif
                        real invR = RSQRT(r2);
                        real r = r2*invR;
                        LOAD_ATOM2_PARAMETERS
                        int atom2 = atomIndex;
339
#ifdef USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
340
                        real dEdR = 0.0f;
341
#else
Anton Gorenko's avatar
Anton Gorenko committed
342
343
                        real3 dEdR1 = make_real3(0);
                        real3 dEdR2 = make_real3(0);
344
345
#endif
#ifdef USE_EXCLUSIONS
Anton Gorenko's avatar
Anton Gorenko committed
346
347
348
349
350
351
352
                        bool isExcluded = !(atom1 < NUM_ATOMS && atom2 < NUM_ATOMS);
#endif
                        real tempEnergy = 0.0f;
                        const real interactionScale = 1.0f;
                        COMPUTE_INTERACTION
#ifdef INCLUDE_ENERGY
                        energy += tempEnergy;
353
354
355
#endif
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
356
357
358
359
360
361
362
                        delta *= dEdR;
                        force.x -= delta.x;
                        force.y -= delta.y;
                        force.z -= delta.z;
                        shflForce.x += delta.x;
                        shflForce.y += delta.y;
                        shflForce.z += delta.z;
363
#else // !USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
364
365
366
367
368
369
                        force.x -= dEdR1.x;
                        force.y -= dEdR1.y;
                        force.z -= dEdR1.z;
                        shflForce.x += dEdR2.x;
                        shflForce.y += dEdR2.y;
                        shflForce.z += dEdR2.z;
370
371
#endif // end USE_SYMMETRIC
#endif
Anton Gorenko's avatar
Anton Gorenko committed
372
373
#ifdef USE_CUTOFF
                    }
374
#endif
Anton Gorenko's avatar
Anton Gorenko committed
375
376
                    SHUFFLE_WARP_DATA
                    atomIndex = warpRotateLeft<TILE_SIZE>(atomIndex);
377
378
379
380
381
382
                }
            }
            else
#endif
            {
                // We need to apply periodic boundary conditions separately for each interaction.
Anton Gorenko's avatar
Anton Gorenko committed
383
                #pragma unroll 2
384
385
386
387
388
389
390
                for (j = 0; j < TILE_SIZE; j++) {
                    real4 posq2 = shflPosq;
                    real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
                    APPLY_PERIODIC_TO_DELTA(delta)
#endif
                    real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
Anton Gorenko's avatar
Anton Gorenko committed
391
392
393
394
395
396
397
#ifdef USE_CUTOFF
                    if (r2 < MAX_CUTOFF_SQUARED) {
#endif
                        real invR = RSQRT(r2);
                        real r = r2*invR;
                        LOAD_ATOM2_PARAMETERS
                        int atom2 = atomIndex;
398
#ifdef USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
399
                        real dEdR = 0.0f;
400
#else
Anton Gorenko's avatar
Anton Gorenko committed
401
402
                        real3 dEdR1 = make_real3(0);
                        real3 dEdR2 = make_real3(0);
403
404
#endif
#ifdef USE_EXCLUSIONS
Anton Gorenko's avatar
Anton Gorenko committed
405
406
407
408
409
410
411
                        bool isExcluded = !(atom1 < NUM_ATOMS && atom2 < NUM_ATOMS);
#endif
                        real tempEnergy = 0.0f;
                        const real interactionScale = 1.0f;
                        COMPUTE_INTERACTION
#ifdef INCLUDE_ENERGY
                        energy += tempEnergy;
412
413
414
#endif
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
415
416
417
418
419
420
421
                        delta *= dEdR;
                        force.x -= delta.x;
                        force.y -= delta.y;
                        force.z -= delta.z;
                        shflForce.x += delta.x;
                        shflForce.y += delta.y;
                        shflForce.z += delta.z;
422
#else // !USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
423
424
425
426
427
428
                        force.x -= dEdR1.x;
                        force.y -= dEdR1.y;
                        force.z -= dEdR1.z;
                        shflForce.x += dEdR2.x;
                        shflForce.y += dEdR2.y;
                        shflForce.z += dEdR2.z;
429
430
#endif // end USE_SYMMETRIC
#endif
Anton Gorenko's avatar
Anton Gorenko committed
431
432
#ifdef USE_CUTOFF
                    }
433
#endif
Anton Gorenko's avatar
Anton Gorenko committed
434
435
                    SHUFFLE_WARP_DATA
                    atomIndex = warpRotateLeft<TILE_SIZE>(atomIndex);
436
437
438
439
440
                }
            }

            // Write results.
#ifdef INCLUDE_FORCES
Anton Gorenko's avatar
Anton Gorenko committed
441
442
443
444
            atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>(realToFixedPoint(force.x)));
            atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
            atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
            unsigned int atom2 = atomIndex;
445
            if (atom2 < PADDED_NUM_ATOMS) {
Anton Gorenko's avatar
Anton Gorenko committed
446
447
448
                atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>(realToFixedPoint(shflForce.x)));
                atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.y)));
                atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.z)));
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
            }
#endif
        }
    }

    // Third loop: single pairs that aren't part of a tile.

#if USE_CUTOFF
    const unsigned int numPairs = interactionCount[1];
    if (numPairs > maxSinglePairs)
        return; // There wasn't enough memory for the neighbor list.
    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numPairs; i += blockDim.x*gridDim.x) {
        int2 pair = singlePairs[i];
        int atom1 = pair.x;
        int atom2 = pair.y;
        real4 posq1 = posq[atom1];
        real4 posq2 = posq[atom2];
        real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
        APPLY_PERIODIC_TO_DELTA(delta)
#endif
        real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
Anton Gorenko's avatar
Anton Gorenko committed
471
472
473
474
475
476
477
478
        if (r2 < MAX_CUTOFF_SQUARED) {
            LOAD_ATOM1_PARAMETERS
            int j = atom2;
            DECLARE_LOCAL_PARAMETERS
            LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
            LOAD_ATOM2_PARAMETERS
            real invR = RSQRT(r2);
            real r = r2*invR;
479
#ifdef USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
480
            real dEdR = 0.0f;
481
#else
Anton Gorenko's avatar
Anton Gorenko committed
482
483
484
485
486
487
488
489
490
491
            real3 dEdR1 = make_real3(0);
            real3 dEdR2 = make_real3(0);
#endif
            bool isExcluded = false;
            real tempEnergy = 0.0f;
            const real interactionScale = 1.0f;
            COMPUTE_INTERACTION
#ifdef INCLUDE_ENERGY
            energy += tempEnergy;
#endif
492
493
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
Anton Gorenko's avatar
Anton Gorenko committed
494
495
            real3 dEdR1 = delta*dEdR;
            real3 dEdR2 = -dEdR1;
496
#endif
Anton Gorenko's avatar
Anton Gorenko committed
497
498
499
500
501
502
            atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>(realToFixedPoint(-dEdR1.x)));
            atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(-dEdR1.y)));
            atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(-dEdR1.z)));
            atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>(realToFixedPoint(-dEdR2.x)));
            atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(-dEdR2.y)));
            atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(-dEdR2.z)));
503
#endif
Anton Gorenko's avatar
Anton Gorenko committed
504
        }
505
506
507
508
509
510
511
    }
#endif
#ifdef INCLUDE_ENERGY
    energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
#endif
    SAVE_DERIVATIVES
}