hippoComputeField.cc 17.1 KB
Newer Older
1
DEVICE void computeDirectFieldDampingFactors(real alpha, real r, real* fdamp3, real* fdamp5, real* fdamp7) {
peastman's avatar
peastman committed
2
3
4
5
6
7
    real ar = alpha*r;
    real ar2 = ar*ar;
    real ar3 = ar2*ar;
    real ar4 = ar2*ar2;
    real expAR = EXP(-ar);
    real one = 1;
8
9
10
    *fdamp3 = 1 - (1 + ar + ar2*(one/2))*expAR;
    *fdamp5 = 1 - (1 + ar + ar2*(one/2) + ar3*(one/6))*expAR;
    *fdamp7 = 1 - (1 + ar + ar2*(one/2) + ar3*(one/6) + ar4*(one/30))*expAR;
peastman's avatar
peastman committed
11
12
}

13
DEVICE void computeMutualFieldDampingFactors(real alphaI, real alphaJ, real r, real* fdamp3, real* fdamp5) {
peastman's avatar
peastman committed
14
15
16
17
18
19
20
21
22
    real arI = alphaI*r;
    real arI2 = arI*arI;
    real arI3 = arI2*arI;
    real expARI = EXP(-arI);
    real one = 1;
    real seven = 7;
    if (alphaI == alphaJ) {
        real arI4 = arI3*arI;
        real arI5 = arI4*arI;
23
24
        *fdamp3 = 1 - (1 + arI + arI2*(one/2) + arI3*(seven/48) + arI4*(one/48))*expARI;
        *fdamp5 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(one/24) + arI5*(one/144))*expARI;
peastman's avatar
peastman committed
25
26
27
28
29
30
31
32
33
34
    }
    else {
        real arJ = alphaJ*r;
        real arJ2 = arJ*arJ;
        real arJ3 = arJ2*arJ;
        real expARJ = EXP(-arJ);
        real aI2 = alphaI*alphaI;
        real aJ2 = alphaJ*alphaJ;
        real A = aJ2/(aJ2-aI2);
        real B = aI2/(aI2-aJ2);
Peter Eastman's avatar
Peter Eastman committed
35
36
        real A2expARI = A*A*expARI;
        real B2expARJ = B*B*expARJ;
37
38
39
40
41
42
43
44
        *fdamp3 = 1 - (1 + arI + arI2*(one/2))*A2expARI -
                      (1 + arJ + arJ2*(one/2))*B2expARJ -
                      (1 + arI)*2*B*A2expARI -
                      (1 + arJ)*2*A*B2expARJ;
        *fdamp5 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6))*A2expARI -
                      (1 + arJ + arJ2*(one/2) + arJ3*(one/6))*B2expARJ -
                      (1 + arI + arI2*(one/3))*2*B*A2expARI -
                      (1 + arJ + arJ2*(one/3))*2*A*B2expARJ;
peastman's avatar
peastman committed
45
46
47
48
    }
}

typedef struct {
49
50
    real x, y, z;
    real fx, fy, fz;
peastman's avatar
peastman committed
51
52
53
54
55
56
    ATOM_PARAMETER_DATA
} AtomData;

/**
 * Compute the electrostatic field.
 */
57
58
KERNEL void computeField(GLOBAL const real4* RESTRICT posq, GLOBAL const unsigned int* RESTRICT exclusions,
        GLOBAL const int2* RESTRICT exclusionTiles, GLOBAL mm_ulong* RESTRICT fieldBuffers,
peastman's avatar
peastman committed
59
#ifdef USE_CUTOFF
60
61
62
        GLOBAL const int* RESTRICT tiles, GLOBAL const unsigned int* RESTRICT interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, GLOBAL const real4* RESTRICT blockCenter,
        GLOBAL const real4* RESTRICT blockSize, GLOBAL const unsigned int* RESTRICT interactingAtoms
peastman's avatar
peastman committed
63
64
65
66
#else
        unsigned int numTiles
#endif
        PARAMETER_ARGUMENTS) {
67
68
69
70
71
    const unsigned int totalWarps = (GLOBAL_SIZE)/TILE_SIZE;
    const unsigned int warp = (GLOBAL_ID)/TILE_SIZE;
    const unsigned int tgx = LOCAL_ID & (TILE_SIZE-1);
    const unsigned int tbx = LOCAL_ID - tgx;
    LOCAL AtomData localData[THREAD_BLOCK_SIZE];
peastman's avatar
peastman committed
72
73
74
75
76
77

    // First loop: process tiles that contain exclusions.
    
    const unsigned int firstExclusionTile = warp*NUM_TILES_WITH_EXCLUSIONS/totalWarps;
    const unsigned int lastExclusionTile = (warp+1)*NUM_TILES_WITH_EXCLUSIONS/totalWarps;
    for (int tile = firstExclusionTile; tile < lastExclusionTile; tile++) {
78
        const int2 tileIndices = exclusionTiles[tile];
peastman's avatar
peastman committed
79
80
81
82
83
84
85
86
87
88
        const unsigned int x = tileIndices.x;
        const unsigned int y = tileIndices.y;
        real3 field = make_real3(0);
        unsigned int atom1 = x*TILE_SIZE + tgx;
        real4 pos1 = posq[atom1];
        LOAD_ATOM1_PARAMETERS
        unsigned int excl = exclusions[tile*TILE_SIZE+tgx];
        if (x == y) {
            // This tile is on the diagonal.

89
90
91
92
            const unsigned int localAtomIndex = LOCAL_ID;
            localData[LOCAL_ID].x = pos1.x;
            localData[LOCAL_ID].y = pos1.y;
            localData[LOCAL_ID].z = pos1.z;
peastman's avatar
peastman committed
93
            LOAD_LOCAL_PARAMETERS_FROM_1
94
            SYNC_WARPS;
peastman's avatar
peastman committed
95
96
            for (unsigned int j = 0; j < TILE_SIZE; j++) {
                int atom2 = tbx+j;
97
                real3 pos2 = make_real3(localData[atom2].x, localData[atom2].y, localData[atom2].z);
peastman's avatar
peastman committed
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
                real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
                APPLY_PERIODIC_TO_DELTA(delta)
#endif
                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
                if (r2 < CUTOFF_SQUARED) {
#endif
                    real invR = RSQRT(r2);
                    real r = r2*invR;
                    LOAD_ATOM2_PARAMETERS
                    atom2 = y*TILE_SIZE+j;
                    real3 tempField1 = make_real3(0);
                    real3 tempField2 = make_real3(0);
                    bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
                    if (!isExcluded && atom1 != atom2) {
                        COMPUTE_FIELD
                    }
                    field += tempField1;
#ifdef USE_CUTOFF
                }
#endif
                excl >>= 1;
121
                SYNC_WARPS;
peastman's avatar
peastman committed
122
123
124
125
126
            }
        }
        else {
            // This is an off-diagonal tile.

127
            const unsigned int localAtomIndex = LOCAL_ID;
peastman's avatar
peastman committed
128
129
            unsigned int j = y*TILE_SIZE + tgx;
            real4 tempPosq = posq[j];
130
131
132
            localData[LOCAL_ID].x = tempPosq.x;
            localData[LOCAL_ID].y = tempPosq.y;
            localData[LOCAL_ID].z = tempPosq.z;
peastman's avatar
peastman committed
133
            LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
134
135
136
137
            localData[localAtomIndex].fx = 0;
            localData[localAtomIndex].fy = 0;
            localData[localAtomIndex].fz = 0;
            SYNC_WARPS;
peastman's avatar
peastman committed
138
139
140
141
            excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
            unsigned int tj = tgx;
            for (j = 0; j < TILE_SIZE; j++) {
                int atom2 = tbx+tj;
142
                real3 pos2 = make_real3(localData[atom2].x, localData[atom2].y, localData[atom2].z);
peastman's avatar
peastman committed
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
                real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
                APPLY_PERIODIC_TO_DELTA(delta)
#endif
                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
                if (r2 < CUTOFF_SQUARED) {
#endif
                    real invR = RSQRT(r2);
                    real r = r2*invR;
                    LOAD_ATOM2_PARAMETERS
                    atom2 = y*TILE_SIZE+tj;
                    real3 tempField1 = make_real3(0);
                    real3 tempField2 = make_real3(0);
                    bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
                    if (!isExcluded) {
                        COMPUTE_FIELD
                    }
                    field += tempField1;
162
163
164
                    localData[tbx+tj].fx += tempField2.x;
                    localData[tbx+tj].fy += tempField2.y;
                    localData[tbx+tj].fz += tempField2.z;
peastman's avatar
peastman committed
165
166
167
168
169
#ifdef USE_CUTOFF
                }
#endif
                excl >>= 1;
                tj = (tj + 1) & (TILE_SIZE - 1);
170
                SYNC_WARPS;
peastman's avatar
peastman committed
171
172
173
174
175
176
            }
        }

        // Write results.

        unsigned int offset1 = x*TILE_SIZE + tgx;
177
178
179
        ATOMIC_ADD(&fieldBuffers[offset1], (mm_ulong) ((mm_long) (field.x*0x100000000)));
        ATOMIC_ADD(&fieldBuffers[offset1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (field.y*0x100000000)));
        ATOMIC_ADD(&fieldBuffers[offset1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (field.z*0x100000000)));
peastman's avatar
peastman committed
180
181
        if (x != y) {
            unsigned int offset2 = y*TILE_SIZE + tgx;
182
183
184
            ATOMIC_ADD(&fieldBuffers[offset2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000)));
            ATOMIC_ADD(&fieldBuffers[offset2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000)));
            ATOMIC_ADD(&fieldBuffers[offset2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000)));
peastman's avatar
peastman committed
185
186
187
188
189
190
191
192
193
194
        }
    }

    // Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
    // of them (no cutoff).

#ifdef USE_CUTOFF
    unsigned int numTiles = interactionCount[0];
    if (numTiles > maxTiles)
        return; // There wasn't enough memory for the neighbor list.
195
196
    int tile = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((mm_long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
    int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((mm_long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
peastman's avatar
peastman committed
197
#else
198
199
    int tile = (int) (warp*(mm_long)numTiles/totalWarps);
    int end = (int) ((warp+1)*(mm_long)numTiles/totalWarps);
peastman's avatar
peastman committed
200
201
202
#endif
    int skipBase = 0;
    int currentSkipIndex = tbx;
203
204
205
    LOCAL int atomIndices[THREAD_BLOCK_SIZE];
    LOCAL volatile int skipTiles[THREAD_BLOCK_SIZE];
    skipTiles[LOCAL_ID] = -1;
peastman's avatar
peastman committed
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
    
    while (tile < end) {
        real3 field = make_real3(0);
        bool includeTile = true;
        
        // Extract the coordinates of this tile.
        
        int x, y;
        bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
        x = tiles[tile];
        real4 blockSizeX = blockSize[x];
        singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
                              0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
                              0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
        y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*tile));
        x = (tile-y*NUM_BLOCKS+y*(y+1)/2);
        if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
            y += (x < y ? -1 : 1);
            x = (tile-y*NUM_BLOCKS+y*(y+1)/2);
        }

        // Skip over tiles that have exclusions, since they were already processed.

231
        SYNC_WARPS;
peastman's avatar
peastman committed
232
        while (skipTiles[tbx+TILE_SIZE-1] < tile) {
233
            SYNC_WARPS;
peastman's avatar
peastman committed
234
            if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
235
                int2 tile = exclusionTiles[skipBase+tgx];
236
                skipTiles[LOCAL_ID] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
peastman's avatar
peastman committed
237
238
            }
            else
239
                skipTiles[LOCAL_ID] = end;
peastman's avatar
peastman committed
240
241
            skipBase += TILE_SIZE;            
            currentSkipIndex = tbx;
242
            SYNC_WARPS;
peastman's avatar
peastman committed
243
244
245
246
247
248
249
250
251
252
253
254
        }
        while (skipTiles[currentSkipIndex] < tile)
            currentSkipIndex++;
        includeTile = (skipTiles[currentSkipIndex] != tile);
#endif
        if (includeTile) {
            unsigned int atom1 = x*TILE_SIZE + tgx;

            // Load atom data for this tile.
            
            real4 pos1 = posq[atom1];
            LOAD_ATOM1_PARAMETERS
255
            const unsigned int localAtomIndex = LOCAL_ID;
peastman's avatar
peastman committed
256
257
258
259
260
#ifdef USE_CUTOFF
            unsigned int j = interactingAtoms[tile*TILE_SIZE+tgx];
#else
            unsigned int j = y*TILE_SIZE + tgx;
#endif
261
            atomIndices[LOCAL_ID] = j;
peastman's avatar
peastman committed
262
263
            if (j < PADDED_NUM_ATOMS) {
                real4 tempPosq = posq[j];
264
265
266
                localData[localAtomIndex].x = tempPosq.x;
                localData[localAtomIndex].y = tempPosq.y;
                localData[localAtomIndex].z = tempPosq.z;
peastman's avatar
peastman committed
267
                LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
268
269
270
                localData[localAtomIndex].fx = 0;
                localData[localAtomIndex].fy = 0;
                localData[localAtomIndex].fz = 0;
peastman's avatar
peastman committed
271
            }
272
            SYNC_WARPS;
peastman's avatar
peastman committed
273
274
275
276
277
278
279
#ifdef USE_PERIODIC
            if (singlePeriodicCopy) {
                // The box is small enough that we can just translate all the atoms into a single periodic
                // box, then skip having to apply periodic boundary conditions later.

                real4 blockCenterX = blockCenter[x];
                APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
280
281
                APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[LOCAL_ID], blockCenterX)
                SYNC_WARPS;
peastman's avatar
peastman committed
282
283
284
                unsigned int tj = tgx;
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
                    int atom2 = tbx+tj;
285
                    real3 pos2 = make_real3(localData[atom2].x, localData[atom2].y, localData[atom2].z);
peastman's avatar
peastman committed
286
287
288
289
290
291
292
293
294
295
296
297
298
                    real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
                    real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
                    if (r2 < CUTOFF_SQUARED) {
                        real invR = RSQRT(r2);
                        real r = r2*invR;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = atomIndices[tbx+tj];
                        real3 tempField1 = make_real3(0);
                        real3 tempField2 = make_real3(0);
                        if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
                            COMPUTE_FIELD
                        }
                        field += tempField1;
299
300
301
                        localData[tbx+tj].fx += tempField2.x;
                        localData[tbx+tj].fy += tempField2.y;
                        localData[tbx+tj].fz += tempField2.z;
peastman's avatar
peastman committed
302
303
                    }
                    tj = (tj + 1) & (TILE_SIZE - 1);
304
                    SYNC_WARPS;
peastman's avatar
peastman committed
305
306
307
308
309
310
311
312
313
314
                }
            }
            else
#endif
            {
                // We need to apply periodic boundary conditions separately for each interaction.

                unsigned int tj = tgx;
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
                    int atom2 = tbx+tj;
315
                    real3 pos2 = make_real3(localData[atom2].x, localData[atom2].y, localData[atom2].z);
peastman's avatar
peastman committed
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
                    real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
                    APPLY_PERIODIC_TO_DELTA(delta)
#endif
                    real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
                    if (r2 < CUTOFF_SQUARED) {
#endif
                        real invR = RSQRT(r2);
                        real r = r2*invR;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = atomIndices[tbx+tj];
                        real3 tempField1 = make_real3(0);
                        real3 tempField2 = make_real3(0);
                        if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
                            COMPUTE_FIELD
                        }
                        field += tempField1;
334
335
336
                        localData[tbx+tj].fx += tempField2.x;
                        localData[tbx+tj].fy += tempField2.y;
                        localData[tbx+tj].fz += tempField2.z;
peastman's avatar
peastman committed
337
338
339
340
#ifdef USE_CUTOFF
                    }
#endif
                    tj = (tj + 1) & (TILE_SIZE - 1);
341
                    SYNC_WARPS;
peastman's avatar
peastman committed
342
343
344
345
346
                }
            }
        
            // Write results.

347
348
349
            ATOMIC_ADD(&fieldBuffers[atom1], (mm_ulong) ((mm_long) (field.x*0x100000000)));
            ATOMIC_ADD(&fieldBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (field.y*0x100000000)));
            ATOMIC_ADD(&fieldBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (field.z*0x100000000)));
peastman's avatar
peastman committed
350
#ifdef USE_CUTOFF
351
            unsigned int atom2 = atomIndices[LOCAL_ID];
peastman's avatar
peastman committed
352
353
354
355
#else
            unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
            if (atom2 < PADDED_NUM_ATOMS) {
356
357
358
                ATOMIC_ADD(&fieldBuffers[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000)));
                ATOMIC_ADD(&fieldBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000)));
                ATOMIC_ADD(&fieldBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000)));
peastman's avatar
peastman committed
359
360
361
362
363
364
365
366
367
368
369
            }
        }
        tile++;
    }
}

#define COMPUTING_EXCEPTIONS

/**
 * Compute the electrostatic field from nonbonded exceptions.
 */
370
371
KERNEL void computeFieldExceptions(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ulong* RESTRICT fieldBuffers,
        GLOBAL const int2* RESTRICT exceptionAtoms, GLOBAL const real* RESTRICT exceptionScale
peastman's avatar
peastman committed
372
373
374
375
#ifdef USE_CUTOFF
        , real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#endif
        PARAMETER_ARGUMENTS) {
376
    for (int index = GLOBAL_ID; index < NUM_EXCEPTIONS; index += GLOBAL_SIZE) {
peastman's avatar
peastman committed
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
        int2 atoms = exceptionAtoms[index];
        int atom1 = atoms.x;
        int atom2 = atoms.y;
        real4 pos1 = posq[atom1];
        real4 pos2 = posq[atom2];
        LOAD_ATOM1_PARAMETERS
        LOAD_ATOM2_PARAMETERS_FROM_GLOBAL
        real scale = exceptionScale[index];
        real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
        APPLY_PERIODIC_TO_DELTA(delta)
#endif
        real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
        if (r2 < CUTOFF_SQUARED) {
#endif
            real invR = RSQRT(r2);
            real r = r2*invR;
            real3 tempField1 = make_real3(0);
            real3 tempField2 = make_real3(0);
            COMPUTE_FIELD
398
399
400
401
402
403
            ATOMIC_ADD(&fieldBuffers[atom1], (mm_ulong) ((mm_long) (tempField1.x*0x100000000)));
            ATOMIC_ADD(&fieldBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempField1.y*0x100000000)));
            ATOMIC_ADD(&fieldBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempField1.z*0x100000000)));
            ATOMIC_ADD(&fieldBuffers[atom2], (mm_ulong) ((mm_long) (tempField2.x*0x100000000)));
            ATOMIC_ADD(&fieldBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempField2.y*0x100000000)));
            ATOMIC_ADD(&fieldBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempField2.z*0x100000000)));
peastman's avatar
peastman committed
404
405
406
407
408
#ifdef USE_CUTOFF
        }
#endif
    }
}