gbsaObc_nvidia.cl 21.3 KB
Newer Older
1
#define TILE_SIZE 32
2
3
4
5
6

/**
 * Compute the Born sum.
 */

7
__kernel void computeBornSum(__global float* global_bornSum, __local float* local_bornSum, __global float4* posq,
8
        __local float4* local_posq, __global float2* global_params, __local float2* local_params, __local float* tempBuffer, __global unsigned int* tiles,
9
#ifdef USE_CUTOFF
10
        __global unsigned int* interactionFlags, __global unsigned int* interactionCount) {
11
12
13
14
15
16
#else
        unsigned int numTiles) {
#endif
#ifdef USE_CUTOFF
    unsigned int numTiles = interactionCount[0];
#endif
17
18
    unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
    unsigned int warp = get_global_id(0)/TILE_SIZE;
19
20
21
22
23
24
25
26
    unsigned int pos = warp*numTiles/totalWarps;
    unsigned int end = (warp+1)*numTiles/totalWarps;
    float energy = 0.0f;
    unsigned int lasty = 0xFFFFFFFF;

    while (pos < end) {
        // Extract the coordinates of this tile
        unsigned int x = tiles[pos];
27
28
29
        unsigned int y = ((x >> 2) & 0x7fff)*TILE_SIZE;
        x = (x>>17)*TILE_SIZE;
        unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
30
        unsigned int tbx = get_local_id(0) - tgx;
31
        unsigned int atom1 = x + tgx;
32
        float bornSum = 0.0f;
33
34
        float4 posq1 = posq[atom1];
        float2 params1 = global_params[atom1];
35
36
37
38
39
        if (x == y) {
            // This tile is on the diagonal.

            local_posq[get_local_id(0)] = posq1;
            local_params[get_local_id(0)] = params1;
40
41
42
            unsigned int xi = x/TILE_SIZE;
            unsigned int tile = xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*(xi+1)/2;
            for (unsigned int j = 0; j < TILE_SIZE; j++) {
43
44
                float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
45
46
47
                delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
                delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
                delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
48
49
50
#endif
                float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
51
                if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
52
#else
53
                if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) {
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#endif
                    float r = sqrt(r2);
                    float invR = 1.0f/r;
                    float2 params2 = local_params[tbx+j];
                    float rScaledRadiusJ = r+params2.y;
                    if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
                        float l_ij = 1.0f/max(params1.x, fabs(r-params2.y));
                        float u_ij = 1.0f/rScaledRadiusJ;
                        float l_ij2 = l_ij*l_ij;
                        float u_ij2 = u_ij*u_ij;
                        float ratio = log(u_ij / l_ij);
                        bornSum += l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) + (0.50f*invR*ratio) +
                                         (0.25f*params2.y*params2.y*invR)*(l_ij2-u_ij2);
                        if (params1.x < params2.x-r)
                            bornSum += 2.0f*(1.0f/params1.x-l_ij);
                    }
                }
            }

            // Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
75
            unsigned int offset = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
76
#else
77
            unsigned int offset = x + tgx + warp*PADDED_NUM_ATOMS;
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#endif
            global_bornSum[offset] += bornSum;
        }
        else {
            // This is an off-diagonal tile.

            if (lasty != y) {
                unsigned int j = y + tgx;
                local_posq[get_local_id(0)] = posq[j];
                local_params[get_local_id(0)] = global_params[j];
            }
            local_bornSum[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF
            unsigned int flags = interactionFlags[pos];
            if (flags != 0xFFFFFFFF) {
                if (flags == 0) {
                    // No interactions in this tile.
                }
                else {
                    // Compute only a subset of the interactions in this tile.

99
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
100
101
102
                        if ((flags&(1<<j)) != 0) {
                            float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
103
104
105
                            delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
                            delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
                            delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
106
107
#endif
                            float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
Peter Eastman's avatar
Peter Eastman committed
108
                            tempBuffer[get_local_id(0)] = 0.0f;
109
#ifdef USE_CUTOFF
110
                            if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
111
#else
112
                            if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) {
113
114
115
116
117
#endif
                                float r = sqrt(r2);
                                float invR = 1.0f/r;
                                float2 params2 = local_params[tbx+j];
                                float rScaledRadiusJ = r+params2.y;
Peter Eastman's avatar
Peter Eastman committed
118
                                if (params1.x < rScaledRadiusJ) {
119
120
121
122
123
124
125
126
127
128
129
                                    float l_ij = 1.0f/max(params1.x, fabs(r-params2.y));
                                    float u_ij = 1.0f/rScaledRadiusJ;
                                    float l_ij2 = l_ij*l_ij;
                                    float u_ij2 = u_ij*u_ij;
                                    float ratio = log(u_ij / l_ij);
                                    bornSum += l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) + (0.50f*invR*ratio) +
                                                     (0.25f*params2.y*params2.y*invR)*(l_ij2-u_ij2);
                                    if (params1.x < params2.x-r)
                                        bornSum += 2.0f*(1.0f/params1.x-l_ij);
                                }
                                float rScaledRadiusI = r+params1.y;
Peter Eastman's avatar
Peter Eastman committed
130
                                if (params2.x < rScaledRadiusI) {
131
                                    float l_ij = 1.0f/max(params2.x, fabs(r-params1.y));
Peter Eastman's avatar
Peter Eastman committed
132
                                    float u_ij = 1.0f/rScaledRadiusI;
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
                                    float l_ij2 = l_ij*l_ij;
                                    float u_ij2 = u_ij*u_ij;
                                    float ratio = log(u_ij / l_ij);
                                    float term = l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) + (0.50f*invR*ratio) +
                                                     (0.25f*params1.y*params1.y*invR)*(l_ij2-u_ij2);
                                    if (params2.x < params1.x-r)
                                        term += 2.0f*(1.0f/params2.x-l_ij);
                                    tempBuffer[get_local_id(0)] = term;
                                }
                            }

                            // Sum the forces on atom j.

                            if (tgx % 2 == 0)
                                tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+1];
                            if (tgx % 4 == 0)
                                tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+2];
                            if (tgx % 8 == 0)
                                tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+4];
                            if (tgx % 16 == 0)
                                tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+8];
                            if (tgx == 0)
                                local_bornSum[tbx+j] += tempBuffer[get_local_id(0)] + tempBuffer[get_local_id(0)+16];
                        }
                    }
                }
            }
            else
#endif
            {
                // Compute the full set of interactions in this tile.

165
166
167
168
169
                unsigned int xi = x/TILE_SIZE;
                unsigned int yi = y/TILE_SIZE;
                unsigned int tile = xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*(yi+1)/2;
                unsigned int tj = tgx;
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
170
171
                    float4 delta = (float4) (local_posq[tbx+tj].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
172
173
174
                    delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
                    delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
                    delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
175
176
177
#endif
                    float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
178
                    if (atom1 < NUM_ATOMS && y+tj < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
179
#else
180
                    if (atom1 < NUM_ATOMS && y+tj < NUM_ATOMS) {
181
182
183
184
185
#endif
                        float r = sqrt(r2);
                        float invR = 1.0f/r;
                        float2 params2 = local_params[tbx+tj];
                        float rScaledRadiusJ = r+params2.y;
Peter Eastman's avatar
Peter Eastman committed
186
                        if (params1.x < rScaledRadiusJ) {
187
188
189
190
191
192
193
194
195
196
197
                            float l_ij = 1.0f/max(params1.x, fabs(r-params2.y));
                            float u_ij = 1.0f/rScaledRadiusJ;
                            float l_ij2 = l_ij*l_ij;
                            float u_ij2 = u_ij*u_ij;
                            float ratio = log(u_ij / l_ij);
                            bornSum += l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) + (0.50f*invR*ratio) +
                                             (0.25f*params2.y*params2.y*invR)*(l_ij2-u_ij2);
                            if (params1.x < params2.x-r)
                                bornSum += 2.0f*(1.0f/params1.x-l_ij);
                        }
                        float rScaledRadiusI = r+params1.y;
Peter Eastman's avatar
Peter Eastman committed
198
                        if (params2.x < rScaledRadiusI) {
199
                            float l_ij = 1.0f/max(params2.x, fabs(r-params1.y));
Peter Eastman's avatar
Peter Eastman committed
200
                            float u_ij = 1.0f/rScaledRadiusI;
201
202
203
204
205
206
207
                            float l_ij2 = l_ij*l_ij;
                            float u_ij2 = u_ij*u_ij;
                            float ratio = log(u_ij / l_ij);
                            float term = l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) + (0.50f*invR*ratio) +
                                             (0.25f*params1.y*params1.y*invR)*(l_ij2-u_ij2);
                            if (params2.x < params1.x-r)
                                term += 2.0f*(1.0f/params2.x-l_ij);
Peter Eastman's avatar
Peter Eastman committed
208
                            local_bornSum[tbx+tj] += term;
209
210
                        }
                    }
211
                    tj = (tj + 1) & (TILE_SIZE - 1);
212
213
214
215
216
217
                }
            }

            // Write results
            float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
218
219
            unsigned int offset1 = x + tgx + (y/TILE_SIZE)*PADDED_NUM_ATOMS;
            unsigned int offset2 = y + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
220
#else
221
222
            unsigned int offset1 = x + tgx + warp*PADDED_NUM_ATOMS;
            unsigned int offset2 = y + tgx + warp*PADDED_NUM_ATOMS;
223
224
225
226
227
228
229
230
231
#endif
            global_bornSum[offset1] += bornSum;
            global_bornSum[offset2] += local_bornSum[get_local_id(0)];
            lasty = y;
        }
        pos++;
    }
}

232
233
234
235
/**
 * First part of computing the GBSA interaction.
 */

236
__kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuffer,
237
        __global float4* posq, __local float4* local_posq, __local float4* local_force, __global float* global_bornRadii, __local float* local_bornRadii,
238
        __global float* global_bornForce, __local float* local_bornForce, __local float4* tempBuffer, __global unsigned int* tiles,
239
#ifdef USE_CUTOFF
240
        __global unsigned int* interactionFlags, __global unsigned int* interactionCount) {
241
242
243
244
245
246
#else
        unsigned int numTiles) {
#endif
#ifdef USE_CUTOFF
    unsigned int numTiles = interactionCount[0];
#endif
247
248
    unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
    unsigned int warp = get_global_id(0)/TILE_SIZE;
249
250
251
252
253
254
255
256
    unsigned int pos = warp*numTiles/totalWarps;
    unsigned int end = (warp+1)*numTiles/totalWarps;
    float energy = 0.0f;
    unsigned int lasty = 0xFFFFFFFF;

    while (pos < end) {
        // Extract the coordinates of this tile
        unsigned int x = tiles[pos];
257
258
259
        unsigned int y = ((x >> 2) & 0x7fff)*TILE_SIZE;
        x = (x>>17)*TILE_SIZE;
        unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
260
261
262
263
264
265
266
267
268
269
        unsigned int tbx = get_local_id(0) - tgx;
        unsigned int atom1 = x + tgx;
        float4 force = 0.0f;
        float4 posq1 = posq[atom1];
        float bornRadius1 = global_bornRadii[atom1];
        if (x == y) {
            // This tile is on the diagonal.

            local_posq[get_local_id(0)] = posq1;
            local_bornRadii[get_local_id(0)] = bornRadius1;
270
271
272
            unsigned int xi = x/TILE_SIZE;
            unsigned int tile = xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*(xi+1)/2;
            for (unsigned int j = 0; j < TILE_SIZE; j++) {
273
                if (atom1 < NUM_ATOMS && y+j < NUM_ATOMS) {
274
275
                    float4 posq2 = local_posq[tbx+j];
                    float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
276
#ifdef USE_PERIODIC
277
278
279
                    delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
                    delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
                    delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
280
281
282
283
284
285
286
287
288
289
#endif
                    float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
                    float r = sqrt(r2);
                    float invR = 1.0f/r;
                    float bornRadius2 = local_bornRadii[tbx+j];
                    float alpha2_ij = bornRadius1*bornRadius2;
                    float D_ij = r2/(4.0f*alpha2_ij);
                    float expTerm = exp(-D_ij);
                    float denominator2 = r2 + alpha2_ij*expTerm;
                    float denominator = sqrt(denominator2);
290
                    float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
291
292
293
294
295
                    float Gpol = tempEnergy/denominator2;
                    float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
                    force.w += dGpol_dalpha2_ij*bornRadius2;
                    float dEdR = Gpol*(1.0f - 0.25f*expTerm);
#ifdef USE_CUTOFF
296
                    if (r2 > CUTOFF_SQUARED) {
297
298
299
300
301
302
303
304
305
306
307
308
                        dEdR = 0.0f;
                        tempEnergy  = 0.0f;
                    }
#endif
                    energy += 0.5f*tempEnergy;
                    delta.xyz *= dEdR;
                    force.xyz -= delta.xyz;
                }
            }

            // Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
309
            unsigned int offset = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
310
#else
311
            unsigned int offset = x + tgx + warp*PADDED_NUM_ATOMS;
312
313
#endif
            forceBuffers[offset].xyz += force.xyz;
Peter Eastman's avatar
Peter Eastman committed
314
            global_bornForce[offset] += force.w;
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
        }
        else {
            // This is an off-diagonal tile.

            if (lasty != y) {
                unsigned int j = y + tgx;
                local_posq[get_local_id(0)] = posq[j];
                local_bornRadii[get_local_id(0)] = global_bornRadii[j];
            }
            local_force[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF
            unsigned int flags = interactionFlags[pos];
            if (flags != 0xFFFFFFFF) {
                if (flags == 0) {
                    // No interactions in this tile.
                }
                else {
                    // Compute only a subset of the interactions in this tile.

334
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
335
                        if ((flags&(1<<j)) != 0) {
336
337
                            float4 posq2 = local_posq[tbx+j];
                            float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
338
#ifdef USE_PERIODIC
339
340
341
                            delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
                            delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
                            delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
342
343
344
#endif
                            float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
                            float r = sqrt(r2);
345
                            float invR = 1.0f/r;
346
347
348
349
350
351
                            float bornRadius2 = local_bornRadii[tbx+j];
                            float alpha2_ij = bornRadius1*bornRadius2;
                            float D_ij = r2/(4.0f*alpha2_ij);
                            float expTerm = exp(-D_ij);
                            float denominator2 = r2 + alpha2_ij*expTerm;
                            float denominator = sqrt(denominator2);
352
                            float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
353
354
355
356
357
                            float Gpol = tempEnergy/denominator2;
                            float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
                            force.w += dGpol_dalpha2_ij*bornRadius2;
                            float dEdR = Gpol*(1.0f - 0.25f*expTerm);
#ifdef USE_CUTOFF
358
                            if (atom1 >= NUM_ATOMS || y+j >= NUM_ATOMS || r2 > CUTOFF_SQUARED) {
359
#else
360
                            if (atom1 >= NUM_ATOMS || y+j >= NUM_ATOMS) {
361
362
363
364
365
366
367
#endif
                                dEdR = 0.0f;
				tempEnergy = 0.0f;
                            }
			    energy += tempEnergy;
                            delta.xyz *= dEdR;
                            force.xyz -= delta.xyz;
Peter Eastman's avatar
Peter Eastman committed
368
                            tempBuffer[get_local_id(0)] = (float4) (delta.xyz, dGpol_dalpha2_ij*bornRadius1);
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390

                            // Sum the forces on atom j.

                            if (tgx % 2 == 0)
                                tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+1];
                            if (tgx % 4 == 0)
                                tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+2];
                            if (tgx % 8 == 0)
                                tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+4];
                            if (tgx % 16 == 0)
                                tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+8];
                            if (tgx == 0)
                                local_force[tbx+j] += tempBuffer[get_local_id(0)] + tempBuffer[get_local_id(0)+16];
                        }
                    }
                }
            }
            else
#endif
            {
                // Compute the full set of interactions in this tile.

391
392
393
394
395
                unsigned int xi = x/TILE_SIZE;
                unsigned int yi = y/TILE_SIZE;
                unsigned int tile = xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*(yi+1)/2;
                unsigned int tj = tgx;
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
396
                    if (atom1 < NUM_ATOMS && y+tj < NUM_ATOMS) {
397
398
                        float4 posq2 = local_posq[tbx+tj];
                        float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
399
#ifdef USE_PERIODIC
400
401
402
                        delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
                        delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
                        delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
403
404
405
#endif
                        float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
                        float r = sqrt(r2);
406
                        float invR = 1.0f/r;
407
408
409
410
411
412
                        float bornRadius2 = local_bornRadii[tbx+tj];
                        float alpha2_ij = bornRadius1*bornRadius2;
                        float D_ij = r2/(4.0f*alpha2_ij);
                        float expTerm = exp(-D_ij);
                        float denominator2 = r2 + alpha2_ij*expTerm;
                        float denominator = sqrt(denominator2);
413
                        float tempEnergy = (PREFACTOR*posq1.w*posq2.w)/denominator;
414
415
416
417
418
                        float Gpol = tempEnergy/denominator2;
                        float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
                        force.w += dGpol_dalpha2_ij*bornRadius2;
                        float dEdR = Gpol*(1.0f - 0.25f*expTerm);
#ifdef USE_CUTOFF
419
                        if (r2 > CUTOFF_SQUARED) {
420
421
422
423
424
425
426
                            dEdR = 0.0f;
                            tempEnergy  = 0.0f;
                        }
#endif
                        energy += tempEnergy;
                        delta.xyz *= dEdR;
                        force.xyz -= delta.xyz;
Peter Eastman's avatar
Peter Eastman committed
427
                        local_force[tbx+tj] += (float4) (delta.xyz, dGpol_dalpha2_ij*bornRadius1);
428
                    }
429
                    tj = (tj + 1) & (TILE_SIZE - 1);
430
431
432
433
434
                }
            }

            // Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
435
436
            unsigned int offset1 = x + tgx + (y/TILE_SIZE)*PADDED_NUM_ATOMS;
            unsigned int offset2 = y + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
437
#else
438
439
            unsigned int offset1 = x + tgx + warp*PADDED_NUM_ATOMS;
            unsigned int offset2 = y + tgx + warp*PADDED_NUM_ATOMS;
440
441
442
#endif
            forceBuffers[offset1].xyz += force.xyz;
            forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz;
Peter Eastman's avatar
Peter Eastman committed
443
444
            global_bornForce[offset1] += force.w;
            global_bornForce[offset2] += local_force[get_local_id(0)].w;
445
446
447
448
449
450
            lasty = y;
        }
        pos++;
    }
    energyBuffer[get_global_id(0)] += energy;
}