kCalculateAmoebaCudaPmeDirectElectrostatic.h 21.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "amoebaScaleFactors.h"

__global__ 
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(384, 1)
32
#elif (__CUDA_ARCH__ >= 120)
33
34
35
36
__launch_bounds__(128, 1)
#else
__launch_bounds__(64, 1)
#endif
Mark Friedrichs's avatar
Mark Friedrichs committed
37
38
void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
                            unsigned int* workUnit, float* outputForce, float* outputTorque
39
40
41
42
43
44
45

#ifdef AMOEBA_DEBUG
                           , float4* debugArray, unsigned int targetAtom
#endif
){

#ifdef AMOEBA_DEBUG
46
47
    int maxPullIndex = 2;
    float4 pullBack[12];
48
49
#endif

Mark Friedrichs's avatar
Mark Friedrichs committed
50
    extern __shared__ PmeDirectElectrostaticParticle sA[];
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67

    unsigned int totalWarps      = gridDim.x*blockDim.x/GRID;
    unsigned int warp            = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits    = cSim.pInteractionCount[0];
    unsigned int pos             = warp*numWorkUnits/totalWarps;
    unsigned int end             = (warp+1)*numWorkUnits/totalWarps;
    unsigned int lasty           = 0xFFFFFFFF;
    float totalEnergy            = 0.0f;     

    float scalingFactors[LastScalingIndex];

    while (pos < end)
    {

        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;
Mark Friedrichs's avatar
Mark Friedrichs committed
68
69
70
        int  dScaleMask;
        int2 pScaleMask;
        int2 mScaleMask;
71
72
73
74
75
76
77
78
79

        // Extract cell coordinates

        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

        unsigned int tgx              = threadIdx.x & (GRID - 1);
        unsigned int tbx              = threadIdx.x - tgx;
        unsigned int tj               = tgx;

Mark Friedrichs's avatar
Mark Friedrichs committed
80
        PmeDirectElectrostaticParticle* psA    = &sA[tbx];
81
        unsigned int atomI            = x + tgx;
Mark Friedrichs's avatar
Mark Friedrichs committed
82
83
        PmeDirectElectrostaticParticle localParticle;
        loadPmeDirectElectrostaticShared(&localParticle, atomI );
84

Mark Friedrichs's avatar
Mark Friedrichs committed
85
86
87
        localParticle.force[0]        = 0.0f;
        localParticle.force[1]        = 0.0f;
        localParticle.force[2]        = 0.0f;
88

Mark Friedrichs's avatar
Mark Friedrichs committed
89
90
91
        localParticle.torque[0]       = 0.0f;
        localParticle.torque[1]       = 0.0f;
        localParticle.torque[2]       = 0.0f;
92
93
94
95
96
97
98
99
100
101
102

        scalingFactors[PScaleIndex]   = 1.0f;
        scalingFactors[DScaleIndex]   = 1.0f;
        scalingFactors[UScaleIndex]   = 1.0f;
        scalingFactors[MScaleIndex]   = 1.0f;

        if (x == y) // Handle diagonals uniquely at 50% efficiency
        {

            // load shared data

Mark Friedrichs's avatar
Mark Friedrichs committed
103
            loadPmeDirectElectrostaticShared( &(sA[threadIdx.x]), atomI );
104

Mark Friedrichs's avatar
Mark Friedrichs committed
105
            if (bExclusionFlag)
106
107
108
            {
                unsigned int xi       = x >> GRIDBITS;
                unsigned int cell     = xi + xi*cAmoebaSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
Mark Friedrichs's avatar
Mark Friedrichs committed
109
110
111
112
113
114
                dScaleMask            = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                pScaleMask            = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                mScaleMask            = cAmoebaSim.pM_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
            } else {
                scalingFactors[DScaleIndex] = scalingFactors[PScaleIndex] = scalingFactors[MScaleIndex] = 1.0f;
            }
115

Mark Friedrichs's avatar
Mark Friedrichs committed
116
117
            for (unsigned int j = 0; j < GRID; j++)
            {
118

Mark Friedrichs's avatar
Mark Friedrichs committed
119
                float force[3];
Peter Eastman's avatar
Peter Eastman committed
120
                float3 torque[2];
121

Mark Friedrichs's avatar
Mark Friedrichs committed
122
                unsigned int atomJ = y + j;
123

Mark Friedrichs's avatar
Mark Friedrichs committed
124
                // set scale factors
125

Mark Friedrichs's avatar
Mark Friedrichs committed
126
127
                if (bExclusionFlag)
                {
128
129
130
                    getMaskedDScaleFactor( j, dScaleMask, scalingFactors + DScaleIndex );
                    getMaskedPScaleFactor( j, pScaleMask, scalingFactors + PScaleIndex );
                    getMaskedMScaleFactor( j, mScaleMask, scalingFactors + MScaleIndex );
Mark Friedrichs's avatar
Mark Friedrichs committed
131
                }
132

Mark Friedrichs's avatar
Mark Friedrichs committed
133
                // force
134

Mark Friedrichs's avatar
Mark Friedrichs committed
135
136
137
                float energy;
                calculatePmeDirectElectrostaticPairIxn_kernel( localParticle,   psA[j],
                                                               scalingFactors, force, torque, &energy
138
139
140
141
142
#ifdef AMOEBA_DEBUG
, pullBack
#endif
 );

Mark Friedrichs's avatar
Mark Friedrichs committed
143
144
                // nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
                // by setting match flag
145

Mark Friedrichs's avatar
Mark Friedrichs committed
146
                unsigned int mask       =  ( (atomI == atomJ) || (atomI >= cAmoebaSim.numberOfAtoms) || (atomJ >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1;
147

Mark Friedrichs's avatar
Mark Friedrichs committed
148
                // add to field at atomI the field due atomJ's charge/dipole/quadrupole
149

Mark Friedrichs's avatar
Mark Friedrichs committed
150
151
152
                localParticle.force[0]            += mask ? force[0]     : 0.0f;
                localParticle.force[1]            += mask ? force[1]     : 0.0f;
                localParticle.force[2]            += mask ? force[2]     : 0.0f;
153

Peter Eastman's avatar
Peter Eastman committed
154
155
156
                localParticle.torque[0]           += mask ? torque[0].x : 0.0f;
                localParticle.torque[1]           += mask ? torque[0].y : 0.0f;
                localParticle.torque[2]           += mask ? torque[0].z : 0.0f;
157

Mark Friedrichs's avatar
Mark Friedrichs committed
158
                totalEnergy                       += mask ? 0.5*energy   : 0.0f;
159
160

#ifdef AMOEBA_DEBUG
161
162
163
164
165
166
/*
energy =  mask ? 0.5*energy   : 0.0f;
if( atomI < 200 && (fabs( energy ) > 1.0e+8 || energy != energy) ){
    debugSetup( atomI, atomJ, debugArray, pullBack );
} */

167
if( atomI == targetAtom ){
Mark Friedrichs's avatar
Mark Friedrichs committed
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
    unsigned int index                 = (atomI == targetAtom) ? atomJ : atomI;
    float blockId                      = 1.0f;

    debugArray[index].x                = (float) atomI;
    debugArray[index].y                = (float) atomJ;
    debugArray[index].z                = (float) y;
    debugArray[index].w                = blockId;

    index                             += cAmoebaSim.paddedNumberOfAtoms;
    debugArray[index].x                = mask ? force[0]     : 0.0f;
    debugArray[index].y                = mask ? force[1]     : 0.0f;
    debugArray[index].z                = mask ? force[2]     : 0.0f;
    debugArray[index].w                = blockId;


    index                             += cAmoebaSim.paddedNumberOfAtoms;
Peter Eastman's avatar
Peter Eastman committed
184
185
186
    debugArray[index].x                = mask ? torque[0].x : 0.0f;
    debugArray[index].y                = mask ? torque[0].y : 0.0f;
    debugArray[index].z                = mask ? torque[0].z : 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
187
    debugArray[index].w                = mask ? energy       : 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
188
189

    index                             += cAmoebaSim.paddedNumberOfAtoms;
Peter Eastman's avatar
Peter Eastman committed
190
191
192
    debugArray[index].x                = mask ? torque[0].x : 0.0f;
    debugArray[index].y                = mask ? torque[0].y : 0.0f;
    debugArray[index].z                = mask ? torque[0].z : 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
193
    debugArray[index].w                = (float) (blockIdx.x * blockDim.x + threadIdx.x);
Mark Friedrichs's avatar
Mark Friedrichs committed
194

195
    for( int pullIndex = 0; pullIndex < maxPullIndex; pullIndex++ ){
Mark Friedrichs's avatar
Mark Friedrichs committed
196
197
198
199
200
201
        index                             += cAmoebaSim.paddedNumberOfAtoms;
        debugArray[index].x                = pullBack[pullIndex].x;
        debugArray[index].y                = pullBack[pullIndex].y;
        debugArray[index].z                = pullBack[pullIndex].z;
        debugArray[index].w                = pullBack[pullIndex].w;
    }
202
203
204
}
#endif

Mark Friedrichs's avatar
Mark Friedrichs committed
205
            } // end of j-loop
206

Mark Friedrichs's avatar
Mark Friedrichs committed
207
208
209
210
            // include self energy and self torque

            if( atomI < cAmoebaSim.numberOfAtoms ){
                calculatePmeSelfTorqueElectrostaticPairIxn_kernel( localParticle );
Mark Friedrichs's avatar
Mark Friedrichs committed
211
212
213
                float energy;
                calculatePmeSelfEnergyElectrostaticPairIxn_kernel( localParticle, &energy );
                totalEnergy += energy;
Mark Friedrichs's avatar
Mark Friedrichs committed
214
215
            }

216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
            // Write results

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            float  of;
            unsigned int offset                 = 3*(x + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
            of                                  = outputForce[offset];
            of                                 += localParticle.force[0];
            outputForce[offset]                 = of;

            of                                  = outputForce[offset+1];
            of                                 += localParticle.force[1];
            outputForce[offset+1]               = of;

            of                                  = outputForce[offset+2];
            of                                 += localParticle.force[2];
            outputForce[offset+2]               = of;

            of                                  = outputTorque[offset];
            of                                 += localParticle.torque[0];
            outputTorque[offset]                = of;

            of                                  = outputTorque[offset+1];
            of                                 += localParticle.torque[1];
            outputTorque[offset+1]              = of;

            of                                  = outputTorque[offset+2];
            of                                 += localParticle.torque[2];
            outputTorque[offset+2]              = of;

#else
            unsigned int offset                 = 3*(x + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
            outputForce[offset]                 = localParticle.force[0];
            outputForce[offset+1]               = localParticle.force[1];
            outputForce[offset+2]               = localParticle.force[2];

            outputTorque[offset]                = localParticle.torque[0];
            outputTorque[offset+1]              = localParticle.torque[1];
            outputTorque[offset+2]              = localParticle.torque[2];
#endif

Mark Friedrichs's avatar
Mark Friedrichs committed
256
        } else {
257

Mark Friedrichs's avatar
Mark Friedrichs committed
258
259
260
261
262
263
264
265
            if (lasty != y) {

                // load shared data

               loadPmeDirectElectrostaticShared( &(sA[threadIdx.x]), (y+tgx) );

            }

Mark Friedrichs's avatar
Mark Friedrichs committed
266
267
268
269
            unsigned int flags           = cSim.pInteractionFlag[pos];
            if (flags == 0) {
                // No interactions in this block.
            } else {
270

Mark Friedrichs's avatar
Mark Friedrichs committed
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
                if (lasty != y) {
    
                    // load shared data
    
                   loadPmeDirectElectrostaticShared( &(sA[threadIdx.x]), (y+tgx) );
    
                }
    
                sA[threadIdx.x].force[0]     = 0.0f;
                sA[threadIdx.x].force[1]     = 0.0f;
                sA[threadIdx.x].force[2]     = 0.0f;
    
                sA[threadIdx.x].torque[0]    = 0.0f;
                sA[threadIdx.x].torque[1]    = 0.0f;
                sA[threadIdx.x].torque[2]    = 0.0f;
    
                if( bExclusionFlag )
                {
                    unsigned int xi   = x >> GRIDBITS;
                    unsigned int yi   = y >> GRIDBITS;
                    unsigned int cell = xi+yi*cAmoebaSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
                    dScaleMask        = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                    pScaleMask        = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                    mScaleMask        = cAmoebaSim.pM_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                } else {
                    scalingFactors[DScaleIndex] = scalingFactors[PScaleIndex] = scalingFactors[MScaleIndex] = 1.0f;
                }
       
299
300
                for (unsigned int j = 0; j < GRID; j++)
                {
Peter Eastman's avatar
Peter Eastman committed
301
                    if ((flags&(1<<j)) != 0)
Mark Friedrichs's avatar
Mark Friedrichs committed
302
                    {
Peter Eastman's avatar
Peter Eastman committed
303
304
305
306
                        unsigned int jIdx  = (flags == 0xFFFFFFFF) ? tj : j;
                        unsigned int atomJ = y + jIdx;

                        float force[3];
Peter Eastman's avatar
Peter Eastman committed
307
                        float3 torque[2];
Peter Eastman's avatar
Peter Eastman committed
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322

                        // set scale factors

                        if( bExclusionFlag )
                        {
                            getMaskedDScaleFactor( jIdx, dScaleMask, scalingFactors + DScaleIndex );
                            getMaskedPScaleFactor( jIdx, pScaleMask, scalingFactors + PScaleIndex );
                            getMaskedMScaleFactor( jIdx, mScaleMask, scalingFactors + MScaleIndex );
                        }

                        // force

                        float energy;
                        calculatePmeDirectElectrostaticPairIxn_kernel( localParticle,   psA[jIdx],
                                                                       scalingFactors, force, torque, &energy
323
#ifdef AMOEBA_DEBUG
Mark Friedrichs's avatar
Mark Friedrichs committed
324
    , pullBack
325
#endif
Peter Eastman's avatar
Peter Eastman committed
326
         );
327

Peter Eastman's avatar
Peter Eastman committed
328
                        // check if atoms out-of-bounds
329

Peter Eastman's avatar
Peter Eastman committed
330
                        unsigned int mask    =  ( (atomI >= cAmoebaSim.numberOfAtoms) || (atomJ >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1;
331

Peter Eastman's avatar
Peter Eastman committed
332
                        // add force and torque to atom I due atom J
Mark Friedrichs's avatar
Mark Friedrichs committed
333

Peter Eastman's avatar
Peter Eastman committed
334
335
336
337
                        localParticle.force[0]         += mask ? force[0]      : 0.0f;
                        localParticle.force[1]         += mask ? force[1]      : 0.0f;
                        localParticle.force[2]         += mask ? force[2]      : 0.0f;

Peter Eastman's avatar
Peter Eastman committed
338
339
340
                        localParticle.torque[0]        += mask ? torque[0].x  : 0.0f;
                        localParticle.torque[1]        += mask ? torque[0].y  : 0.0f;
                        localParticle.torque[2]        += mask ? torque[0].z  : 0.0f;
Peter Eastman's avatar
Peter Eastman committed
341
342
343
344
345
346
347
348
349
350
351

                        totalEnergy                    += mask ? energy        : 0.0f;

                        // add force and torque to atom J due atom I

                        if( flags == 0xFFFFFFFF ){

                            psA[jIdx].force[0]               -= mask ?  force[0]     : 0.0f;
                            psA[jIdx].force[1]               -= mask ?  force[1]     : 0.0f;
                            psA[jIdx].force[2]               -= mask ?  force[2]     : 0.0f;

Peter Eastman's avatar
Peter Eastman committed
352
353
354
                            psA[jIdx].torque[0]              += mask ?  torque[1].x : 0.0f;
                            psA[jIdx].torque[1]              += mask ?  torque[1].y : 0.0f;
                            psA[jIdx].torque[2]              += mask ?  torque[1].z : 0.0f;
Peter Eastman's avatar
Peter Eastman committed
355
356
357
358
359
360
361

                        } else {

                            sA[threadIdx.x].tempForce[0]     = mask ? 0.0f : force[0];
                            sA[threadIdx.x].tempForce[1]     = mask ? 0.0f : force[1];
                            sA[threadIdx.x].tempForce[2]     = mask ? 0.0f : force[2];

Peter Eastman's avatar
Peter Eastman committed
362
363
364
                            sA[threadIdx.x].tempTorque[0]    = mask ? 0.0f : torque[1].x;
                            sA[threadIdx.x].tempTorque[1]    = mask ? 0.0f : torque[1].y;
                            sA[threadIdx.x].tempTorque[2]    = mask ? 0.0f : torque[1].z;
Peter Eastman's avatar
Peter Eastman committed
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383

                            if( tgx % 2 == 0 ){
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+1] );
                            }
                            if( tgx % 4 == 0 ){
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+2] );
                            }
                            if( tgx % 8 == 0 ){
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+4] );
                            }
                            if( tgx % 16 == 0 ){
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+8] );
                            }

                            if (tgx == 0)
                            {
                                psA[jIdx].force[0]  -= sA[threadIdx.x].tempForce[0]  + sA[threadIdx.x+16].tempForce[0];
                                psA[jIdx].force[1]  -= sA[threadIdx.x].tempForce[1]  + sA[threadIdx.x+16].tempForce[1];
                                psA[jIdx].force[2]  -= sA[threadIdx.x].tempForce[2]  + sA[threadIdx.x+16].tempForce[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
384

Peter Eastman's avatar
Peter Eastman committed
385
386
387
388
                                psA[jIdx].torque[0] += sA[threadIdx.x].tempTorque[0] + sA[threadIdx.x+16].tempTorque[0];
                                psA[jIdx].torque[1] += sA[threadIdx.x].tempTorque[1] + sA[threadIdx.x+16].tempTorque[1];
                                psA[jIdx].torque[2] += sA[threadIdx.x].tempTorque[2] + sA[threadIdx.x+16].tempTorque[2];
                            }
Mark Friedrichs's avatar
Mark Friedrichs committed
389
390
391
                        }
                    }
 
Mark Friedrichs's avatar
Mark Friedrichs committed
392
                    tj = (tj + 1) & (GRID - 1);
393

Mark Friedrichs's avatar
Mark Friedrichs committed
394
                } // end of j-loop
395

Mark Friedrichs's avatar
Mark Friedrichs committed
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
                // Write results
    
    #ifdef USE_OUTPUT_BUFFER_PER_WARP
    
                float of;
                unsigned int offset                 = 3*(x + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
                of                                  = outputForce[offset];
                of                                 += localParticle.force[0];
                outputForce[offset]                 = of;
    
                of                                  = outputForce[offset+1];
                of                                 += localParticle.force[1];
                outputForce[offset+1]               = of;
    
                of                                  = outputForce[offset+2];
                of                                 += localParticle.force[2];
                outputForce[offset+2]               = of;
    
                of                                  = outputTorque[offset];
                of                                 += localParticle.torque[0];
                outputTorque[offset]                 = of;
    
                of                                  = outputTorque[offset+1];
                of                                 += localParticle.torque[1];
                outputTorque[offset+1]               = of;
    
                of                                  = outputTorque[offset+2];
                of                                 += localParticle.torque[2];
                outputTorque[offset+2]               = of;
    
                offset                              = 3*(y + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
    
                of                                  = outputForce[offset];
                of                                 += sA[threadIdx.x].force[0];
                outputForce[offset]                 = of;
    
                of                                  = outputForce[offset+1];
                of                                 += sA[threadIdx.x].force[1];
                outputForce[offset+1]               = of;
    
                of                                  = outputForce[offset+2];
                of                                 += sA[threadIdx.x].force[2];
                outputForce[offset+2]               = of;
    
                of                                  = outputTorque[offset];
                of                                 += sA[threadIdx.x].torque[0];
                outputTorque[offset]                = of;
    
                of                                  = outputTorque[offset+1];
                of                                 += sA[threadIdx.x].torque[1];
                outputTorque[offset+1]              = of;
    
                of                                  = outputTorque[offset+2];
                of                                 += sA[threadIdx.x].torque[2];
                outputTorque[offset+2]              = of;
    
    #else
                unsigned int offset                 = 3*(x + tgx + (y >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
    
                outputForce[offset]                 = localParticle.force[0];
                outputForce[offset+1]               = localParticle.force[1];
                outputForce[offset+2]               = localParticle.force[2];
    
                outputTorque[offset]                = localParticle.torque[0];
                outputTorque[offset+1]              = localParticle.torque[1];
                outputTorque[offset+2]              = localParticle.torque[2];
    
                offset                              = 3*(y + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
    
                outputForce[offset]                 = sA[threadIdx.x].force[0];
                outputForce[offset+1]               = sA[threadIdx.x].force[1];
                outputForce[offset+2]               = sA[threadIdx.x].force[2];
    
                outputTorque[offset]                = sA[threadIdx.x].torque[0];
                outputTorque[offset+1]              = sA[threadIdx.x].torque[1];
                outputTorque[offset+2]              = sA[threadIdx.x].torque[2];
    
    #endif
                lasty = y;
475

Mark Friedrichs's avatar
Mark Friedrichs committed
476
            } // end of pInteractionFlag block
477
478
479
480
481
        }
        pos++;
    }
    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += totalEnergy;
}