kCalculateAmoebaCudaKirkwood.h 20.8 KB
Newer Older
Mark Friedrichs's avatar
Mark Friedrichs committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "amoebaScaleFactors.h"

__global__
#if (__CUDA_ARCH__ >= 200)
Peter Eastman's avatar
Peter Eastman committed
31
__launch_bounds__(512, 1)
32
#elif (__CUDA_ARCH__ >= 120)
33
__launch_bounds__(128, 1)
Mark Friedrichs's avatar
Mark Friedrichs committed
34
#else
35
__launch_bounds__(64, 1)
Mark Friedrichs's avatar
Mark Friedrichs committed
36
37
#endif
void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
Mark Friedrichs's avatar
Mark Friedrichs committed
38
                            unsigned int* workUnit){
Mark Friedrichs's avatar
Mark Friedrichs committed
39
40
41
42
43
44
45
46
47
48

    extern __shared__ KirkwoodParticle sA[];

    unsigned int totalWarps      = gridDim.x*blockDim.x/GRID;
    unsigned int warp            = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits    = cSim.pInteractionCount[0];
    unsigned int pos             = warp*numWorkUnits/totalWarps;
    unsigned int end             = (warp+1)*numWorkUnits/totalWarps;
    unsigned int lasty           = 0xFFFFFFFF;

49
    // pWorkArray_3_1 == torque
50
51
52
53

    // pWorkArray_1_1 == dBorn
    // pWorkArray_1_2 == dBornPolar

Mark Friedrichs's avatar
Mark Friedrichs committed
54
55
56
57
58
59
60
61
62
63
64
65
66
    float energySum = 0.0f;     

    while (pos < end)
    {

        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;

        // extract cell coordinates

        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

67
68
69
        unsigned int tgx                         = threadIdx.x & (GRID - 1);
        unsigned int tbx                         = threadIdx.x - tgx;
        unsigned int tj                          = tgx;
Mark Friedrichs's avatar
Mark Friedrichs committed
70

71
72
73
        KirkwoodParticle* psA                    = &sA[tbx];
 
        unsigned int atomI                       = x + tgx;
74
        KirkwoodParticle localParticle;
75
        loadKirkwoodShared(&localParticle, atomI );
Mark Friedrichs's avatar
Mark Friedrichs committed
76

77
        zeroKirkwoodParticleSharedField( &localParticle );
Mark Friedrichs's avatar
Mark Friedrichs committed
78
79
80
        if (x == y) 
        {

81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
            loadKirkwoodShared( &(sA[threadIdx.x]), atomI );
            if( atomI < cSim.atoms ){
                for (unsigned int j = 0; j < GRID && (y+j) < cSim.atoms; j++){
                    //calculateKirkwoodPairIxn_kernel( localParticle, psA[j], 0.5f, &energySum);
                    float force[3];
                    float energy;
                    calculateKirkwoodPairIxnF1_kernel( localParticle, psA[j], &energy, force);
                    calculateKirkwoodPairIxnF2_kernel( localParticle, psA[j], &energy, force);

                    localParticle.force[0]  += force[0];
                    localParticle.force[1]  += force[1];
                    localParticle.force[2]  += force[2];
                
                    energySum               += 0.5f*energy;
                
                }
            }
Mark Friedrichs's avatar
Mark Friedrichs committed
98

99
100
101
            localParticle.force[0]  *= 0.5f;
            localParticle.force[1]  *= 0.5f;
            localParticle.force[2]  *= 0.5f;
Mark Friedrichs's avatar
Mark Friedrichs committed
102

103
            // write results
Mark Friedrichs's avatar
Mark Friedrichs committed
104

105
106
107
108
109
110
111
#ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset                 = x + tgx + warp*cSim.paddedNumberOfAtoms;
            add3dArrayToFloat4(   offset, localParticle.force,  cSim.pForce4 );
#else
            unsigned int offset                 = x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms;
            add3dArrayToFloat4(    offset, localParticle.force,  cSim.pForce4);
#endif
Mark Friedrichs's avatar
Mark Friedrichs committed
112

113
114
115
116
117
            zeroKirkwoodParticleSharedField( &localParticle );
            if( atomI < cSim.atoms ){
                for (unsigned int j = 0; j < GRID && (y+j) < cSim.atoms; j++){
                    //calculateKirkwoodPairIxn_kernel( localParticle, psA[j], 0.5f, &energySum);
#ifdef INCLUDE_TORQUE
Mark Friedrichs's avatar
Mark Friedrichs committed
118

119
120
121
122
123
124
125
126
127
128
129
                    calculateKirkwoodPairIxnT1_kernel( localParticle, psA[j] );
                    calculateKirkwoodPairIxnT2_kernel( localParticle, psA[j] );

#else

                    float torque[3];
                    calculateKirkwoodPairIxnT1_kernel( localParticle, psA[j], torque );
                    calculateKirkwoodPairIxnT2_kernel( localParticle, psA[j], torque );
                    localParticle.force[0]  += torque[0];
                    localParticle.force[1]  += torque[1];
                    localParticle.force[2]  += torque[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
130
131

#endif
132
133
134
135
136
137
            //        calculateKirkwoodPairIxnB1B2_kernel( localParticle, psA[j] );
                
                }
            }

            // write results
Mark Friedrichs's avatar
Mark Friedrichs committed
138

139
140
141
142
143
144
145
#ifdef USE_OUTPUT_BUFFER_PER_WARP
            float  of;
            offset                              = x + tgx + warp*cSim.paddedNumberOfAtoms;
/*
            of                                  = cAmoebaSim.pWorkArray_1_1[offset];
            of                                 += localParticle.dBornRadius;
            cAmoebaSim.pWorkArray_1_1[offset]   = of;
Mark Friedrichs's avatar
Mark Friedrichs committed
146

147
148
149
150
            of                                  = cAmoebaSim.pWorkArray_1_2[offset];
            of                                 += 0.5f*localParticle.dBornRadiusPolar;
            cAmoebaSim.pWorkArray_1_2[offset]   = of;
*/
Mark Friedrichs's avatar
Mark Friedrichs committed
151

152
153
154
155
156
#ifdef INCLUDE_TORQUE
            add3dArray( 3*offset, localParticle.torque, cAmoebaSim.pWorkArray_3_1 );
#else
            add3dArray( 3*offset, localParticle.force, cAmoebaSim.pWorkArray_3_1 );
#endif
Mark Friedrichs's avatar
Mark Friedrichs committed
157
158
159



160
#else
Mark Friedrichs's avatar
Mark Friedrichs committed
161
162


163
            offset                              = x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
164
165

/*
166
167
            cAmoebaSim.pWorkArray_1_1[offset]   = localParticle.dBornRadius;
            cAmoebaSim.pWorkArray_1_2[offset]   = 0.5f*localParticle.dBornRadiusPolar;
Mark Friedrichs's avatar
Mark Friedrichs committed
168
*/
169
170
171
172
#ifdef INCLUDE_TORQUE
            add3dArray( 3*offset, localParticle.torque, cAmoebaSim.pWorkArray_3_1 );
#else
            add3dArray( 3*offset, localParticle.force, cAmoebaSim.pWorkArray_3_1 );
Mark Friedrichs's avatar
Mark Friedrichs committed
173
174
#endif

175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#endif
            zeroKirkwoodParticleSharedField( &localParticle );
            if( atomI < cSim.atoms ){
                for (unsigned int j = 0; j < GRID && (y+j) < cSim.atoms; j++){
                    //calculateKirkwoodPairIxn_kernel( localParticle, psA[j], 0.5f, &energySum);
/*
#ifdef INCLUDE_TORQUE

                    calculateKirkwoodPairIxnT1_kernel( localParticle, psA[j] );
                    calculateKirkwoodPairIxnT2_kernel( localParticle, psA[j] );

#else

                    float torque[3];
                    calculateKirkwoodPairIxnT1_kernel( localParticle, psA[j], torque );
                    calculateKirkwoodPairIxnT2_kernel( localParticle, psA[j], torque );
                    localParticle.force[0]  += torque[0];
                    localParticle.force[1]  += torque[1];
                    localParticle.force[2]  += torque[2];

#endif
*/
                    calculateKirkwoodPairIxnB1B2_kernel( localParticle, psA[j] );
                
                }
            }
Mark Friedrichs's avatar
Mark Friedrichs committed
201
202
203
204

            // write results

#ifdef USE_OUTPUT_BUFFER_PER_WARP
205
206
            //float  of;
            offset                              = x + tgx + warp*cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
207

208
            of                                  = cAmoebaSim.pWorkArray_1_1[offset];
209
            of                                 += localParticle.dBornRadius;
210
            cAmoebaSim.pWorkArray_1_1[offset]   = of;
Mark Friedrichs's avatar
Mark Friedrichs committed
211

212
            of                                  = cAmoebaSim.pWorkArray_1_2[offset];
213
            of                                 += 0.5f*localParticle.dBornRadiusPolar;
214
            cAmoebaSim.pWorkArray_1_2[offset]   = of;
215
216
/*
#ifdef INCLUDE_TORQUE
217
            add3dArray( 3*offset, localParticle.torque, cAmoebaSim.pWorkArray_3_1 );
218
219
220
221
222
223
#else
            add3dArray( 3*offset, localParticle.force, cAmoebaSim.pWorkArray_3_1 );
#endif
*/


Mark Friedrichs's avatar
Mark Friedrichs committed
224
225
226
227

#else


228
229
230
231
232
233
            offset                              = x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms;

            cAmoebaSim.pWorkArray_1_1[offset]   = localParticle.dBornRadius;
            cAmoebaSim.pWorkArray_1_2[offset]   = 0.5f*localParticle.dBornRadiusPolar;
/*
#ifdef INCLUDE_TORQUE
234
            add3dArray( 3*offset, localParticle.torque, cAmoebaSim.pWorkArray_3_1 );
235
236
#else
            add3dArray( 3*offset, localParticle.force, cAmoebaSim.pWorkArray_3_1 );
Mark Friedrichs's avatar
Mark Friedrichs committed
237
#endif
238
*/
Mark Friedrichs's avatar
Mark Friedrichs committed
239

240
241
242
#endif

        } else {
Mark Friedrichs's avatar
Mark Friedrichs committed
243

244
245
            if (lasty != y){
                loadKirkwoodShared( &(sA[threadIdx.x]), (y+tgx) );
Mark Friedrichs's avatar
Mark Friedrichs committed
246

247
248
            }
            zeroKirkwoodParticleSharedField( &(sA[threadIdx.x]) );
Mark Friedrichs's avatar
Mark Friedrichs committed
249

250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
            for (unsigned int j = 0; j < GRID; j++){
                if( atomI < cSim.atoms && (y+tj) < cSim.atoms ){
                    //calculateKirkwoodPairIxn_kernel( localParticle, psA[tj], 1.0f, &energySum);
                    float force[3];
                    float energy;
                    calculateKirkwoodPairIxnF1_kernel( localParticle, psA[tj], &energy, force);
                    calculateKirkwoodPairIxnF2_kernel( localParticle, psA[tj], &energy, force);

                    localParticle.force[0]  += force[0];
                    localParticle.force[1]  += force[1];
                    localParticle.force[2]  += force[2];
                
                    psA[tj].force[0]        -= force[0];
                    psA[tj].force[1]        -= force[1];
                    psA[tj].force[2]        -= force[2];
                
                    energySum               += energy;
                
                }
                tj  = (tj + 1) & (GRID - 1);
Mark Friedrichs's avatar
Mark Friedrichs committed
270
271
            }

272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
            localParticle.force[0]    *= 0.5f;
            localParticle.force[1]    *= 0.5f;
            localParticle.force[2]    *= 0.5f;

            sA[threadIdx.x].force[0]  *= 0.5f;
            sA[threadIdx.x].force[1]  *= 0.5f;
            sA[threadIdx.x].force[2]  *= 0.5f;

            // Write results

#ifdef USE_OUTPUT_BUFFER_PER_WARP

            unsigned int offset                 = x + tgx + warp*cSim.paddedNumberOfAtoms;
            add3dArrayToFloat4( offset, localParticle.force,  cSim.pForce4 );

            offset                              = y + tgx + warp*cSim.paddedNumberOfAtoms;
            add3dArrayToFloat4(   offset, sA[threadIdx.x].force,  cSim.pForce4 );
#else
            unsigned int offset                 = x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms;
            add3dArrayToFloat4(   offset, localParticle.force,  cSim.pForce4 );

            offset                              = y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms;
            add3dArrayToFloat4( offset,    sA[threadIdx.x].force,  cSim.pForce4 );

#endif
Mark Friedrichs's avatar
Mark Friedrichs committed
297

298
299
#ifndef INCLUDE_TORQUE
            zeroKirkwoodParticleSharedField( &localParticle );
Mark Friedrichs's avatar
Mark Friedrichs committed
300
            zeroKirkwoodParticleSharedField( &(sA[threadIdx.x]) );
301
302
303
304
305
306
307
308
309
310
311
#endif
            for (unsigned int j = 0; j < GRID; j++){
                if( atomI < cSim.atoms && (y+tj) < cSim.atoms ){
                    //calculateKirkwoodPairIxn_kernel( localParticle, psA[tj], 1.0f, &energySum);
#ifdef INCLUDE_TORQUE

                    calculateKirkwoodPairIxnT1_kernel( localParticle, psA[tj] );
                    calculateKirkwoodPairIxnT2_kernel( localParticle, psA[tj] );
                    calculateKirkwoodPairIxnT1_kernel( psA[tj], localParticle );
                    calculateKirkwoodPairIxnT2_kernel( psA[tj], localParticle );
#else
Mark Friedrichs's avatar
Mark Friedrichs committed
312

313
314
315
                    float torque[3];
                    calculateKirkwoodPairIxnT1_kernel( localParticle, psA[tj], torque );
                    calculateKirkwoodPairIxnT2_kernel( localParticle, psA[tj], torque );
Mark Friedrichs's avatar
Mark Friedrichs committed
316

317
318
319
                    localParticle.force[0]  += torque[0];
                    localParticle.force[1]  += torque[1];
                    localParticle.force[2]  += torque[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
320

321
322
                    calculateKirkwoodPairIxnT1_kernel( psA[tj], localParticle, torque );
                    calculateKirkwoodPairIxnT2_kernel( psA[tj], localParticle, torque );
Mark Friedrichs's avatar
Mark Friedrichs committed
323

324
325
326
                    psA[tj].force[0]        += torque[0];
                    psA[tj].force[1]        += torque[1];
                    psA[tj].force[2]        += torque[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
327
328
329

#endif

330
331
332
333
334
//                    calculateKirkwoodPairIxnB1B2_kernel( localParticle, psA[tj] );
                
                }
                tj  = (tj + 1) & (GRID - 1);
            }
Mark Friedrichs's avatar
Mark Friedrichs committed
335

336
            // Write results
Mark Friedrichs's avatar
Mark Friedrichs committed
337

338
#ifdef USE_OUTPUT_BUFFER_PER_WARP
Mark Friedrichs's avatar
Mark Friedrichs committed
339

340
341
342
343
344
345
            //float of;
            offset                              = x + tgx + warp*cSim.paddedNumberOfAtoms;
/*
            of                                  = cAmoebaSim.pWorkArray_1_1[offset];
            of                                 += localParticle.dBornRadius;
            cAmoebaSim.pWorkArray_1_1[offset]   = of;
Mark Friedrichs's avatar
Mark Friedrichs committed
346

347
348
349
350
            of                                  = cAmoebaSim.pWorkArray_1_2[offset];
            of                                 += 0.5f*localParticle.dBornRadiusPolar;
            cAmoebaSim.pWorkArray_1_2[offset]   = of;
*/
Mark Friedrichs's avatar
Mark Friedrichs committed
351

352
353
354
355
356
357
            //add3dArrayToFloat4( offset, localParticle.force,  cSim.pForce4 );
#ifdef INCLUDE_TORQUE
            add3dArray(       3*offset, localParticle.torque, cAmoebaSim.pWorkArray_3_1 );
#else
            add3dArray(       3*offset, localParticle.force, cAmoebaSim.pWorkArray_3_1 );
#endif
Mark Friedrichs's avatar
Mark Friedrichs committed
358

359
360
361
362
363
            offset                              = y + tgx + warp*cSim.paddedNumberOfAtoms;
/*
            of                                  = cAmoebaSim.pWorkArray_1_1[offset];
            of                                 += sA[threadIdx.x].dBornRadius;
            cAmoebaSim.pWorkArray_1_1[offset]   = of;
Mark Friedrichs's avatar
Mark Friedrichs committed
364

365
366
367
368
            of                                  = cAmoebaSim.pWorkArray_1_2[offset];
            of                                 += 0.5f*sA[threadIdx.x].dBornRadiusPolar;
            cAmoebaSim.pWorkArray_1_2[offset]   = of;
*/
Mark Friedrichs's avatar
Mark Friedrichs committed
369

370
371
372
373
374
375
            //add3dArrayToFloat4(   offset, sA[threadIdx.x].force,  cSim.pForce4 );
#ifdef INCLUDE_TORQUE
            add3dArray(         3*offset, sA[threadIdx.x].torque, cAmoebaSim.pWorkArray_3_1 );
#else
            add3dArray(         3*offset, sA[threadIdx.x].force, cAmoebaSim.pWorkArray_3_1 );
#endif
Mark Friedrichs's avatar
Mark Friedrichs committed
376

377
378
#else
            offset                              = x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
379
/*
380
381
            cAmoebaSim.pWorkArray_1_1[offset]   = localParticle.dBornRadius;
            cAmoebaSim.pWorkArray_1_2[offset]   = 0.5f*localParticle.dBornRadiusPolar;
Mark Friedrichs's avatar
Mark Friedrichs committed
382
383
*/

384
385
386
387
388
            //add3dArrayToFloat4(   offset, localParticle.force,  cSim.pForce4 );
#ifdef INCLUDE_TORQUE
            add3dArray(         3*offset, localParticle.torque, cAmoebaSim.pWorkArray_3_1 );
#else
            add3dArray(         3*offset, localParticle.force, cAmoebaSim.pWorkArray_3_1 );
Mark Friedrichs's avatar
Mark Friedrichs committed
389
#endif
390
391
392
393
394
395
396
397
398
399
400
401

            offset                              = y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms;
/*
            cAmoebaSim.pWorkArray_1_1[offset]   = sA[threadIdx.x].dBornRadius;
            cAmoebaSim.pWorkArray_1_2[offset]   = 0.5f*sA[threadIdx.x].dBornRadiusPolar;
*/

            //add3dArrayToFloat4( offset,    sA[threadIdx.x].force,  cSim.pForce4 );
#ifdef INCLUDE_TORQUE
            add3dArray(       3*offset,    sA[threadIdx.x].torque, cAmoebaSim.pWorkArray_3_1 );
#else
            add3dArray(       3*offset,    sA[threadIdx.x].force, cAmoebaSim.pWorkArray_3_1 );
Mark Friedrichs's avatar
Mark Friedrichs committed
402
403
#endif

404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
#endif

#ifndef INCLUDE_TORQUE
            zeroKirkwoodParticleSharedField( &localParticle );
            zeroKirkwoodParticleSharedField( &(sA[threadIdx.x]) );
#endif
            for (unsigned int j = 0; j < GRID; j++){
                if( atomI < cSim.atoms && (y+tj) < cSim.atoms ){
/*
                    //calculateKirkwoodPairIxn_kernel( localParticle, psA[tj], 1.0f, &energySum);
#ifdef INCLUDE_TORQUE

                    calculateKirkwoodPairIxnT1_kernel( localParticle, psA[tj] );
                    calculateKirkwoodPairIxnT2_kernel( localParticle, psA[tj] );
                    calculateKirkwoodPairIxnT1_kernel( psA[tj], localParticle );
                    calculateKirkwoodPairIxnT2_kernel( psA[tj], localParticle );
#else

                    float torque[3];
                    calculateKirkwoodPairIxnT1_kernel( localParticle, psA[tj], torque );
                    calculateKirkwoodPairIxnT2_kernel( localParticle, psA[tj], torque );
Mark Friedrichs's avatar
Mark Friedrichs committed
425

426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
                    localParticle.force[0]  += torque[0];
                    localParticle.force[1]  += torque[1];
                    localParticle.force[2]  += torque[2];

                    calculateKirkwoodPairIxnT1_kernel( psA[tj], localParticle, torque );
                    calculateKirkwoodPairIxnT2_kernel( psA[tj], localParticle, torque );

                    psA[tj].force[0]        += torque[0];
                    psA[tj].force[1]        += torque[1];
                    psA[tj].force[2]        += torque[2];

#endif
*/

                    calculateKirkwoodPairIxnB1B2_kernel( localParticle, psA[tj] );
                
                }
                tj  = (tj + 1) & (GRID - 1);
Mark Friedrichs's avatar
Mark Friedrichs committed
444
445
446
447
448
449
450
            }

            // Write results

#ifdef USE_OUTPUT_BUFFER_PER_WARP

            float of;
451
            offset                              = x + tgx + warp*cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
452

453
            of                                  = cAmoebaSim.pWorkArray_1_1[offset];
454
            of                                 += localParticle.dBornRadius;
455
            cAmoebaSim.pWorkArray_1_1[offset]   = of;
Mark Friedrichs's avatar
Mark Friedrichs committed
456

457
            of                                  = cAmoebaSim.pWorkArray_1_2[offset];
458
            of                                 += 0.5f*localParticle.dBornRadiusPolar;
459
            cAmoebaSim.pWorkArray_1_2[offset]   = of;
Mark Friedrichs's avatar
Mark Friedrichs committed
460

461
462
463
            //add3dArrayToFloat4( offset, localParticle.force,  cSim.pForce4 );
/*         
#ifdef INCLUDE_TORQUE
464
            add3dArray(       3*offset, localParticle.torque, cAmoebaSim.pWorkArray_3_1 );
465
466
467
468
#else
            add3dArray(       3*offset, localParticle.force, cAmoebaSim.pWorkArray_3_1 );
#endif
*/
Mark Friedrichs's avatar
Mark Friedrichs committed
469

470
            offset                              = y + tgx + warp*cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
471

472
            of                                  = cAmoebaSim.pWorkArray_1_1[offset];
473
            of                                 += sA[threadIdx.x].dBornRadius;
474
            cAmoebaSim.pWorkArray_1_1[offset]   = of;
Mark Friedrichs's avatar
Mark Friedrichs committed
475

476
            of                                  = cAmoebaSim.pWorkArray_1_2[offset];
477
            of                                 += 0.5f*sA[threadIdx.x].dBornRadiusPolar;
478
            cAmoebaSim.pWorkArray_1_2[offset]   = of;
Mark Friedrichs's avatar
Mark Friedrichs committed
479

480
481
482
            //add3dArrayToFloat4(   offset, sA[threadIdx.x].force,  cSim.pForce4 );
/*
#ifdef INCLUDE_TORQUE
483
            add3dArray(         3*offset, sA[threadIdx.x].torque, cAmoebaSim.pWorkArray_3_1 );
Mark Friedrichs's avatar
Mark Friedrichs committed
484
#else
485
486
487
            add3dArray(         3*offset, sA[threadIdx.x].force, cAmoebaSim.pWorkArray_3_1 );
#endif
*/
Mark Friedrichs's avatar
Mark Friedrichs committed
488

489
#else
Mark Friedrichs's avatar
Mark Friedrichs committed
490

491
492
493
494
495
496
497
498
            offset                              = x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms;

            cAmoebaSim.pWorkArray_1_1[offset]   = localParticle.dBornRadius;
            cAmoebaSim.pWorkArray_1_2[offset]   = 0.5f*localParticle.dBornRadiusPolar;

/*
            //add3dArrayToFloat4(   offset, localParticle.force,  cSim.pForce4 );
#ifdef INCLUDE_TORQUE
499
            add3dArray(         3*offset, localParticle.torque, cAmoebaSim.pWorkArray_3_1 );
500
501
502
503
#else
            add3dArray(         3*offset, localParticle.force, cAmoebaSim.pWorkArray_3_1 );
#endif
*/
Mark Friedrichs's avatar
Mark Friedrichs committed
504

505
            offset                              = y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
506

507
            cAmoebaSim.pWorkArray_1_1[offset]   = sA[threadIdx.x].dBornRadius;
508
            cAmoebaSim.pWorkArray_1_2[offset]   = 0.5f*sA[threadIdx.x].dBornRadiusPolar;
Mark Friedrichs's avatar
Mark Friedrichs committed
509

510
511
512
            //add3dArrayToFloat4( offset,    sA[threadIdx.x].force,  cSim.pForce4 );
/*            
#ifdef INCLUDE_TORQUE
513
            add3dArray(       3*offset,    sA[threadIdx.x].torque, cAmoebaSim.pWorkArray_3_1 );
514
515
516
517
#else
            add3dArray(       3*offset,    sA[threadIdx.x].force, cAmoebaSim.pWorkArray_3_1 );
#endif
*/
Mark Friedrichs's avatar
Mark Friedrichs committed
518
519

#endif
520
521


Mark Friedrichs's avatar
Mark Friedrichs committed
522
523
524
525
            lasty = y;
        }
        pos++;
    }
526
    cSim.pEnergy[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*energySum;
Mark Friedrichs's avatar
Mark Friedrichs committed
527
}