kCalculateAmoebaCudaElectrostatic.h 28.7 KB
Newer Older
Mark Friedrichs's avatar
Mark Friedrichs committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "amoebaScaleFactors.h"

__global__ 
#if (__CUDA_ARCH__ >= 200)
Peter Eastman's avatar
Peter Eastman committed
31
__launch_bounds__(384, 1)
32
#elif (__CUDA_ARCH__ >= 120)
Peter Eastman's avatar
Peter Eastman committed
33
__launch_bounds__(128, 1)
Mark Friedrichs's avatar
Mark Friedrichs committed
34
#else
Peter Eastman's avatar
Peter Eastman committed
35
__launch_bounds__(64, 1)
Mark Friedrichs's avatar
Mark Friedrichs committed
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#endif
void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
                            unsigned int* workUnit,
                            float4* atomCoord,
                            float* labFrameDipole,
                            float* labFrameQuadrupole,
                            float* inducedDipole,
                            float* inducedDipolePolar,
                            float* outputTorque

#ifdef AMOEBA_DEBUG
                           , float4* debugArray, unsigned int targetAtom
#endif
){

#ifdef AMOEBA_DEBUG
    float4 pullBack[20];
#endif

    extern __shared__ ElectrostaticParticle sA[];

    unsigned int totalWarps      = gridDim.x*blockDim.x/GRID;
    unsigned int warp            = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits    = cSim.pInteractionCount[0];
    unsigned int pos             = warp*numWorkUnits/totalWarps;
    unsigned int end             = (warp+1)*numWorkUnits/totalWarps;
    unsigned int lasty           = 0xFFFFFFFF;
    float totalEnergy            = 0.0f;     
64
    float conversionFactor       = (cAmoebaSim.electric/cAmoebaSim.dielec);
Mark Friedrichs's avatar
Mark Friedrichs committed
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
    float scalingFactors[LastScalingIndex];

    while (pos < end)
    {

        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;

        // Extract cell coordinates

        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

        unsigned int tgx              = threadIdx.x & (GRID - 1);
        unsigned int tbx              = threadIdx.x - tgx;
        unsigned int tj               = tgx;

        ElectrostaticParticle* psA    = &sA[tbx];
        unsigned int atomI            = x + tgx;
84
85
86
87
        ElectrostaticParticle localParticle;
        loadElectrostaticShared(&localParticle, atomI,
                                atomCoord, labFrameDipole, labFrameQuadrupole,
                                inducedDipole, inducedDipolePolar, cAmoebaSim.pDampingFactorAndThole );
Mark Friedrichs's avatar
Mark Friedrichs committed
88

89
90
91
        localParticle.force[0]        = 0.0f;
        localParticle.force[1]        = 0.0f;
        localParticle.force[2]        = 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
92

93
94
95
        localParticle.torque[0]       = 0.0f;
        localParticle.torque[1]       = 0.0f;
        localParticle.torque[2]       = 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

        scalingFactors[PScaleIndex]   = 1.0f;
        scalingFactors[DScaleIndex]   = 1.0f;
        scalingFactors[UScaleIndex]   = 1.0f;
        scalingFactors[MScaleIndex]   = 1.0f;

        if (x == y) // Handle diagonals uniquely at 50% efficiency
        {

            // load shared data

           loadElectrostaticShared( &(sA[threadIdx.x]), atomI,
                                    atomCoord, labFrameDipole, labFrameQuadrupole,
                                    inducedDipole, inducedDipolePolar, cAmoebaSim.pDampingFactorAndThole );

            if (!bExclusionFlag)
            {

                // this branch is never exercised since it includes the
                // interaction between atomI and itself which is always excluded

                for (unsigned int j = 0; j < GRID; j++)
                {

120
121
                    float4 force;
                    float4 torque[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
122

123
                    calculateElectrostaticPairIxn_kernel( localParticle, psA[j], scalingFactors, &force, torque
Mark Friedrichs's avatar
Mark Friedrichs committed
124
125
126
127
128
#ifdef AMOEBA_DEBUG
, pullBack
#endif
 );

129
                    if( (atomI != (y + j)) && (atomI < cSim.atoms) && ((y+j) < cSim.atoms) ){
Mark Friedrichs's avatar
Mark Friedrichs committed
130

131
132
133
                         localParticle.force[0]            += force.x;
                         localParticle.force[1]            += force.y;
                         localParticle.force[2]            += force.z;
Mark Friedrichs's avatar
Mark Friedrichs committed
134

135
136
137
138
139
140
141
                         totalEnergy                       += 0.5f*force.w;
     
                         localParticle.torque[0]           += torque[0].x;
                         localParticle.torque[1]           += torque[0].y;
                         localParticle.torque[2]           += torque[0].z;
      
                    }
Mark Friedrichs's avatar
Mark Friedrichs committed
142
143
144
145
146
147
                }

            }
            else  // bExclusion
            {
                unsigned int xi       = x >> GRIDBITS;
148
                unsigned int cell     = xi + xi*cSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
Mark Friedrichs's avatar
Mark Friedrichs committed
149
150
151
152
153
154
155
                int  dScaleMask       = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                int2 pScaleMask       = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                int2 mScaleMask       = cAmoebaSim.pM_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];

                for (unsigned int j = 0; j < GRID; j++)
                {

156
157
                    float4 force;
                    float4 torque[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
158
159
160
161
162
163
164
165
166
167
168

                    unsigned int atomJ = y + j;

                    // set scale factors

                    getMaskedDScaleFactor( j, dScaleMask, scalingFactors + DScaleIndex );
                    getMaskedPScaleFactor( j, pScaleMask, scalingFactors + PScaleIndex );
                    getMaskedMScaleFactor( j, mScaleMask, scalingFactors + MScaleIndex );

                    // force

169
                    calculateElectrostaticPairIxn_kernel( localParticle, psA[j], scalingFactors, &force, torque
Mark Friedrichs's avatar
Mark Friedrichs committed
170
171
172
173
174
175
176
177
#ifdef AMOEBA_DEBUG
, pullBack
#endif
 );

                    // nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
                    // by setting match flag

178
                    if( (atomI != atomJ) && (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){
Mark Friedrichs's avatar
Mark Friedrichs committed
179

180
181
182
                        localParticle.force[0]            += force.x;
                        localParticle.force[1]            += force.y;
                        localParticle.force[2]            += force.z;
Mark Friedrichs's avatar
Mark Friedrichs committed
183

184
                        totalEnergy                       += 0.5f*force.w;
Mark Friedrichs's avatar
Mark Friedrichs committed
185

186
187
188
189
                        localParticle.torque[0]           += torque[0].x;
                        localParticle.torque[1]           += torque[0].y;
                        localParticle.torque[2]           += torque[0].z;
      
Mark Friedrichs's avatar
Mark Friedrichs committed
190

191
192
                    }
    
Mark Friedrichs's avatar
Mark Friedrichs committed
193
194
195
196
197
198
199
200
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom ){
            unsigned int index                 = (atomI == targetAtom) ? atomJ : atomI;

            debugArray[index].x                = (float) atomI;
            debugArray[index].y                = (float) atomJ;
            debugArray[index].z                = 1.0f;
            debugArray[index].w                = (float) y;
201

202
203
            unsigned int mask                  = (atomI != atomJ) && (atomI < cSim.atoms) && (atomJ < cSim.atoms) ? 1 : 0;
            index                             += cSim.paddedNumberOfAtoms;
204
205
206
207
208
209
            debugArray[index].x                = mask ? conversionFactor*force.x : 0.0f;
            debugArray[index].y                = mask ? conversionFactor*force.y : 0.0f;
            debugArray[index].z                = mask ? conversionFactor*force.z : 0.0f;
            debugArray[index].w                = -1.1f;


210
            index                             += cSim.paddedNumberOfAtoms;
211
212
213
214
            debugArray[index].x                = mask ? conversionFactor*torque[0].x : 0.0f;
            debugArray[index].y                = mask ? conversionFactor*torque[0].y : 0.0f;
            debugArray[index].z                = mask ? conversionFactor*torque[0].z : 0.0f;
            debugArray[index].w                = -2.1f;
Mark Friedrichs's avatar
Mark Friedrichs committed
215
216


217
            index                             += cSim.paddedNumberOfAtoms;
218
219
220
221
222
            debugArray[index].x                = mask ? conversionFactor*torque[1].x : 0.0f;
            debugArray[index].y                = mask ? conversionFactor*torque[1].y : 0.0f;
            debugArray[index].z                = mask ? conversionFactor*torque[1].z : 0.0f;
            debugArray[index].w                = -2.2f;

Mark Friedrichs's avatar
Mark Friedrichs committed
223

224
            index                             += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
225
226
227
228
229
230
            int pullIndex                      = 0;
            debugArray[index].x                = pullBack[pullIndex].x;
            debugArray[index].y                = pullBack[pullIndex].y;
            debugArray[index].z                = pullBack[pullIndex].z;
            debugArray[index].w                = pullBack[pullIndex].w;

231
            index                             += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
232
            pullIndex++;
233
234
235
            debugArray[index].x                = conversionFactor*pullBack[pullIndex].x;
            debugArray[index].y                = conversionFactor*pullBack[pullIndex].y;
            debugArray[index].z                = conversionFactor*pullBack[pullIndex].z;
Mark Friedrichs's avatar
Mark Friedrichs committed
236
237
            debugArray[index].w                = pullBack[pullIndex].w;

238
            index                             += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
239
            pullIndex++;
240
241
242
            debugArray[index].x                = conversionFactor*pullBack[pullIndex].x;
            debugArray[index].y                = conversionFactor*pullBack[pullIndex].y;
            debugArray[index].z                = conversionFactor*pullBack[pullIndex].z;
Mark Friedrichs's avatar
Mark Friedrichs committed
243
244
            debugArray[index].w                = pullBack[pullIndex].w;

245
            index                             += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
246
            pullIndex++;
247
248
249
            debugArray[index].x                = conversionFactor*pullBack[pullIndex].x;
            debugArray[index].y                = conversionFactor*pullBack[pullIndex].y;
            debugArray[index].z                = conversionFactor*pullBack[pullIndex].z;
Mark Friedrichs's avatar
Mark Friedrichs committed
250
251
252
253
254
255
256
257
258
259
            debugArray[index].w                = pullBack[pullIndex].w;

}
#endif

                }
            }

            // Write results

260
261
262
263
264
265
266
267
           localParticle.force[0]  *= conversionFactor;
           localParticle.force[1]  *= conversionFactor;
           localParticle.force[2]  *= conversionFactor;

           localParticle.torque[0] *= conversionFactor;
           localParticle.torque[1] *= conversionFactor;
           localParticle.torque[2] *= conversionFactor;

Mark Friedrichs's avatar
Mark Friedrichs committed
268
269
#ifdef USE_OUTPUT_BUFFER_PER_WARP

270
271
272
            unsigned int offset                 = (x + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4( offset, localParticle.force, cSim.pForce4 );
            add3dArray( 3*offset, localParticle.torque, outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
273
274

#else
275
276
277
            unsigned int offset                 = (x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4( offset, localParticle.force, cSim.pForce4 );
            load3dArray( 3*offset, localParticle.torque, outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
#endif

        }
        else        // 100% utilization
        {
            // Read fixed atom data into registers and GRF

            if (lasty != y)
            {
                // load shared data

               loadElectrostaticShared( &(sA[threadIdx.x]), (y+tgx),
                                        atomCoord, labFrameDipole, labFrameQuadrupole,
                                        inducedDipole, inducedDipolePolar, cAmoebaSim.pDampingFactorAndThole );

            }

295
296
297
            sA[threadIdx.x].force[0]     = 0.0f;
            sA[threadIdx.x].force[1]     = 0.0f;
            sA[threadIdx.x].force[2]     = 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
298

299
300
301
            sA[threadIdx.x].torque[0]    = 0.0f;
            sA[threadIdx.x].torque[1]    = 0.0f;
            sA[threadIdx.x].torque[2]    = 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
302
303
304
305
306
307

            if (!bExclusionFlag)
            {
                for (unsigned int j = 0; j < GRID; j++)
                {

308
309
                    float4 force;
                    float4 torque[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
310
311
312

                    unsigned int atomJ = y + tj;

313
                    calculateElectrostaticPairIxn_kernel( localParticle, psA[tj], scalingFactors, &force, torque
Mark Friedrichs's avatar
Mark Friedrichs committed
314
315
316
317
318
#ifdef AMOEBA_DEBUG
, pullBack
#endif
 );
       
319
                    if( (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){ 
Mark Friedrichs's avatar
Mark Friedrichs committed
320

321
                        // add force and torque to atom I due atom J
Mark Friedrichs's avatar
Mark Friedrichs committed
322
           
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
                        localParticle.force[0]         += force.x;
                        localParticle.force[1]         += force.y;
                        localParticle.force[2]         += force.z;

                        localParticle.torque[0]        += torque[0].x;
                        localParticle.torque[1]        += torque[0].y;
                        localParticle.torque[2]        += torque[0].z;
    
                        totalEnergy                    += force.w;
               
                        // add force and torque to atom J due atom I
           
                        psA[tj].force[0]               -= force.x;
                        psA[tj].force[1]               -= force.y;
                        psA[tj].force[2]               -= force.z;
           
                        psA[tj].torque[0]              += torque[1].x;
                        psA[tj].torque[1]              += torque[1].y;
                        psA[tj].torque[2]              += torque[1].z;
                    }
Mark Friedrichs's avatar
Mark Friedrichs committed
343
344
345
346
347
348
349
350
351
352
353
354
355
       
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom  || atomJ == targetAtom ){
        unsigned int index                 = (atomI == targetAtom) ? atomJ : atomI;
        unsigned int indexI                = (atomI == targetAtom) ? 0 : 1;
        unsigned int indexJ                = (atomI == targetAtom) ? 1 : 0;
        float forceSign                    = (atomI == targetAtom) ? 1.0f : -1.0f;

        debugArray[index].x                = (float) atomI;
        debugArray[index].y                = (float) atomJ;
        debugArray[index].z                = 2.0f;
        debugArray[index].w                = (float) y;

356
357
        unsigned int mask                  = (atomI < cSim.atoms) && (atomJ < cSim.atoms) ? 1 : 0;
        index                             += cSim.paddedNumberOfAtoms;
358
359
360
361
        debugArray[index].x                = mask ? conversionFactor*forceSign*force.x : 0.0f;
        debugArray[index].y                = mask ? conversionFactor*forceSign*force.y : 0.0f;
        debugArray[index].z                = mask ? conversionFactor*forceSign*force.z : 0.0f;
        debugArray[index].w                = -3.1f;
Mark Friedrichs's avatar
Mark Friedrichs committed
362

363
        index                             += cSim.paddedNumberOfAtoms;
364
365
366
367
        debugArray[index].x                = mask ? conversionFactor*torque[indexI].x  : 0.0f;
        debugArray[index].y                = mask ? conversionFactor*torque[indexI].y  : 0.0f;
        debugArray[index].z                = mask ? conversionFactor*torque[indexI].z  : 0.0f;
        debugArray[index].w                = -4.1f;
Mark Friedrichs's avatar
Mark Friedrichs committed
368

369
        index                             += cSim.paddedNumberOfAtoms;
370
371
372
373
374
        debugArray[index].x                = mask ? conversionFactor*torque[indexJ].x  : 0.0f;
        debugArray[index].y                = mask ? conversionFactor*torque[indexJ].y  : 0.0f;
        debugArray[index].z                = mask ? conversionFactor*torque[indexJ].z  : 0.0f;
        debugArray[index].w                = -5.1f;

375
        index                             += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
376
377
378
379
380
381
        int pullIndex                      = 0;
        debugArray[index].x                = pullBack[pullIndex].x;
        debugArray[index].y                = pullBack[pullIndex].y;
        debugArray[index].z                = pullBack[pullIndex].z;
        debugArray[index].w                = pullBack[pullIndex].w;

382
        index                             += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
383
        pullIndex++;
384
385
386
387
        debugArray[index].x                = conversionFactor*pullBack[pullIndex].x;
        debugArray[index].y                = conversionFactor*pullBack[pullIndex].y;
        debugArray[index].z                = conversionFactor*pullBack[pullIndex].z;
        debugArray[index].w                = conversionFactor*pullBack[pullIndex].w;
Mark Friedrichs's avatar
Mark Friedrichs committed
388

389
        index                             += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
390
        pullIndex++;
391
392
393
394
        debugArray[index].x                = conversionFactor*pullBack[pullIndex].x;
        debugArray[index].y                = conversionFactor*pullBack[pullIndex].y;
        debugArray[index].z                = conversionFactor*pullBack[pullIndex].z;
        debugArray[index].w                = conversionFactor*pullBack[pullIndex].w;
Mark Friedrichs's avatar
Mark Friedrichs committed
395
       
396
        index                             += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
        pullIndex++;
        debugArray[index].x                = pullBack[pullIndex].x;
        debugArray[index].y                = pullBack[pullIndex].y;
        debugArray[index].z                = pullBack[pullIndex].z;
        debugArray[index].w                = pullBack[pullIndex].w;
       
}
#endif
       
                           tj                  = (tj + 1) & (GRID - 1);
       
                       }
                   }
                   else  // bExclusion
                   {
                       // Read fixed atom data into registers and GRF
       
                       unsigned int xi   = x >> GRIDBITS;
                       unsigned int yi   = y >> GRIDBITS;
416
                       unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
Mark Friedrichs's avatar
Mark Friedrichs committed
417
418
419
420
421
422
423
                       int  dScaleMask   = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                       int2 pScaleMask   = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                       int2 mScaleMask   = cAmoebaSim.pM_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
       
                       for (unsigned int j = 0; j < GRID; j++)
                       {
       
424
425
                           float4 force;
                           float4 torque[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
426
427
428
429
430
431
432
433
434
435
436
       
                           unsigned int atomJ = y + tj;
       
                           // set scale factors
       
                           getMaskedDScaleFactor( tj, dScaleMask, scalingFactors + DScaleIndex );
                           getMaskedPScaleFactor( tj, pScaleMask, scalingFactors + PScaleIndex );
                           getMaskedMScaleFactor( tj, mScaleMask, scalingFactors + MScaleIndex );
       
                           // force
       
437
                           calculateElectrostaticPairIxn_kernel( localParticle, psA[tj], scalingFactors, &force, torque
Mark Friedrichs's avatar
Mark Friedrichs committed
438
439
440
441
442
443
444
#ifdef AMOEBA_DEBUG
, pullBack
#endif
 );

                    // check if atoms out-of-bounds

445
                    if( (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){
Mark Friedrichs's avatar
Mark Friedrichs committed
446

447
                        // add force and torque to atom I due atom J
Mark Friedrichs's avatar
Mark Friedrichs committed
448
    
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
                        localParticle.force[0]         += force.x;
                        localParticle.force[1]         += force.y;
                        localParticle.force[2]         += force.z;
    
                        localParticle.torque[0]        += torque[0].x;
                        localParticle.torque[1]        += torque[0].y;
                        localParticle.torque[2]        += torque[0].z;
        
                        totalEnergy                    += force.w;
    
                        // add force and torque to atom J due atom I
    
                        psA[tj].force[0]               -= force.x;
                        psA[tj].force[1]               -= force.y;
                        psA[tj].force[2]               -= force.z;
    
                        psA[tj].torque[0]              += torque[1].x;
                        psA[tj].torque[1]              += torque[1].y;
                        psA[tj].torque[2]              += torque[1].z;
    
                    }
Mark Friedrichs's avatar
Mark Friedrichs committed
470
471
472

#ifdef AMOEBA_DEBUG
if( atomI == targetAtom  || atomJ == targetAtom ){
473
474
475
476
        unsigned int index                 = (atomI == targetAtom) ? atomJ : atomI;
        unsigned int indexI                = (atomI == targetAtom) ? 0 : 1;
        unsigned int indexJ                = (atomI == targetAtom) ? 1 : 0;
        float forceSign                    = (atomI == targetAtom) ? 1.0f : -1.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
477

478
479
480
481
482
        debugArray[index].x                   = (float) atomI;
        debugArray[index].y                   = (float) atomJ;
        debugArray[index].z                   = 3.0f;
        debugArray[index].w                   = (float) y;

483
484
        unsigned int mask                     = (atomI < cSim.atoms) && (atomJ < cSim.atoms) ? 1 : 0;
        index                                += cSim.paddedNumberOfAtoms;
485
486
487
488
489
        debugArray[index].x                   = mask ? conversionFactor*forceSign*force.x : 0.0f;
        debugArray[index].y                   = mask ? conversionFactor*forceSign*force.y : 0.0f;
        debugArray[index].z                   = mask ? conversionFactor*forceSign*force.z : 0.0f;
        debugArray[index].w                   = -6.1f;

490
        index                                += cSim.paddedNumberOfAtoms;
491
492
493
494
495
        debugArray[index].x                   = mask ? conversionFactor*torque[indexI].x  : 0.0f;
        debugArray[index].y                   = mask ? conversionFactor*torque[indexI].y  : 0.0f;
        debugArray[index].z                   = mask ? conversionFactor*torque[indexI].z  : 0.0f;
        debugArray[index].w                   = -7.1f;

496
        index                                += cSim.paddedNumberOfAtoms;
497
498
499
500
501
        debugArray[index].x                   = mask ? conversionFactor*torque[indexJ].x  : 0.0f;
        debugArray[index].y                   = mask ? conversionFactor*torque[indexJ].y  : 0.0f;
        debugArray[index].z                   = mask ? conversionFactor*torque[indexJ].z  : 0.0f;
        debugArray[index].w                   = -8.1f;

502
        index                                += cSim.paddedNumberOfAtoms;
503
504
505
506
507
508
        int pullIndex                         = 0;
        debugArray[index].x                   = conversionFactor*pullBack[pullIndex].x;
        debugArray[index].y                   = conversionFactor*pullBack[pullIndex].y;
        debugArray[index].z                   = conversionFactor*pullBack[pullIndex].z;
        debugArray[index].w                   = conversionFactor*pullBack[pullIndex].w;

509
        index                                += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
510
        pullIndex++;
511
512
513
514
        debugArray[index].x                   = conversionFactor*pullBack[pullIndex].x;
        debugArray[index].y                   = conversionFactor*pullBack[pullIndex].y;
        debugArray[index].z                   = conversionFactor*pullBack[pullIndex].z;
        debugArray[index].w                   = conversionFactor*pullBack[pullIndex].w;
Mark Friedrichs's avatar
Mark Friedrichs committed
515

516
        index                                += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
517
        pullIndex++;
518
519
520
521
        debugArray[index].x                   = conversionFactor*pullBack[pullIndex].x;
        debugArray[index].y                   = conversionFactor*pullBack[pullIndex].y;
        debugArray[index].z                   = conversionFactor*pullBack[pullIndex].z;
        debugArray[index].w                   = conversionFactor*pullBack[pullIndex].w;
Mark Friedrichs's avatar
Mark Friedrichs committed
522
       
523
        index                                += cSim.paddedNumberOfAtoms;
Mark Friedrichs's avatar
Mark Friedrichs committed
524
        pullIndex++;
525
526
527
528
        debugArray[index].x                   = pullBack[pullIndex].x;
        debugArray[index].y                   = pullBack[pullIndex].y;
        debugArray[index].z                   = pullBack[pullIndex].z;
        debugArray[index].w                   = pullBack[pullIndex].w;
Mark Friedrichs's avatar
Mark Friedrichs committed
529
530
       
#if 0
531
            index                                += cSim.paddedNumberOfAtoms;
532
533
534
535
536
            debugArray[index].x                   = scalingFactors[DScaleIndex];
            debugArray[index].y                   = dScaleVal;
            debugArray[index].z                   = scalingFactors[PScaleIndex];
            debugArray[index].w                   = pScaleVal;

537
            index                                += cSim.paddedNumberOfAtoms;
538
539
540
            debugArray[index].x                   = scalingFactors[MScaleIndex];
            debugArray[index].y                   = mScaleVal;
            for( int pIndex    = 0; pIndex < 14; pIndex++ ){
541
                index                                += cSim.paddedNumberOfAtoms;
542
543
544
545
                debugArray[index].x                   = pullBack[pIndex].x;
                debugArray[index].y                   = pullBack[pIndex].y;
                debugArray[index].z                   = pullBack[pIndex].z;
                debugArray[index].w                   = pullBack[pIndex].w;
Mark Friedrichs's avatar
Mark Friedrichs committed
546
547
            }

548
            index                                += cSim.paddedNumberOfAtoms;
549
550
551
552
553
            debugArray[index].x                   = labFrameDipole[3*atomI];
            debugArray[index].y                   = labFrameDipole[3*atomI+1];
            debugArray[index].z                   = labFrameDipole[3*atomI+2];
            debugArray[index].w                   = 25.0f;

554
            index                                += cSim.paddedNumberOfAtoms;
555
556
557
558
559
            debugArray[index].x                   = labFrameDipole[3*atomJ];
            debugArray[index].y                   = labFrameDipole[3*atomJ+1];
            debugArray[index].z                   = labFrameDipole[3*atomJ+2];
            debugArray[index].w                   = 26.0f;

560
            index                                += cSim.paddedNumberOfAtoms;
561
562
563
564
            debugArray[index].x                   = jDipole[0];
            debugArray[index].y                   = jDipole[1];
            debugArray[index].z                   = jDipole[2];
            debugArray[index].w                   = 27.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
565
566
567
568
569
570
571
#endif


}
#endif


572
                    tj                     = (tj + 1) & (GRID - 1);
Mark Friedrichs's avatar
Mark Friedrichs committed
573
574
575
576
577
                }
            }

            // Write results

578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
           localParticle.force[0]     *= conversionFactor;
           localParticle.force[1]     *= conversionFactor;
           localParticle.force[2]     *= conversionFactor;

           localParticle.torque[0]    *= conversionFactor;
           localParticle.torque[1]    *= conversionFactor;
           localParticle.torque[2]    *= conversionFactor;

           sA[threadIdx.x].force[0]   *= conversionFactor;
           sA[threadIdx.x].force[1]   *= conversionFactor;
           sA[threadIdx.x].force[2]   *= conversionFactor;

           sA[threadIdx.x].torque[0]  *= conversionFactor;
           sA[threadIdx.x].torque[1]  *= conversionFactor;
           sA[threadIdx.x].torque[2]  *= conversionFactor;

Mark Friedrichs's avatar
Mark Friedrichs committed
594
595
#ifdef USE_OUTPUT_BUFFER_PER_WARP

596
597
598
            unsigned int offset                 = (x + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4(   offset, localParticle.force,   cSim.pForce4 );
            add3dArray( 3*offset, localParticle.torque,  outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
599

600
601
602
            offset                              = (y + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4(   offset, sA[threadIdx.x].force,   cSim.pForce4 );
            add3dArray( 3*offset, sA[threadIdx.x].torque,  outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
603
604

#else
605
606
607
            unsigned int offset                 = (x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4(   offset, localParticle.force,   cSim.pForce4 );
            load3dArray(         3*offset, localParticle.torque, outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
608

609
610
611
            offset                              = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4( offset, sA[threadIdx.x].force,  cSim.pForce4 );
            load3dArray(       3*offset, sA[threadIdx.x].torque, outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
612
613
614
615
616
617
618

#endif
            lasty = y;
        }

        pos++;
    }
619
    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += (conversionFactor*totalEnergy);
Mark Friedrichs's avatar
Mark Friedrichs committed
620
}