kCalculateAmoebaCudaElectrostatic.h 13.1 KB
Newer Older
Mark Friedrichs's avatar
Mark Friedrichs committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "amoebaScaleFactors.h"

__global__ 
#if (__CUDA_ARCH__ >= 200)
31
__launch_bounds__(512, 1)
32
#elif (__CUDA_ARCH__ >= 120)
Peter Eastman's avatar
Peter Eastman committed
33
__launch_bounds__(128, 1)
Mark Friedrichs's avatar
Mark Friedrichs committed
34
#else
Peter Eastman's avatar
Peter Eastman committed
35
__launch_bounds__(64, 1)
Mark Friedrichs's avatar
Mark Friedrichs committed
36
37
#endif
void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
38
                            unsigned int* workUnit, float* outputTorque
Mark Friedrichs's avatar
Mark Friedrichs committed
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57

#ifdef AMOEBA_DEBUG
                           , float4* debugArray, unsigned int targetAtom
#endif
){

#ifdef AMOEBA_DEBUG
    float4 pullBack[20];
#endif

    extern __shared__ ElectrostaticParticle sA[];

    unsigned int totalWarps      = gridDim.x*blockDim.x/GRID;
    unsigned int warp            = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits    = cSim.pInteractionCount[0];
    unsigned int pos             = warp*numWorkUnits/totalWarps;
    unsigned int end             = (warp+1)*numWorkUnits/totalWarps;
    unsigned int lasty           = 0xFFFFFFFF;
    float totalEnergy            = 0.0f;     
58
    float conversionFactor       = (cAmoebaSim.electric/cAmoebaSim.dielec);
Mark Friedrichs's avatar
Mark Friedrichs committed
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
    float scalingFactors[LastScalingIndex];

    while (pos < end)
    {

        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;

        // Extract cell coordinates

        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

        unsigned int tgx              = threadIdx.x & (GRID - 1);
        unsigned int tbx              = threadIdx.x - tgx;
        unsigned int tj               = tgx;

        ElectrostaticParticle* psA    = &sA[tbx];
        unsigned int atomI            = x + tgx;
78
        ElectrostaticParticle localParticle;
79
80
        loadElectrostaticParticle( &localParticle, atomI );
        zeroElectrostaticParticle( &localParticle );
Mark Friedrichs's avatar
Mark Friedrichs committed
81
82
83
84
85
86
87
88
89
90
91

        scalingFactors[PScaleIndex]   = 1.0f;
        scalingFactors[DScaleIndex]   = 1.0f;
        scalingFactors[UScaleIndex]   = 1.0f;
        scalingFactors[MScaleIndex]   = 1.0f;

        if (x == y) // Handle diagonals uniquely at 50% efficiency
        {

            // load shared data

92
93
94
95
96
97
           loadElectrostaticParticle( &(sA[threadIdx.x]), atomI );
            unsigned int xi       = x >> GRIDBITS;
            unsigned int cell     = xi + xi*cSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
            int  dScaleMask       = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
            int2 pScaleMask       = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
            int2 mScaleMask       = cAmoebaSim.pM_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
Mark Friedrichs's avatar
Mark Friedrichs committed
98

99
            for (unsigned int j = 0; j < GRID; j++)
Mark Friedrichs's avatar
Mark Friedrichs committed
100
101
            {

102
103
                unsigned int atomJ = y + j;
                if( (atomI != atomJ) && (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){
Mark Friedrichs's avatar
Mark Friedrichs committed
104
105
106
107

                    getMaskedDScaleFactor( j, dScaleMask, scalingFactors + DScaleIndex );
                    getMaskedPScaleFactor( j, pScaleMask, scalingFactors + PScaleIndex );
                    getMaskedMScaleFactor( j, mScaleMask, scalingFactors + MScaleIndex );
108
    
109
110
111
                    float force[3];
                    float energy;
                    calculateElectrostaticPairIxnF1_kernel( localParticle, psA[j], scalingFactors, &energy, force);
Mark Friedrichs's avatar
Mark Friedrichs committed
112

113
114
115
116
                    localParticle.force[0]            += force[0];
                    localParticle.force[1]            += force[1];
                    localParticle.force[2]            += force[2];
                    totalEnergy                       += 0.5f*energy;
Mark Friedrichs's avatar
Mark Friedrichs committed
117
118
119
120
121
122

                }
            }

            // Write results

123
124
125
            localParticle.force[0]  *= conversionFactor;
            localParticle.force[1]  *= conversionFactor;
            localParticle.force[2]  *= conversionFactor;
126

Mark Friedrichs's avatar
Mark Friedrichs committed
127
#ifdef USE_OUTPUT_BUFFER_PER_WARP
128
            unsigned int offset      = (x + tgx + warp*cSim.paddedNumberOfAtoms);
Mark Friedrichs's avatar
Mark Friedrichs committed
129
#else
130
            unsigned int offset      = (x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
Mark Friedrichs's avatar
Mark Friedrichs committed
131
#endif
132
            add3dArrayToFloat4( offset, localParticle.force, cSim.pForce4 );
Mark Friedrichs's avatar
Mark Friedrichs committed
133

134
135
            zeroElectrostaticParticle( &localParticle );
            for (unsigned int j = 0; j < GRID; j++)
Mark Friedrichs's avatar
Mark Friedrichs committed
136
137
            {

138
139
                unsigned int atomJ = y + j;
                if( (atomI != atomJ) && (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){
Mark Friedrichs's avatar
Mark Friedrichs committed
140

141
142
143
144
145
146
147
148
149
150
151
                    getMaskedDScaleFactor( j, dScaleMask, scalingFactors + DScaleIndex );
                    getMaskedPScaleFactor( j, pScaleMask, scalingFactors + PScaleIndex );
                    getMaskedMScaleFactor( j, mScaleMask, scalingFactors + MScaleIndex );
    
                    float force[3];
                    calculateElectrostaticPairIxnT1_kernel( localParticle, psA[j], scalingFactors, force);
                    localParticle.force[0]  += force[0];
                    localParticle.force[1]  += force[1];
                    localParticle.force[2]  += force[2];
    
                }
Mark Friedrichs's avatar
Mark Friedrichs committed
152
153
            }

154
155
156
            localParticle.force[0] *= conversionFactor;
            localParticle.force[1] *= conversionFactor;
            localParticle.force[2] *= conversionFactor;
Mark Friedrichs's avatar
Mark Friedrichs committed
157

158
159
160
161
162
163
164
#ifdef USE_OUTPUT_BUFFER_PER_WARP
            offset                 = (x + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArray( 3*offset, localParticle.force, outputTorque );
#else
            offset                 = (x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( 3*offset, localParticle.force, outputTorque );
#endif
Mark Friedrichs's avatar
Mark Friedrichs committed
165

166
        } else   {
Mark Friedrichs's avatar
Mark Friedrichs committed
167

168
            // Read fixed atom data into registers and GRF
Mark Friedrichs's avatar
Mark Friedrichs committed
169

170
171
172
            if( lasty != y ){
               loadElectrostaticParticle( &(sA[threadIdx.x]), (y+tgx) );
            }
Mark Friedrichs's avatar
Mark Friedrichs committed
173

174
            zeroElectrostaticParticle( &(sA[threadIdx.x]) );
Mark Friedrichs's avatar
Mark Friedrichs committed
175
       
176
177
178
179
180
181
182
183
184
185
186
187
            int  dScaleMask;
            int2 pScaleMask;
            int2 mScaleMask;

            if( bExclusionFlag ){
                unsigned int xi   = x >> GRIDBITS;
                unsigned int yi   = y >> GRIDBITS;
                unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
                dScaleMask        = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                pScaleMask        = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                mScaleMask        = cAmoebaSim.pM_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
            }
Mark Friedrichs's avatar
Mark Friedrichs committed
188
       
189
            for (unsigned int j = 0; j < GRID; j++){
Mark Friedrichs's avatar
Mark Friedrichs committed
190
       
191
192
                unsigned int atomJ = y + tj;
                if( (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){
Mark Friedrichs's avatar
Mark Friedrichs committed
193

194
195
196
197
198
199
200
201
202
                    if( bExclusionFlag ){
                        getMaskedDScaleFactor( tj, dScaleMask, scalingFactors + DScaleIndex );
                        getMaskedPScaleFactor( tj, pScaleMask, scalingFactors + PScaleIndex );
                        getMaskedMScaleFactor( tj, mScaleMask, scalingFactors + MScaleIndex );
                    }
           
                    float force[3];
                    float energy;
                    calculateElectrostaticPairIxnF1_kernel( localParticle, psA[tj], scalingFactors, &energy, force);
203
    
204
                    totalEnergy                       += energy;
205
    
206
207
208
                    localParticle.force[0]            += force[0];
                    localParticle.force[1]            += force[1];
                    localParticle.force[2]            += force[2];
209
    
210
211
212
213
                    psA[tj].force[0]                  -= force[0];
                    psA[tj].force[1]                  -= force[1];
                    psA[tj].force[2]                  -= force[2];
                }
Mark Friedrichs's avatar
Mark Friedrichs committed
214

215
                tj = (tj + 1) & (GRID - 1);
Mark Friedrichs's avatar
Mark Friedrichs committed
216
217
            }

218
            // Write results
Mark Friedrichs's avatar
Mark Friedrichs committed
219

220
221
222
223
224
225
226
            localParticle.force[0]     *= conversionFactor;
            localParticle.force[1]     *= conversionFactor;
            localParticle.force[2]     *= conversionFactor;
 
            sA[threadIdx.x].force[0]   *= conversionFactor;
            sA[threadIdx.x].force[1]   *= conversionFactor;
            sA[threadIdx.x].force[2]   *= conversionFactor;
Mark Friedrichs's avatar
Mark Friedrichs committed
227

228
#ifdef USE_OUTPUT_BUFFER_PER_WARP
Mark Friedrichs's avatar
Mark Friedrichs committed
229

230
231
            unsigned int offset                 = (x + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4(   offset, localParticle.force,   cSim.pForce4 );
Mark Friedrichs's avatar
Mark Friedrichs committed
232

233
234
            offset                              = (y + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4(   offset, sA[threadIdx.x].force,   cSim.pForce4 );
Mark Friedrichs's avatar
Mark Friedrichs committed
235

236
237
238
#else
            unsigned int offset                 = (x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4(   offset, localParticle.force,   cSim.pForce4 );
Mark Friedrichs's avatar
Mark Friedrichs committed
239

240
241
            offset                              = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4( offset, sA[threadIdx.x].force,  cSim.pForce4 );
242

243
244
245
246
247
248
249
#endif
            zeroElectrostaticParticle( &(sA[threadIdx.x]) );
            zeroElectrostaticParticle( &localParticle );
            for (unsigned int j = 0; j < GRID; j++){
       
                unsigned int atomJ = y + tj;
                if( (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){
250

251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
                    if( bExclusionFlag ){
                        getMaskedDScaleFactor( tj, dScaleMask, scalingFactors + DScaleIndex );
                        getMaskedPScaleFactor( tj, pScaleMask, scalingFactors + PScaleIndex );
                        getMaskedMScaleFactor( tj, mScaleMask, scalingFactors + MScaleIndex );
                    }
           
                    float force[3];
                    calculateElectrostaticPairIxnT1_kernel( localParticle, psA[tj], scalingFactors, force);
                    localParticle.force[0]           += force[0];
                    localParticle.force[1]           += force[1];
                    localParticle.force[2]           += force[2];
    
                    calculateElectrostaticPairIxnT3_kernel( localParticle, psA[tj], scalingFactors, force);
                    psA[tj].force[0]                 += force[0];
                    psA[tj].force[1]                 += force[1];
                    psA[tj].force[2]                 += force[2];
                }
268

269
270
271
272
273
274
275
276
277
278
                tj = (tj + 1) & (GRID - 1);
            }

            localParticle.force[0]    *= conversionFactor;
            localParticle.force[1]    *= conversionFactor;
            localParticle.force[2]    *= conversionFactor;
 
            sA[threadIdx.x].force[0]  *= conversionFactor;
            sA[threadIdx.x].force[1]  *= conversionFactor;
            sA[threadIdx.x].force[2]  *= conversionFactor;
279

Mark Friedrichs's avatar
Mark Friedrichs committed
280
281
#ifdef USE_OUTPUT_BUFFER_PER_WARP

282
283
            offset                 = (x + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArray( 3*offset, localParticle.force,  outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
284

285
            offset                              = (y + tgx + warp*cSim.paddedNumberOfAtoms);
286
            add3dArray( 3*offset, sA[threadIdx.x].force,  outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
287
288

#else
289
290
            offset                 = (x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray(         3*offset, localParticle.force, outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
291

292
            offset                              = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
293
            load3dArray(       3*offset, sA[threadIdx.x].force, outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
294
295
296
297
298
299
300

#endif
            lasty = y;
        }

        pos++;
    }
301
    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += (conversionFactor*totalEnergy);
Mark Friedrichs's avatar
Mark Friedrichs committed
302
}