kCalculateAmoebaCudaPmeFixedEField.h 12.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "amoebaScaleFactors.h"

__global__ 
#if (__CUDA_ARCH__ >= 200)
31
32
33
__launch_bounds__(384, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(192, 1)
34
#else
35
__launch_bounds__(64, 1)
36
37
38
39
#endif
void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
                            unsigned int* workUnit,
                            float* outputEField,
Mark Friedrichs's avatar
Mark Friedrichs committed
40
                            float* outputEFieldPolar){ 
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56

    extern __shared__ FixedFieldParticle sA[];

    unsigned int totalWarps      = gridDim.x*blockDim.x/GRID;
    unsigned int warp            = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits    = cSim.pInteractionCount[0];
    unsigned int pos             = warp*numWorkUnits/totalWarps;
    unsigned int end             = (warp+1)*numWorkUnits/totalWarps;
    unsigned int lasty           = 0xFFFFFFFF;

    while (pos < end)
    {

        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;
Mark Friedrichs's avatar
Mark Friedrichs committed
57
58
59
60
        float dScaleValue;
        float pScaleValue;
        int  dScaleMask;
        int2 pScaleMask;
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92

        // extract cell coordinates

        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

        unsigned int tgx           = threadIdx.x & (GRID - 1);
        unsigned int tbx           = threadIdx.x - tgx;
        unsigned int tj            = tgx;

        FixedFieldParticle* psA    = &sA[tbx];
        unsigned int atomI         = x + tgx;
        FixedFieldParticle localParticle;
        loadFixedFieldShared( &localParticle, atomI );

        float fieldSum[3];
        float fieldPolarSum[3];

        fieldSum[0]                = 0.0f;
        fieldSum[1]                = 0.0f;
        fieldSum[2]                = 0.0f;

        fieldPolarSum[0]           = 0.0f;
        fieldPolarSum[1]           = 0.0f;
        fieldPolarSum[2]           = 0.0f;

        if (x == y)
        {

            // load coordinates, charge, ...

            loadFixedFieldShared( &(sA[threadIdx.x]), atomI );

Mark Friedrichs's avatar
Mark Friedrichs committed
93
            if( bExclusionFlag ){
94
                unsigned int xi       = x >> GRIDBITS;
95
                unsigned int cell     = xi + xi*cSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
Mark Friedrichs's avatar
Mark Friedrichs committed
96
97
98
99
                dScaleMask            = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                pScaleMask            = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
            } else {
                dScaleValue = pScaleValue = 1.0f;
100

Mark Friedrichs's avatar
Mark Friedrichs committed
101
            }
102

Mark Friedrichs's avatar
Mark Friedrichs committed
103
104
            for (unsigned int j = 0; j < GRID; j++)
            {
105

Mark Friedrichs's avatar
Mark Friedrichs committed
106
                if( bExclusionFlag ){
107
108
                    getMaskedDScaleFactor( j, dScaleMask, &dScaleValue );
                    getMaskedPScaleFactor( j, pScaleMask, &pScaleValue );
Mark Friedrichs's avatar
Mark Friedrichs committed
109
                }
110

111
                float4 ijField[3];
Mark Friedrichs's avatar
Mark Friedrichs committed
112
                calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[j], dScaleValue, pScaleValue, ijField);
113

Mark Friedrichs's avatar
Mark Friedrichs committed
114
115
                // nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
                // by setting match flag
116

117
                unsigned int match      = ( (atomI == (y + j)) || (atomI >= cSim.atoms) || ((y+j) >= cSim.atoms) ) ? 1 : 0;
118

Mark Friedrichs's avatar
Mark Friedrichs committed
119
                // add to field at atomI the field due atomJ's charge/dipole/quadrupole
120

121
122
123
                fieldSum[0]            += match ? 0.0f : ijField[0].x;
                fieldSum[1]            += match ? 0.0f : ijField[1].x;
                fieldSum[2]            += match ? 0.0f : ijField[2].x;
124

125
126
127
                fieldPolarSum[0]       += match ? 0.0f : ijField[0].z;
                fieldPolarSum[1]       += match ? 0.0f : ijField[1].z;
                fieldPolarSum[2]       += match ? 0.0f : ijField[2].z;
128
129
130
131
132
133

            }

            // Write results

#ifdef USE_OUTPUT_BUFFER_PER_WARP
134
            unsigned int offset                 = 3*(x + tgx + warp*cSim.paddedNumberOfAtoms);
135
136
137
            load3dArrayBufferPerWarp( offset, fieldSum,       outputEField );
            load3dArrayBufferPerWarp( offset, fieldPolarSum,  outputEFieldPolar );
#else
138
            unsigned int offset                 = 3*(x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
139
140
141
142
            load3dArray( offset, fieldSum,       outputEField );
            load3dArray( offset, fieldPolarSum,  outputEFieldPolar );
#endif

Mark Friedrichs's avatar
Mark Friedrichs committed
143
        } else {
144

Mark Friedrichs's avatar
Mark Friedrichs committed
145
146
147
148
149
150
151
152
            if (lasty != y ) {
    
                // load coordinates, charge, ...
    
                loadFixedFieldShared( &(sA[threadIdx.x]), (y+tgx) );
    
            }

Mark Friedrichs's avatar
Mark Friedrichs committed
153
154
155
156
            unsigned int flags = cSim.pInteractionFlag[pos];
            if (flags == 0) {
                // No interactions in this block.
            } else {
157

Mark Friedrichs's avatar
Mark Friedrichs committed
158
                // zero shared fields
159

Mark Friedrichs's avatar
Mark Friedrichs committed
160
                zeroFixedFieldParticleSharedField( &(sA[threadIdx.x]) );
161

Mark Friedrichs's avatar
Mark Friedrichs committed
162
163
164
                if( bExclusionFlag ) {
                    unsigned int xi   = x >> GRIDBITS;
                    unsigned int yi   = y >> GRIDBITS;
165
                    unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
Mark Friedrichs's avatar
Mark Friedrichs committed
166
167
168
169
                    dScaleMask        = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                    pScaleMask        = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                } else {
                    dScaleValue = pScaleValue  = 1.0f;
170
171
                }

Mark Friedrichs's avatar
Mark Friedrichs committed
172
                for (unsigned int j = 0; j < GRID; j++){
173

Peter Eastman's avatar
Peter Eastman committed
174
175
176
177
178
179
                    if ((flags&(1<<j)) != 0) {
                        unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j;
                        if( bExclusionFlag ){
                            getMaskedDScaleFactor( jIdx, dScaleMask, &dScaleValue );
                            getMaskedPScaleFactor( jIdx, pScaleMask, &pScaleValue );
                        }
180

Peter Eastman's avatar
Peter Eastman committed
181
                        float4 ijField[3];
Mark Friedrichs's avatar
Mark Friedrichs committed
182
                        calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[jIdx], dScaleValue, pScaleValue, ijField);
Peter Eastman's avatar
Peter Eastman committed
183

184
                        unsigned int outOfBounds     = ( (atomI >= cSim.atoms) || ((y+jIdx) >= cSim.atoms) ) ? 1 : 0;
Peter Eastman's avatar
Peter Eastman committed
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240

                        // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                        fieldSum[0]                 += outOfBounds ? 0.0f : ijField[0].x;
                        fieldSum[1]                 += outOfBounds ? 0.0f : ijField[1].x;
                        fieldSum[2]                 += outOfBounds ? 0.0f : ijField[2].x;

                        fieldPolarSum[0]            += outOfBounds ? 0.0f : ijField[0].z;
                        fieldPolarSum[1]            += outOfBounds ? 0.0f : ijField[1].z;
                        fieldPolarSum[2]            += outOfBounds ? 0.0f : ijField[2].z;

                        if( flags == 0xFFFFFFFF ){

                            // add to field at atomJ the field due atomI's charge/dipole/quadrupole

                            psA[jIdx].eField[0]        += outOfBounds ? 0.0f : ijField[0].y;
                            psA[jIdx].eField[1]        += outOfBounds ? 0.0f : ijField[1].y;
                            psA[jIdx].eField[2]        += outOfBounds ? 0.0f : ijField[2].y;

                            psA[jIdx].eFieldP[0]       += outOfBounds ? 0.0f : ijField[0].w;
                            psA[jIdx].eFieldP[1]       += outOfBounds ? 0.0f : ijField[1].w;
                            psA[jIdx].eFieldP[2]       += outOfBounds ? 0.0f : ijField[2].w;

                        } else {

                            sA[threadIdx.x].tempBuffer[0]  = outOfBounds ? 0.0f : ijField[0].y;
                            sA[threadIdx.x].tempBuffer[1]  = outOfBounds ? 0.0f : ijField[1].y;
                            sA[threadIdx.x].tempBuffer[2]  = outOfBounds ? 0.0f : ijField[2].y;

                            sA[threadIdx.x].tempBufferP[0] = outOfBounds ? 0.0f : ijField[0].w;
                            sA[threadIdx.x].tempBufferP[1] = outOfBounds ? 0.0f : ijField[1].w;
                            sA[threadIdx.x].tempBufferP[2] = outOfBounds ? 0.0f : ijField[2].w;

                            if( tgx % 2 == 0 ){
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+1] );
                            }
                            if( tgx % 4 == 0 ){
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+2] );
                            }
                            if( tgx % 8 == 0 ){
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+4] );
                            }
                            if( tgx % 16 == 0 ){
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+8] );
                            }

                            if (tgx == 0)
                            {
                                psA[jIdx].eField[0]  += sA[threadIdx.x].tempBuffer[0]  + sA[threadIdx.x+16].tempBuffer[0];
                                psA[jIdx].eField[1]  += sA[threadIdx.x].tempBuffer[1]  + sA[threadIdx.x+16].tempBuffer[1];
                                psA[jIdx].eField[2]  += sA[threadIdx.x].tempBuffer[2]  + sA[threadIdx.x+16].tempBuffer[2];

                                psA[jIdx].eFieldP[0] += sA[threadIdx.x].tempBufferP[0] + sA[threadIdx.x+16].tempBufferP[0];
                                psA[jIdx].eFieldP[1] += sA[threadIdx.x].tempBufferP[1] + sA[threadIdx.x+16].tempBufferP[1];
                                psA[jIdx].eFieldP[2] += sA[threadIdx.x].tempBufferP[2] + sA[threadIdx.x+16].tempBufferP[2];
                            }
Mark Friedrichs's avatar
Mark Friedrichs committed
241
                        }
242

Peter Eastman's avatar
Peter Eastman committed
243
                    }
Mark Friedrichs's avatar
Mark Friedrichs committed
244
                    tj = (tj + 1) & (GRID - 1);
245

Mark Friedrichs's avatar
Mark Friedrichs committed
246
247
248
249
                } // j-loop block
    
                // Write results
    
250
#ifdef USE_OUTPUT_BUFFER_PER_WARP
251
                unsigned int offset                 = 3*(x + tgx + warp*cSim.paddedNumberOfAtoms);
Mark Friedrichs's avatar
Mark Friedrichs committed
252
253
254
                load3dArrayBufferPerWarp( offset, fieldSum,       outputEField );
                load3dArrayBufferPerWarp( offset, fieldPolarSum,  outputEFieldPolar );
    
255
                offset                              = 3*(y + tgx + warp*cSim.paddedNumberOfAtoms);
Mark Friedrichs's avatar
Mark Friedrichs committed
256
257
258
                load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eField,  outputEField );
                load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
    
259
#else
260
                unsigned int offset                 = 3*(x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
Mark Friedrichs's avatar
Mark Friedrichs committed
261
262
263
                load3dArray( offset, fieldSum,       outputEField );
                load3dArray( offset, fieldPolarSum,  outputEFieldPolar );
    
264
                offset                              = 3*(y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
Mark Friedrichs's avatar
Mark Friedrichs committed
265
266
267
                load3dArray( offset, sA[threadIdx.x].eField,  outputEField );
                load3dArray( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
     
268
#endif
Mark Friedrichs's avatar
Mark Friedrichs committed
269
            } // end of pInteractionFlag block 
270
            lasty = y;
Mark Friedrichs's avatar
Mark Friedrichs committed
271
        } // x == y block
272
273
274
275

        pos++;
    }
}