"csrc/sm100/prefill/sparse/fwd.h" did not exist on "17944550dc863d4c06b2104cd8708377b2ec88a4"
kCalculateAmoebaCudaPmeDirectElectrostatic.h 11 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "amoebaScaleFactors.h"

__global__ 
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(384, 1)
32
#elif (__CUDA_ARCH__ >= 120)
33
34
35
36
__launch_bounds__(128, 1)
#else
__launch_bounds__(64, 1)
#endif
37
void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)( unsigned int* workUnit, float* outputTorque ){
38

Mark Friedrichs's avatar
Mark Friedrichs committed
39
    extern __shared__ PmeDirectElectrostaticParticle sA[];
40
41
42
43
44
45
46
47
48
49

    unsigned int totalWarps      = gridDim.x*blockDim.x/GRID;
    unsigned int warp            = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits    = cSim.pInteractionCount[0];
    unsigned int pos             = warp*numWorkUnits/totalWarps;
    unsigned int end             = (warp+1)*numWorkUnits/totalWarps;
    unsigned int lasty           = 0xFFFFFFFF;
    float totalEnergy            = 0.0f;     

    float scalingFactors[LastScalingIndex];
Mark Friedrichs's avatar
Mark Friedrichs committed
50
    float conversionFactor       = (-cAmoebaSim.electric/cAmoebaSim.dielec);
51
52
53
54
55
56
57

    while (pos < end)
    {

        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;
Mark Friedrichs's avatar
Mark Friedrichs committed
58
59
60
        int  dScaleMask;
        int2 pScaleMask;
        int2 mScaleMask;
61
62
63
64
65

        // Extract cell coordinates

        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

66
67
68
        unsigned int tgx                                = threadIdx.x & (GRID - 1);
        unsigned int tbx                                = threadIdx.x - tgx;
        unsigned int tj                                 = tgx;
69

70
71
72
73
        PmeDirectElectrostaticParticle* psA             = &sA[tbx];
        unsigned int atomI                              = x + tgx;
        PmeDirectElectrostaticParticle localParticleI;
        loadPmeDirectElectrostaticParticle( atomI, &localParticleI );
74

75
76
        zeroPmeDirectElectrostaticParticle( &localParticleI );
        scalingFactors[UScaleIndex]                     = 1.0f;
77
78
79
80
81
82

        if (x == y) // Handle diagonals uniquely at 50% efficiency
        {

            // load shared data

83
            loadPmeDirectElectrostaticParticle( atomI, &(sA[threadIdx.x]) );
84

85
            if( atomI < cSim.atoms ){
Mark Friedrichs's avatar
Mark Friedrichs committed
86
87
                if (bExclusionFlag)
                {
88
89
90
91
92
93
94
                    unsigned int xi       = x >> GRIDBITS;
                    unsigned int cell     = xi + xi*cSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
                    dScaleMask            = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                    pScaleMask            = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                    mScaleMask            = cAmoebaSim.pM_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                } else {
                    scalingFactors[DScaleIndex] = scalingFactors[PScaleIndex] = scalingFactors[MScaleIndex] = 1.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
95
                }
96
97
    
                for (unsigned int j = 0; j < GRID && (y+j) < cSim.atoms; j++)
Mark Friedrichs's avatar
Mark Friedrichs committed
98
99
                {
    
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
                    if( atomI != (y+j) )
                    {
                        if (bExclusionFlag)
                        {
                            getMaskedDScaleFactor( j, dScaleMask, scalingFactors + DScaleIndex );
                            getMaskedPScaleFactor( j, pScaleMask, scalingFactors + PScaleIndex );
                            getMaskedMScaleFactor( j, mScaleMask, scalingFactors + MScaleIndex );
                        }
                        calculatePmeDirectElectrostaticPairIxn_kernel( localParticleI, psA[j], bExclusionFlag, scalingFactors, 0.5f, &totalEnergy);
                    }
    
                } // end of j-loop
    
                // include self energy and self torque
    
                calculatePmeSelfTorqueElectrostaticPairIxn_kernel( localParticleI );
                calculatePmeSelfEnergyElectrostaticPairIxn_kernel( localParticleI, &totalEnergy );
    
                localParticleI.force[0]  *= conversionFactor;
                localParticleI.force[1]  *= conversionFactor;
                localParticleI.force[2]  *= conversionFactor;
    
                localParticleI.torque[0] *= -conversionFactor;
                localParticleI.torque[1] *= -conversionFactor;
                localParticleI.torque[2] *= -conversionFactor;
    
                // Write results
Mark Friedrichs's avatar
Mark Friedrichs committed
127
    
128
#ifdef USE_OUTPUT_BUFFER_PER_WARP
129
130
131
                unsigned int offset                 = (x + tgx + warp*cSim.paddedNumberOfAtoms);
                add3dArrayToFloat4( offset, localParticleI.force, cSim.pForce4 );
                add3dArray( 3*offset, localParticleI.torque, outputTorque );
132
#else
133
134
135
                unsigned int offset                 = (x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
                add3dArrayToFloat4( offset, localParticleI.force, cSim.pForce4 );
                load3dArray( 3*offset, localParticleI.torque, outputTorque );
136
#endif
137
            }
138

Mark Friedrichs's avatar
Mark Friedrichs committed
139
        } else {
140

Mark Friedrichs's avatar
Mark Friedrichs committed
141
            if (lasty != y) {
142
               loadPmeDirectElectrostaticParticle( (y+tgx), &(sA[threadIdx.x]) );
Mark Friedrichs's avatar
Mark Friedrichs committed
143
144
            }

145
            if (cSim.pInteractionFlag[pos] != 0 ) {
146

147
148
                zeroPmeDirectElectrostaticParticle( &(sA[threadIdx.x]) );
/*
Mark Friedrichs's avatar
Mark Friedrichs committed
149
150
151
152
153
154
155
                sA[threadIdx.x].force[0]     = 0.0f;
                sA[threadIdx.x].force[1]     = 0.0f;
                sA[threadIdx.x].force[2]     = 0.0f;
    
                sA[threadIdx.x].torque[0]    = 0.0f;
                sA[threadIdx.x].torque[1]    = 0.0f;
                sA[threadIdx.x].torque[2]    = 0.0f;
156
 */   
Mark Friedrichs's avatar
Mark Friedrichs committed
157
158
159
160
                if( bExclusionFlag )
                {
                    unsigned int xi   = x >> GRIDBITS;
                    unsigned int yi   = y >> GRIDBITS;
161
                    unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
Mark Friedrichs's avatar
Mark Friedrichs committed
162
163
164
165
166
167
168
                    dScaleMask        = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                    pScaleMask        = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                    mScaleMask        = cAmoebaSim.pM_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                } else {
                    scalingFactors[DScaleIndex] = scalingFactors[PScaleIndex] = scalingFactors[MScaleIndex] = 1.0f;
                }
       
169
170
                for (unsigned int j = 0; j < GRID; j++)
                {
Peter Eastman's avatar
Peter Eastman committed
171

172
                    // set scale factors and calculate force
Peter Eastman's avatar
Peter Eastman committed
173

174
175
176
177
178
                    if( bExclusionFlag ){
                        getMaskedDScaleFactor( tj, dScaleMask, scalingFactors + DScaleIndex );
                        getMaskedPScaleFactor( tj, pScaleMask, scalingFactors + PScaleIndex );
                        getMaskedMScaleFactor( tj, mScaleMask, scalingFactors + MScaleIndex );
                    }
Peter Eastman's avatar
Peter Eastman committed
179

180
                    // check if atoms out-of-bounds
181

182
183
184
185
                    if( (atomI < cSim.atoms) && ((y+tj) < cSim.atoms) )
                    {
                        calculatePmeDirectElectrostaticPairIxn_kernel( localParticleI, psA[tj], bExclusionFlag, scalingFactors, 1.0f, &totalEnergy);
                    } 
Mark Friedrichs's avatar
Mark Friedrichs committed
186
 
Mark Friedrichs's avatar
Mark Friedrichs committed
187
                    tj = (tj + 1) & (GRID - 1);
188

Mark Friedrichs's avatar
Mark Friedrichs committed
189
                } // end of j-loop
190

191
192
193
                localParticleI.force[0]    *=  conversionFactor;
                localParticleI.force[1]    *=  conversionFactor;
                localParticleI.force[2]    *=  conversionFactor;
Mark Friedrichs's avatar
Mark Friedrichs committed
194
    
195
196
197
                localParticleI.torque[0]   *= -conversionFactor;
                localParticleI.torque[1]   *= -conversionFactor;
                localParticleI.torque[2]   *= -conversionFactor;
Mark Friedrichs's avatar
Mark Friedrichs committed
198
    
199
200
201
                sA[threadIdx.x].force[0]   *=  conversionFactor;
                sA[threadIdx.x].force[1]   *=  conversionFactor;
                sA[threadIdx.x].force[2]   *=  conversionFactor;
Mark Friedrichs's avatar
Mark Friedrichs committed
202
    
203
204
205
                sA[threadIdx.x].torque[0]  *= -conversionFactor;
                sA[threadIdx.x].torque[1]  *= -conversionFactor;
                sA[threadIdx.x].torque[2]  *= -conversionFactor;
Mark Friedrichs's avatar
Mark Friedrichs committed
206
    
Mark Friedrichs's avatar
Mark Friedrichs committed
207
208
                // Write results
    
Mark Friedrichs's avatar
Mark Friedrichs committed
209
#ifdef USE_OUTPUT_BUFFER_PER_WARP
Mark Friedrichs's avatar
Mark Friedrichs committed
210
    
211
                unsigned int offset                 = (x + tgx + warp*cSim.paddedNumberOfAtoms);
212
213
                add3dArrayToFloat4( offset, localParticleI.force,  cSim.pForce4 );
                add3dArray(       3*offset, localParticleI.torque, outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
214
    
215
216
217
                offset                              = (y + tgx + warp*cSim.paddedNumberOfAtoms);
                add3dArrayToFloat4( offset, sA[threadIdx.x].force,  cSim.pForce4 );
                add3dArray(       3*offset, sA[threadIdx.x].torque, outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
218
    
Mark Friedrichs's avatar
Mark Friedrichs committed
219
#else
220
                unsigned int offset                 = (x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
221
222
                add3dArrayToFloat4( offset,  localParticleI.force,  cSim.pForce4 );
                load3dArray(       3*offset, localParticleI.torque, outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
223
    
224
                offset                              = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
Mark Friedrichs's avatar
Mark Friedrichs committed
225
    
226
                add3dArrayToFloat4( offset,  sA[threadIdx.x].force,  cSim.pForce4 );
227
                load3dArray(       3*offset, sA[threadIdx.x].torque, outputTorque );
Mark Friedrichs's avatar
Mark Friedrichs committed
228
    
Mark Friedrichs's avatar
Mark Friedrichs committed
229
#endif
230

Mark Friedrichs's avatar
Mark Friedrichs committed
231
            } // end of pInteractionFlag block
232
            lasty = y;
233
234
235
        }
        pos++;
    }
Mark Friedrichs's avatar
Mark Friedrichs committed
236
    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] -= conversionFactor*totalEnergy;
237
}