"platforms/cuda-old/src/kernels/kCalculateCDLJForces.h" did not exist on "364130a527920fe0986387c17e0a589beb88f2b5"
kCalculateAmoebaCudaRotateFrame.cu 28.7 KB
Newer Older
Mark Friedrichs's avatar
Mark Friedrichs committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */
Mark Friedrichs's avatar
Mark Friedrichs committed
26

Mark Friedrichs's avatar
Mark Friedrichs committed
27
#include "cudaKernels.h"
Mark Friedrichs's avatar
Mark Friedrichs committed
28
#include "amoebaCudaKernels.h"
Mark Friedrichs's avatar
Mark Friedrichs committed
29
#include "kCalculateAmoebaCudaUtilities.h"
30
#include "openmm/OpenMMException.h"
Mark Friedrichs's avatar
Mark Friedrichs committed
31
32
33
34
35
36
37
38
39
40

#include <stdio.h>
#include <cuda.h>
#include <cstdlib>
using namespace std; 

#define SQRT sqrtf

static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
41
extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*);
Mark Friedrichs's avatar
Mark Friedrichs committed
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

void SetCalculateAmoebaMultipoleForcesSim(amoebaGpuContext amoebaGpu)
{
    cudaError_t status;
    gpuContext gpu = amoebaGpu->gpuContext;
    status         = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));     
    RTERROR(status, "SetCalculateAmoebaMultipoleForcesSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");
    status         = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));     
    RTERROR(status, "SetCalculateAmoebaMultipoleForcesSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}

void GetCalculateAmoebaMultipoleForcesSim(amoebaGpuContext amoebaGpu)
{
    cudaError_t status;
    gpuContext gpu = amoebaGpu->gpuContext;
    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));     
    RTERROR(status, "GetCalculateAmoebaMultipoleForcesSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");
    status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));     
    RTERROR(status, "GetCalculateAmoebaMultipoleForcesSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}

63
__device__ static float normVector3( float* vector )
Mark Friedrichs's avatar
Mark Friedrichs committed
64
65
{

66
67
68
    float norm                     = DOT3( vector, vector );
    float returnNorm               = SQRT( norm );
    norm                           = returnNorm > 0.0f ? 1.0f/returnNorm : 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
69

70
71
72
    vector[0]                     *= norm;
    vector[1]                     *= norm;
    vector[2]                     *= norm;
Mark Friedrichs's avatar
Mark Friedrichs committed
73
74
75
76

    return returnNorm;
}

77
78
79
80
81
82
// ZThenX      == 0
// Bisector    == 1
// ZBisect     == 2
// ThreeFold   == 3
// ZOnly       == 4
// NoAxisType  == 5
83

Mark Friedrichs's avatar
Mark Friedrichs committed
84
85
86
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
87
#elif (__CUDA_ARCH__ >= 120)
Mark Friedrichs's avatar
Mark Friedrichs committed
88
89
90
91
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
92
void kCudaComputeCheckChiral_kernel( void )
Mark Friedrichs's avatar
Mark Friedrichs committed
93
94
{

95
96
97
98
    const int AD           = 0;
    const int BD           = 1;
    const int CD           = 2;
    const int C            = 3;
99
100
    float delta[4][3];
 
101
102
103
104
    float4* particleCoord         = cSim.pPosq;
    int4* multiPoleParticles      = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
    float* labFrameDipole         = cAmoebaSim.pLabFrameDipole;
    float* labFrameQuadrupole     = cAmoebaSim.pLabFrameQuadrupole;
105
106
107
 
    // ---------------------------------------------------------------------------------------
 
108
109
    int particleIndex             = blockIdx.x*blockDim.x + threadIdx.x;
    int numberOfParticles         = cSim.atoms;
110
111
112
113
    while( particleIndex < numberOfParticles )
    { 
        // skip z-then-x
    
114
        int axisType              = multiPoleParticles[particleIndex].w; 
Mark Friedrichs's avatar
Mark Friedrichs committed
115
        if( axisType != 0 && multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].y >=0 && multiPoleParticles[particleIndex].z >= 0 )
116
117
118
119
        {
     
            // ---------------------------------------------------------------------------------------
         
120
121
122
123
            int particleA                 = particleIndex;
            int particleB                 = multiPoleParticles[particleIndex].z;
            int particleC                 = multiPoleParticles[particleIndex].x;
            int particleD                 = multiPoleParticles[particleIndex].y;
124
        
125
126
127
            delta[AD][0]                  = particleCoord[particleA].x - particleCoord[particleD].x;
            delta[AD][1]                  = particleCoord[particleA].y - particleCoord[particleD].y;
            delta[AD][2]                  = particleCoord[particleA].z - particleCoord[particleD].z;
128
        
129
130
131
            delta[BD][0]                  = particleCoord[particleB].x - particleCoord[particleD].x;
            delta[BD][1]                  = particleCoord[particleB].y - particleCoord[particleD].y;
            delta[BD][2]                  = particleCoord[particleB].z - particleCoord[particleD].z;
132
        
133
134
135
            delta[CD][0]                  = particleCoord[particleC].x - particleCoord[particleD].x;
            delta[CD][1]                  = particleCoord[particleC].y - particleCoord[particleD].y;
            delta[CD][2]                  = particleCoord[particleC].z - particleCoord[particleD].z;
136
        
137
138
139
            delta[C][0]                   = delta[BD][1]*delta[CD][2] - delta[BD][2]*delta[CD][1];
            delta[C][1]                   = delta[CD][1]*delta[AD][2] - delta[CD][2]*delta[AD][1];
            delta[C][2]                   = delta[AD][1]*delta[BD][2] - delta[AD][2]*delta[BD][1];
140
         
141
            float volume                  = delta[C][0]*delta[AD][0] + delta[C][1]*delta[BD][0] + delta[C][2]*delta[CD][0];
142
143
144
145
146
147
148
149
150
            if( volume < 0.0 ){
                labFrameDipole[particleIndex*3+1]            *= -1.0f; // pole(3,i)
                labFrameQuadrupole[particleIndex*9+1]        *= -1.0f; // pole(6,i)  && pole(8,i)
                labFrameQuadrupole[particleIndex*9+3]        *= -1.0f; // pole(10,i) && pole(12,i)
                labFrameQuadrupole[particleIndex*9+5]        *= -1.0f; // pole(6,i)  && pole(8,i)
                labFrameQuadrupole[particleIndex*9+7]        *= -1.0f; // pole(10,i) && pole(12,i)
            }
        }
    
151
        particleIndex                 += gridDim.x*blockDim.x;
152
    }    
153
}
Mark Friedrichs's avatar
Mark Friedrichs committed
154

155
156
157
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
158
#elif (__CUDA_ARCH__ >= 120)
159
160
161
162
163
164
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kCudaComputeLabFrameMoments_kernel( void )
{
Mark Friedrichs's avatar
Mark Friedrichs committed
165

166
167
168
169
    float vectorX[3];
    float vectorY[3];
    float vectorZ[3];
 
170
    int particleIndex             = blockIdx.x*blockDim.x + threadIdx.x;
171

172
173
174
175
    float4* particleCoord         = cSim.pPosq;
    int4* multiPoleParticles      = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
    float* labFrameDipole         = cAmoebaSim.pLabFrameDipole;
    float* labFrameQuadrupole     = cAmoebaSim.pLabFrameQuadrupole;
176
177
178
179
180
181
182
183
184
 
    // get coordinates of this atom and the z & x axis atoms
    // compute the vector between the atoms and 1/sqrt(d2), d2 is distance between
    // this atom and the axis atom
 
    // this atom is referred to as the k-atom in notes below
 
    // code common to ZThenX and Bisector
    
185
    while( particleIndex < cSim.atoms )
186
    {
Mark Friedrichs's avatar
Mark Friedrichs committed
187
188
189

        if( multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].z >= 0 )
        {
190
            float4 coordinatesThisParticle    = particleCoord[particleIndex];
191
         
192
193
            int multipoleParticleIndex        = multiPoleParticles[particleIndex].z;
            float4 coordinatesAxisParticle    = particleCoord[multipoleParticleIndex];
Mark Friedrichs's avatar
Mark Friedrichs committed
194
         
195
196
197
            vectorZ[0]                        = coordinatesAxisParticle.x - coordinatesThisParticle.x;
            vectorZ[1]                        = coordinatesAxisParticle.y - coordinatesThisParticle.y;
            vectorZ[2]                        = coordinatesAxisParticle.z - coordinatesThisParticle.z;
Mark Friedrichs's avatar
Mark Friedrichs committed
198
              
199
200
            multipoleParticleIndex            = multiPoleParticles[particleIndex].x; 
            coordinatesAxisParticle           = particleCoord[multipoleParticleIndex];
Mark Friedrichs's avatar
Mark Friedrichs committed
201
         
202
203
204
            vectorX[0]                        = coordinatesAxisParticle.x - coordinatesThisParticle.x;
            vectorX[1]                        = coordinatesAxisParticle.y - coordinatesThisParticle.y;
            vectorX[2]                        = coordinatesAxisParticle.z - coordinatesThisParticle.z;
Mark Friedrichs's avatar
Mark Friedrichs committed
205
         
206
            int axisType                      = multiPoleParticles[particleIndex].w; 
207
    
Mark Friedrichs's avatar
Mark Friedrichs committed
208
209
210
211
            /*
                z-only
                   (1) norm z
                   (2) select random x
212
                   (3) x  = x - (x.z)z
Mark Friedrichs's avatar
Mark Friedrichs committed
213
214
215
216
217
                   (4) norm x
        
                z-then-x
                   (1) norm z
                   (2) norm x (not needed)
218
                   (3) x  = x - (x.z)z
Mark Friedrichs's avatar
Mark Friedrichs committed
219
220
221
222
223
                   (4) norm x
        
                bisector
                   (1) norm z
                   (2) norm x 
224
                   (3) z  = x + z
Mark Friedrichs's avatar
Mark Friedrichs committed
225
                   (4) norm z
226
                   (5) x  = x - (x.z)z 
Mark Friedrichs's avatar
Mark Friedrichs committed
227
228
229
230
231
232
                   (6) norm x 
        
                z-bisect
                   (1) norm z
                   (2) norm x 
                   (3) norm y 
233
                   (3) x  = x + y
Mark Friedrichs's avatar
Mark Friedrichs committed
234
                   (4) norm x
235
                   (5) x  = x - (x.z)z 
Mark Friedrichs's avatar
Mark Friedrichs committed
236
237
238
239
240
241
                   (6) norm x 
        
                3-fold
                   (1) norm z
                   (2) norm x 
                   (3) norm y 
242
                   (4) z  = x + y + z
Mark Friedrichs's avatar
Mark Friedrichs committed
243
                   (5) norm z
244
                   (6) x  = x - (x.z)z 
Mark Friedrichs's avatar
Mark Friedrichs committed
245
246
247
248
249
250
                   (7) norm x 
        
            */
        
            // branch based on axis type
             
251
            float sum                    = normVector3( vectorZ );
Mark Friedrichs's avatar
Mark Friedrichs committed
252
        
253
            if( axisType  == 1 ){
Mark Friedrichs's avatar
Mark Friedrichs committed
254
255
256
        
                // bisector
                
257
                sum                      = normVector3( vectorX );
Mark Friedrichs's avatar
Mark Friedrichs committed
258
                
259
260
261
                vectorZ[0]              += vectorX[0];
                vectorZ[1]              += vectorX[1];
                vectorZ[2]              += vectorX[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
262
           
263
                sum                      = normVector3( vectorZ );
Mark Friedrichs's avatar
Mark Friedrichs committed
264
        
265
            } else if( axisType  == 2 || axisType == 3 ){ 
Mark Friedrichs's avatar
Mark Friedrichs committed
266
267
268
         
                // z-bisect
        
269
                multipoleParticleIndex   = multiPoleParticles[particleIndex].y; 
Mark Friedrichs's avatar
Mark Friedrichs committed
270
                if( multipoleParticleIndex >= 0 && multipoleParticleIndex < cSim.atoms ){
271
272
273
274
                    coordinatesAxisParticle  = particleCoord[multipoleParticleIndex];
                    vectorY[0]               = coordinatesAxisParticle.x - coordinatesThisParticle.x;
                    vectorY[1]               = coordinatesAxisParticle.y - coordinatesThisParticle.y;
                    vectorY[2]               = coordinatesAxisParticle.z - coordinatesThisParticle.z;
275
            
276
277
                    sum                      = normVector3( vectorY );
                    sum                      = normVector3( vectorX );
278
            
279
                    if( axisType  == 2 ){
Mark Friedrichs's avatar
Mark Friedrichs committed
280
            
281
282
283
284
                        vectorX[0]          += vectorY[0];
                        vectorX[1]          += vectorY[1];
                        vectorX[2]          += vectorY[2];
                        sum                  = normVector3( vectorX );
Mark Friedrichs's avatar
Mark Friedrichs committed
285
286
287
288
289
             
                    } else { 
             
                        // 3-fold
                
290
291
292
293
                        vectorZ[0]          += vectorX[0] + vectorY[0];
                        vectorZ[1]          += vectorX[1] + vectorY[1];
                        vectorZ[2]          += vectorX[2] + vectorY[2];
                        sum                  = normVector3( vectorZ );
Mark Friedrichs's avatar
Mark Friedrichs committed
294
295
296
297
                    }
                }
         
            } else if( axisType >= 4 ){ 
298
        
299
300
301
                vectorX[0]              = 0.1f;
                vectorX[1]              = 0.1f;
                vectorX[2]              = 0.1f;
302
            }
Mark Friedrichs's avatar
Mark Friedrichs committed
303
            
304
            // x  = x - (x.z)z
Mark Friedrichs's avatar
Mark Friedrichs committed
305
        
306
            float dot          = vectorZ[0]*vectorX[0] + vectorZ[1]*vectorX[1] + vectorZ[2]*vectorX[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
307
                
308
309
310
            vectorX[0]        -= dot*vectorZ[0];
            vectorX[1]        -= dot*vectorZ[1];
            vectorX[2]        -= dot*vectorZ[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
311
             
312
            sum                = normVector3( vectorX );
Mark Friedrichs's avatar
Mark Friedrichs committed
313
        
314
315
316
            vectorY[0]         = (vectorZ[1]*vectorX[2]) - (vectorZ[2]*vectorX[1]);
            vectorY[1]         = (vectorZ[2]*vectorX[0]) - (vectorZ[0]*vectorX[2]);
            vectorY[2]         = (vectorZ[0]*vectorX[1]) - (vectorZ[1]*vectorX[0]);
Mark Friedrichs's avatar
Mark Friedrichs committed
317
318
319
320
         
            // use identity rotation matrix for unrecognized axis types
        
            if( axisType < 0 || axisType > 4 ){
321
        
322
323
324
                vectorX[0]  = 1.0f;
                vectorX[1]  = 0.0f;
                vectorX[2]  = 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
325
        
326
327
328
                vectorY[0]  = 0.0f;
                vectorY[1]  = 1.0f;
                vectorY[2]  = 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
329
        
330
331
332
                vectorZ[0]  = 0.0f;
                vectorZ[1]  = 0.0f;
                vectorZ[2]  = 1.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
333
334
            }
        
335
            unsigned int offset            = 3*particleIndex;
336
    
Mark Friedrichs's avatar
Mark Friedrichs committed
337
            float molDipole[3];
338
339
340
            molDipole[0]                   = labFrameDipole[offset];
            molDipole[1]                   = labFrameDipole[offset+1];
            molDipole[2]                   = labFrameDipole[offset+2];
341
            
Mark Friedrichs's avatar
Mark Friedrichs committed
342
            // set out-of-range elements to 0.0f
343
         
344
345
346
            labFrameDipole[offset]         = molDipole[0]*vectorX[0] + molDipole[1]*vectorY[0] + molDipole[2]*vectorZ[0];
            labFrameDipole[offset+1]       = molDipole[0]*vectorX[1] + molDipole[1]*vectorY[1] + molDipole[2]*vectorZ[1];
            labFrameDipole[offset+2]       = molDipole[0]*vectorX[2] + molDipole[1]*vectorY[2] + molDipole[2]*vectorZ[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
347
348
349
350
            
            // ---------------------------------------------------------------------------------------
            
            float mPole[3][3];
351
            offset                         = 9*particleIndex;
Mark Friedrichs's avatar
Mark Friedrichs committed
352
            
353
354
355
            mPole[0][0]                    = labFrameQuadrupole[offset];
            mPole[0][1]                    = labFrameQuadrupole[offset+1];
            mPole[0][2]                    = labFrameQuadrupole[offset+2];
Mark Friedrichs's avatar
Mark Friedrichs committed
356
        
357
358
359
            mPole[1][0]                    = labFrameQuadrupole[offset+3];
            mPole[1][1]                    = labFrameQuadrupole[offset+4];
            mPole[1][2]                    = labFrameQuadrupole[offset+5];
Mark Friedrichs's avatar
Mark Friedrichs committed
360
        
361
362
363
            mPole[2][0]                    = labFrameQuadrupole[offset+6];
            mPole[2][1]                    = labFrameQuadrupole[offset+7];
            mPole[2][2]                    = labFrameQuadrupole[offset+8];
Mark Friedrichs's avatar
Mark Friedrichs committed
364
        
365
366
367
            labFrameQuadrupole[offset+8]   = vectorX[2]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
            labFrameQuadrupole[offset+8]  += vectorY[2]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
            labFrameQuadrupole[offset+8]  += vectorZ[2]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
368
    
369
370
371
            labFrameQuadrupole[offset+4]   = vectorX[1]*(vectorX[1]*mPole[0][0] + vectorY[1]*mPole[0][1] + vectorZ[1]*mPole[0][2]);
            labFrameQuadrupole[offset+4]  += vectorY[1]*(vectorX[1]*mPole[1][0] + vectorY[1]*mPole[1][1] + vectorZ[1]*mPole[1][2]);
            labFrameQuadrupole[offset+4]  += vectorZ[1]*(vectorX[1]*mPole[2][0] + vectorY[1]*mPole[2][1] + vectorZ[1]*mPole[2][2]);
372
    
373
374
375
            labFrameQuadrupole[offset+5]   = vectorX[1]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
            labFrameQuadrupole[offset+5]  += vectorY[1]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
            labFrameQuadrupole[offset+5]  += vectorZ[1]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
376
    
377
378
379
            labFrameQuadrupole[offset]     = vectorX[0]*(vectorX[0]*mPole[0][0] + vectorY[0]*mPole[0][1] + vectorZ[0]*mPole[0][2]);
            labFrameQuadrupole[offset]    += vectorY[0]*(vectorX[0]*mPole[1][0] + vectorY[0]*mPole[1][1] + vectorZ[0]*mPole[1][2]);
            labFrameQuadrupole[offset]    += vectorZ[0]*(vectorX[0]*mPole[2][0] + vectorY[0]*mPole[2][1] + vectorZ[0]*mPole[2][2]);
380
    
381
382
383
            labFrameQuadrupole[offset+1]   = vectorX[0]*(vectorX[1]*mPole[0][0] + vectorY[1]*mPole[0][1] + vectorZ[1]*mPole[0][2]);
            labFrameQuadrupole[offset+1]  += vectorY[0]*(vectorX[1]*mPole[1][0] + vectorY[1]*mPole[1][1] + vectorZ[1]*mPole[1][2]);
            labFrameQuadrupole[offset+1]  += vectorZ[0]*(vectorX[1]*mPole[2][0] + vectorY[1]*mPole[2][1] + vectorZ[1]*mPole[2][2]);
384
    
385
386
387
            labFrameQuadrupole[offset+2]   = vectorX[0]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
            labFrameQuadrupole[offset+2]  += vectorY[0]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
            labFrameQuadrupole[offset+2]  += vectorZ[0]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
Mark Friedrichs's avatar
Mark Friedrichs committed
388
     
389
390
391
            labFrameQuadrupole[offset+3]   = labFrameQuadrupole[offset+1];
            labFrameQuadrupole[offset+6]   = labFrameQuadrupole[offset+2];
            labFrameQuadrupole[offset+7]   = labFrameQuadrupole[offset+5];
392
    
Mark Friedrichs's avatar
Mark Friedrichs committed
393
        }
Mark Friedrichs's avatar
Mark Friedrichs committed
394

395
        particleIndex                 += gridDim.x*blockDim.x;
396
    }
397
     
Mark Friedrichs's avatar
Mark Friedrichs committed
398
399
400
401
402
403
404
}

void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
{

   // ---------------------------------------------------------------------------------------

405
    gpuContext gpu     = amoebaGpu->gpuContext;
Mark Friedrichs's avatar
Mark Friedrichs committed
406

407
408
    int numBlocks      = gpu->sim.blocks;
    int numThreads     = gpu->sim.threads_per_block;
Mark Friedrichs's avatar
Mark Friedrichs committed
409

410
411
412
413
414
415
416
    // copy molecular moments to lab frame moment arrays
    // check if chiral center requires moments to have sign flipped
    // compute lab frame moments

    cudaMemcpy( amoebaGpu->psLabFrameDipole->_pDevData,      amoebaGpu->psMolecularDipole->_pDevData,    3*gpu->sim.paddedNumberOfAtoms*sizeof( float ), cudaMemcpyDeviceToDevice ); 
    cudaMemcpy( amoebaGpu->psLabFrameQuadrupole->_pDevData, amoebaGpu->psMolecularQuadrupole->_pDevData, 9*gpu->sim.paddedNumberOfAtoms*sizeof( float ), cudaMemcpyDeviceToDevice ); 

417
418
419
420
    kCudaComputeCheckChiral_kernel<<< numBlocks, numThreads>>> ( );
    LAUNCHERROR("kCudaComputeCheckChiral");

    kCudaComputeLabFrameMoments_kernel<<< numBlocks, numThreads>>> ( );
Mark Friedrichs's avatar
Mark Friedrichs committed
421
    LAUNCHERROR("kCudaComputeLabFrameMoments");
Mark Friedrichs's avatar
Mark Friedrichs committed
422
423
424

}

425
static void kSetupAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaGeneralizedKirkwood ) 
Mark Friedrichs's avatar
Mark Friedrichs committed
426
{
427
    std::string methodName  = "kSetupAmoebaMultipoleForces";
Mark Friedrichs's avatar
Mark Friedrichs committed
428
429
430
431
432

    // compute lab frame moments

    cudaComputeAmoebaLabFrameMoments( amoebaGpu );

Mark Friedrichs's avatar
Mark Friedrichs committed
433
    if( 0 ){
434
        gpuContext gpu                        = amoebaGpu->gpuContext;
Mark Friedrichs's avatar
Mark Friedrichs committed
435
436
437
        std::vector<int> fileId;
        //fileId.push_back( 0 );
        VectorOfDoubleVectors outputVector;
438
439
440
        //cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4,              outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psLabFrameDipole,     outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        cudaLoadCudaFloatArray( gpu->natoms,  9, amoebaGpu->psLabFrameQuadrupole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
Mark Friedrichs's avatar
Mark Friedrichs committed
441
442
443
        cudaWriteVectorOfDoubleVectorsToFile( "CudaLabMoments", fileId, outputVector );
    }   

Mark Friedrichs's avatar
Mark Friedrichs committed
444
445
446
447
448
449
    // compute fixed E-field and mutual induced field 

    if( hasAmoebaGeneralizedKirkwood ){
        cudaComputeAmoebaFixedEAndGkFields( amoebaGpu );
        cudaComputeAmoebaMutualInducedAndGkField( amoebaGpu );
    } else {
450

451
        if( amoebaGpu->multipoleNonbondedMethod  == AMOEBA_NO_CUTOFF ){
452

453
454
            cudaComputeAmoebaFixedEField( amoebaGpu );
            cudaComputeAmoebaMutualInducedField( amoebaGpu );
455

456
        } else {
457

458
            gpuContext gpu  = amoebaGpu->gpuContext;
Mark Friedrichs's avatar
Mark Friedrichs committed
459
460
461
462
            kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
            LAUNCHERROR("kFindBlockBoundsPeriodic");
            kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
            LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
463
464
465
466
467
468
469
470
471

            compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);

            //compactStream( gpu->compactPlan, 
            //               gpu->sim.pInteractingWorkUnit, unsigned int* dOut
            //               amoebaGpu->psWorkUnit->_pDevData, const unsigned int* dIn
            //               gpu->sim.pInteractionFlag,        const unsigned int* dValid
            //               gpu->sim.workUnits,               gpu
            //               gpu->sim.pInteractionCount);
Mark Friedrichs's avatar
Mark Friedrichs committed
472
473
474
            kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                    sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
            LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
475

476
            cudaComputeAmoebaPmeFixedEField( amoebaGpu );
477
            cudaComputeAmoebaPmeMutualInducedField( amoebaGpu );
478
        }
Mark Friedrichs's avatar
Mark Friedrichs committed
479
480
481
482
    }

    // check if induce dipole calculation converged -- abort if it did not

483
    if( amoebaGpu->mutualInducedDone  == 0 ){
484
       throw OpenMM::OpenMMException("Induced dipole calculation did not converge" );
Mark Friedrichs's avatar
Mark Friedrichs committed
485
486
    }

487
488
489
490
491
492
493
}

void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaGeneralizedKirkwood ) 
{

    kSetupAmoebaMultipoleForces(amoebaGpu, hasAmoebaGeneralizedKirkwood ); 

Mark Friedrichs's avatar
Mark Friedrichs committed
494
495
    // calculate electrostatic forces

496
    if( amoebaGpu->multipoleNonbondedMethod  == AMOEBA_NO_CUTOFF ){
497
        cudaComputeAmoebaElectrostatic( amoebaGpu, (hasAmoebaGeneralizedKirkwood ? 0 : 1) );
498
    } else {
Mark Friedrichs's avatar
Mark Friedrichs committed
499
        cudaComputeAmoebaPmeElectrostatic( amoebaGpu );
500
    }
501

Mark Friedrichs's avatar
Mark Friedrichs committed
502
}
503
504
505
506

void kCalculateAmoebaMultipolePotential(amoebaGpuContext amoebaGpu ) 
{

507
    // setup
508

509
    kSetupAmoebaMultipoleForces(amoebaGpu, false ); 
510

511
    // calculate electrostatic potential
512

513
    cudaComputeAmoebaElectrostaticPotential( amoebaGpu );
514

515
516
517
518
}

void kCalculateAmoebaSystemMultipoleMoments(amoebaGpuContext amoebaGpu, const OpenMM::Vec3& origin, std::vector< double >& outputMultipoleMoments ) 
{
519

520
    // setup
521

522
    kSetupAmoebaMultipoleForces(amoebaGpu, false ); 
523

524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
    gpuContext gpu         = amoebaGpu->gpuContext;

    gpu->psPosq4->Download();
    gpu->psVelm4->Download();
    float4* posq           = gpu->psPosq4->_pSysData;    
    float4* velm           = gpu->psVelm4->_pSysData;    
    float totalMass        = 0.0f;
    float centerOfMass[3]  = { 0.0f, 0.0f, 0.0f };
    for( unsigned int ii  = 0; ii < gpu->natoms; ii++ ){
        float mass;
        if( velm->w > 0.0f ){
            mass        = 1.0f/velm[ii].w;
        } else {
            mass        = 0.0f;
        }
        totalMass        += mass;
        centerOfMass[0]  += mass*posq[ii].x;
        centerOfMass[1]  += mass*posq[ii].y;
        centerOfMass[2]  += mass*posq[ii].z;
543
544
    }

545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
    std::vector<float4>  posqLocal(gpu->natoms);
    if( totalMass > 0.0f ){
        centerOfMass[0]  /= totalMass;
        centerOfMass[1]  /= totalMass;
        centerOfMass[2]  /= totalMass;
    }
    for( unsigned int ii  = 0; ii < gpu->natoms; ii++ ){
        posqLocal[ii].x = posq[ii].x  - centerOfMass[0];
        posqLocal[ii].y = posq[ii].y  - centerOfMass[1];
        posqLocal[ii].z = posq[ii].z  - centerOfMass[2];
        posqLocal[ii].w = posq[ii].w;
    }

    float netchg  = 0.0f;

    float xdpl    = 0.0f;
    float ydpl    = 0.0f;
    float zdpl    = 0.0f;

    float xxqdp   = 0.0f;
    float xyqdp   = 0.0f;
    float xzqdp   = 0.0f;

    float yxqdp   = 0.0f;
    float yyqdp   = 0.0f;
    float yzqdp   = 0.0f;

    float zxqdp   = 0.0f;
    float zyqdp   = 0.0f;
    float zzqdp   = 0.0f;

    amoebaGpu->psLabFrameDipole->Download();
    float* labFrameDipole      = amoebaGpu->psLabFrameDipole->_pSysData;    

    amoebaGpu->psInducedDipole->Download();
    float* inducedDipole       = amoebaGpu->psInducedDipole->_pSysData;    

    amoebaGpu->psLabFrameQuadrupole->Download();
    float* labFrameQuadrupole  = amoebaGpu->psLabFrameQuadrupole->_pSysData;    
    for( unsigned int ii  = 0; ii < gpu->natoms; ii++ ){

        netchg              += posqLocal[ii].w;

        float netDipoleX     = (labFrameDipole[3*ii]    + inducedDipole[3*ii]);
        float netDipoleY     = (labFrameDipole[3*ii+1]  + inducedDipole[3*ii+1]);
        float netDipoleZ     = (labFrameDipole[3*ii+2]  + inducedDipole[3*ii+2]);

        xdpl    += posqLocal[ii].x*posqLocal[ii].w + netDipoleX;
        ydpl    += posqLocal[ii].y*posqLocal[ii].w + netDipoleY;
        zdpl    += posqLocal[ii].z*posqLocal[ii].w + netDipoleZ;

        xxqdp   += posqLocal[ii].x*posqLocal[ii].x*posqLocal[ii].w + 2.0f*posqLocal[ii].x*netDipoleX;
        xyqdp   += posqLocal[ii].x*posqLocal[ii].y*posqLocal[ii].w + posqLocal[ii].x*netDipoleY + posqLocal[ii].y*netDipoleX;
        xzqdp   += posqLocal[ii].x*posqLocal[ii].z*posqLocal[ii].w + posqLocal[ii].x*netDipoleZ + posqLocal[ii].z*netDipoleX;

        yxqdp   += posqLocal[ii].y*posqLocal[ii].x*posqLocal[ii].w + posqLocal[ii].y*netDipoleX + posqLocal[ii].x*netDipoleY;
        yyqdp   += posqLocal[ii].y*posqLocal[ii].y*posqLocal[ii].w + 2.0f*posqLocal[ii].y*netDipoleY;
        yzqdp   += posqLocal[ii].y*posqLocal[ii].z*posqLocal[ii].w + posqLocal[ii].y*netDipoleZ + posqLocal[ii].z*netDipoleY;

        zxqdp   += posqLocal[ii].z*posqLocal[ii].x*posqLocal[ii].w + posqLocal[ii].z*netDipoleX + posqLocal[ii].x*netDipoleZ;
        zyqdp   += posqLocal[ii].z*posqLocal[ii].y*posqLocal[ii].w + posqLocal[ii].z*netDipoleY + posqLocal[ii].y*netDipoleZ;
        zzqdp   += posqLocal[ii].z*posqLocal[ii].z*posqLocal[ii].w + 2.0f*posqLocal[ii].z*netDipoleZ;
607
608
609

    }

610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
//  convert the quadrupole from traced to traceless form
 
    float qave   = (xxqdp + yyqdp + zzqdp)/3.0f;
          xxqdp  = 1.5f*(xxqdp-qave);
          xyqdp  = 1.5f*xyqdp;
          xzqdp  = 1.5f*xzqdp;
          yxqdp  = 1.5f*yxqdp;
          yyqdp  = 1.5f*(yyqdp-qave);
          yzqdp  = 1.5f*yzqdp;
          zxqdp  = 1.5f*zxqdp;
          zyqdp  = 1.5f*zyqdp;
          zzqdp  = 1.5f*(zzqdp-qave);

//  add the traceless atomic quadrupoles to total quadrupole

    for( unsigned int ii  = 0; ii < gpu->natoms; ii++ ){
        xxqdp  = xxqdp + 3.0f*labFrameQuadrupole[9*ii];
        xyqdp  = xyqdp + 3.0f*labFrameQuadrupole[9*ii+1];
        xzqdp  = xzqdp + 3.0f*labFrameQuadrupole[9*ii+2];
        yxqdp  = yxqdp + 3.0f*labFrameQuadrupole[9*ii+3];
        yyqdp  = yyqdp + 3.0f*labFrameQuadrupole[9*ii+4];
        yzqdp  = yzqdp + 3.0f*labFrameQuadrupole[9*ii+5];
        zxqdp  = zxqdp + 3.0f*labFrameQuadrupole[9*ii+6];
        zyqdp  = zyqdp + 3.0f*labFrameQuadrupole[9*ii+7];
        zzqdp  = zzqdp + 3.0f*labFrameQuadrupole[9*ii+8];
    }
 
    float debye                = 4.80321f;
    outputMultipoleMoments.resize( 13 );
639

640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
    outputMultipoleMoments[0]  = netchg;

    outputMultipoleMoments[1]  = xdpl*debye;
    outputMultipoleMoments[2]  = ydpl*debye;
    outputMultipoleMoments[3]  = zdpl*debye;
    
    outputMultipoleMoments[4]  = xxqdp*debye;
    outputMultipoleMoments[5]  = xyqdp*debye;
    outputMultipoleMoments[6]  = xzqdp*debye;

    outputMultipoleMoments[7]  = yxqdp*debye;
    outputMultipoleMoments[8]  = yyqdp*debye;
    outputMultipoleMoments[9]  = yzqdp*debye;

    outputMultipoleMoments[10] = zxqdp*debye;
    outputMultipoleMoments[11] = zyqdp*debye;
    outputMultipoleMoments[12] = zzqdp*debye;
657
658

}