kCalculateAmoebaCudaRotateFrame.cu 23.1 KB
Newer Older
Mark Friedrichs's avatar
Mark Friedrichs committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */
Mark Friedrichs's avatar
Mark Friedrichs committed
26

Mark Friedrichs's avatar
Mark Friedrichs committed
27
#include "cudaKernels.h"
Mark Friedrichs's avatar
Mark Friedrichs committed
28
#include "amoebaCudaKernels.h"
Mark Friedrichs's avatar
Mark Friedrichs committed
29
#include "kCalculateAmoebaCudaUtilities.h"
30
#include "openmm/OpenMMException.h"
Mark Friedrichs's avatar
Mark Friedrichs committed
31
32
33
34
35
36
37
38
39
40

#include <stdio.h>
#include <cuda.h>
#include <cstdlib>
using namespace std; 

#define SQRT sqrtf

static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
41
extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*);
Mark Friedrichs's avatar
Mark Friedrichs committed
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

void SetCalculateAmoebaMultipoleForcesSim(amoebaGpuContext amoebaGpu)
{
    cudaError_t status;
    gpuContext gpu = amoebaGpu->gpuContext;
    status         = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));     
    RTERROR(status, "SetCalculateAmoebaMultipoleForcesSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");
    status         = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));     
    RTERROR(status, "SetCalculateAmoebaMultipoleForcesSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}

void GetCalculateAmoebaMultipoleForcesSim(amoebaGpuContext amoebaGpu)
{
    cudaError_t status;
    gpuContext gpu = amoebaGpu->gpuContext;
    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));     
    RTERROR(status, "GetCalculateAmoebaMultipoleForcesSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");
    status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));     
    RTERROR(status, "GetCalculateAmoebaMultipoleForcesSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}

63
__device__ static float normVector3( float* vector )
Mark Friedrichs's avatar
Mark Friedrichs committed
64
65
{

66
67
68
    float norm                     = DOT3( vector, vector );
    float returnNorm               = SQRT( norm );
    norm                           = returnNorm > 0.0f ? 1.0f/returnNorm : 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
69

70
71
72
    vector[0]                     *= norm;
    vector[1]                     *= norm;
    vector[2]                     *= norm;
Mark Friedrichs's avatar
Mark Friedrichs committed
73
74
75
76

    return returnNorm;
}

77
78
79
80
81
82
// ZThenX      == 0
// Bisector    == 1
// ZBisect     == 2
// ThreeFold   == 3
// ZOnly       == 4
// NoAxisType  == 5
83

Mark Friedrichs's avatar
Mark Friedrichs committed
84
85
86
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
87
#elif (__CUDA_ARCH__ >= 120)
Mark Friedrichs's avatar
Mark Friedrichs committed
88
89
90
91
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
92
void kCudaComputeCheckChiral_kernel( void )
Mark Friedrichs's avatar
Mark Friedrichs committed
93
94
{

95
96
97
98
    const int AD           = 0;
    const int BD           = 1;
    const int CD           = 2;
    const int C            = 3;
99
100
    float delta[4][3];
 
101
102
103
104
    float4* particleCoord         = cSim.pPosq;
    int4* multiPoleParticles      = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
    float* labFrameDipole         = cAmoebaSim.pLabFrameDipole;
    float* labFrameQuadrupole     = cAmoebaSim.pLabFrameQuadrupole;
105
106
107
 
    // ---------------------------------------------------------------------------------------
 
108
109
    int particleIndex             = blockIdx.x*blockDim.x + threadIdx.x;
    int numberOfParticles         = cSim.atoms;
110
111
112
113
    while( particleIndex < numberOfParticles )
    { 
        // skip z-then-x
    
114
        int axisType              = multiPoleParticles[particleIndex].w; 
Mark Friedrichs's avatar
Mark Friedrichs committed
115
        if( axisType != 0 && multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].y >=0 && multiPoleParticles[particleIndex].z >= 0 )
116
117
118
119
        {
     
            // ---------------------------------------------------------------------------------------
         
120
121
122
123
            int particleA                 = particleIndex;
            int particleB                 = multiPoleParticles[particleIndex].z;
            int particleC                 = multiPoleParticles[particleIndex].x;
            int particleD                 = multiPoleParticles[particleIndex].y;
124
        
125
126
127
            delta[AD][0]                  = particleCoord[particleA].x - particleCoord[particleD].x;
            delta[AD][1]                  = particleCoord[particleA].y - particleCoord[particleD].y;
            delta[AD][2]                  = particleCoord[particleA].z - particleCoord[particleD].z;
128
        
129
130
131
            delta[BD][0]                  = particleCoord[particleB].x - particleCoord[particleD].x;
            delta[BD][1]                  = particleCoord[particleB].y - particleCoord[particleD].y;
            delta[BD][2]                  = particleCoord[particleB].z - particleCoord[particleD].z;
132
        
133
134
135
            delta[CD][0]                  = particleCoord[particleC].x - particleCoord[particleD].x;
            delta[CD][1]                  = particleCoord[particleC].y - particleCoord[particleD].y;
            delta[CD][2]                  = particleCoord[particleC].z - particleCoord[particleD].z;
136
        
137
138
139
            delta[C][0]                   = delta[BD][1]*delta[CD][2] - delta[BD][2]*delta[CD][1];
            delta[C][1]                   = delta[CD][1]*delta[AD][2] - delta[CD][2]*delta[AD][1];
            delta[C][2]                   = delta[AD][1]*delta[BD][2] - delta[AD][2]*delta[BD][1];
140
         
141
            float volume                  = delta[C][0]*delta[AD][0] + delta[C][1]*delta[BD][0] + delta[C][2]*delta[CD][0];
142
143
144
145
146
147
148
149
150
            if( volume < 0.0 ){
                labFrameDipole[particleIndex*3+1]            *= -1.0f; // pole(3,i)
                labFrameQuadrupole[particleIndex*9+1]        *= -1.0f; // pole(6,i)  && pole(8,i)
                labFrameQuadrupole[particleIndex*9+3]        *= -1.0f; // pole(10,i) && pole(12,i)
                labFrameQuadrupole[particleIndex*9+5]        *= -1.0f; // pole(6,i)  && pole(8,i)
                labFrameQuadrupole[particleIndex*9+7]        *= -1.0f; // pole(10,i) && pole(12,i)
            }
        }
    
151
        particleIndex                 += gridDim.x*blockDim.x;
152
    }    
153
}
Mark Friedrichs's avatar
Mark Friedrichs committed
154

155
156
157
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
158
#elif (__CUDA_ARCH__ >= 120)
159
160
161
162
163
164
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kCudaComputeLabFrameMoments_kernel( void )
{
Mark Friedrichs's avatar
Mark Friedrichs committed
165

166
167
168
169
    float vectorX[3];
    float vectorY[3];
    float vectorZ[3];
 
170
    int particleIndex             = blockIdx.x*blockDim.x + threadIdx.x;
171

172
173
174
175
    float4* particleCoord         = cSim.pPosq;
    int4* multiPoleParticles      = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
    float* labFrameDipole         = cAmoebaSim.pLabFrameDipole;
    float* labFrameQuadrupole     = cAmoebaSim.pLabFrameQuadrupole;
176
177
178
179
180
181
182
183
184
 
    // get coordinates of this atom and the z & x axis atoms
    // compute the vector between the atoms and 1/sqrt(d2), d2 is distance between
    // this atom and the axis atom
 
    // this atom is referred to as the k-atom in notes below
 
    // code common to ZThenX and Bisector
    
185
    while( particleIndex < cSim.atoms )
186
    {
Mark Friedrichs's avatar
Mark Friedrichs committed
187
188
189

        if( multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].z >= 0 )
        {
190
            float4 coordinatesThisParticle    = particleCoord[particleIndex];
191
         
192
193
            int multipoleParticleIndex        = multiPoleParticles[particleIndex].z;
            float4 coordinatesAxisParticle    = particleCoord[multipoleParticleIndex];
Mark Friedrichs's avatar
Mark Friedrichs committed
194
         
195
196
197
            vectorZ[0]                        = coordinatesAxisParticle.x - coordinatesThisParticle.x;
            vectorZ[1]                        = coordinatesAxisParticle.y - coordinatesThisParticle.y;
            vectorZ[2]                        = coordinatesAxisParticle.z - coordinatesThisParticle.z;
Mark Friedrichs's avatar
Mark Friedrichs committed
198
              
199
200
            multipoleParticleIndex            = multiPoleParticles[particleIndex].x; 
            coordinatesAxisParticle           = particleCoord[multipoleParticleIndex];
Mark Friedrichs's avatar
Mark Friedrichs committed
201
         
202
203
204
            vectorX[0]                        = coordinatesAxisParticle.x - coordinatesThisParticle.x;
            vectorX[1]                        = coordinatesAxisParticle.y - coordinatesThisParticle.y;
            vectorX[2]                        = coordinatesAxisParticle.z - coordinatesThisParticle.z;
Mark Friedrichs's avatar
Mark Friedrichs committed
205
         
206
            int axisType                      = multiPoleParticles[particleIndex].w; 
207
    
Mark Friedrichs's avatar
Mark Friedrichs committed
208
209
210
211
            /*
                z-only
                   (1) norm z
                   (2) select random x
212
                   (3) x  = x - (x.z)z
Mark Friedrichs's avatar
Mark Friedrichs committed
213
214
215
216
217
                   (4) norm x
        
                z-then-x
                   (1) norm z
                   (2) norm x (not needed)
218
                   (3) x  = x - (x.z)z
Mark Friedrichs's avatar
Mark Friedrichs committed
219
220
221
222
223
                   (4) norm x
        
                bisector
                   (1) norm z
                   (2) norm x 
224
                   (3) z  = x + z
Mark Friedrichs's avatar
Mark Friedrichs committed
225
                   (4) norm z
226
                   (5) x  = x - (x.z)z 
Mark Friedrichs's avatar
Mark Friedrichs committed
227
228
229
230
231
232
                   (6) norm x 
        
                z-bisect
                   (1) norm z
                   (2) norm x 
                   (3) norm y 
233
                   (3) x  = x + y
Mark Friedrichs's avatar
Mark Friedrichs committed
234
                   (4) norm x
235
                   (5) x  = x - (x.z)z 
Mark Friedrichs's avatar
Mark Friedrichs committed
236
237
238
239
240
241
                   (6) norm x 
        
                3-fold
                   (1) norm z
                   (2) norm x 
                   (3) norm y 
242
                   (4) z  = x + y + z
Mark Friedrichs's avatar
Mark Friedrichs committed
243
                   (5) norm z
244
                   (6) x  = x - (x.z)z 
Mark Friedrichs's avatar
Mark Friedrichs committed
245
246
247
248
249
250
                   (7) norm x 
        
            */
        
            // branch based on axis type
             
251
            float sum                    = normVector3( vectorZ );
Mark Friedrichs's avatar
Mark Friedrichs committed
252
        
253
            if( axisType  == 1 ){
Mark Friedrichs's avatar
Mark Friedrichs committed
254
255
256
        
                // bisector
                
257
                sum                      = normVector3( vectorX );
Mark Friedrichs's avatar
Mark Friedrichs committed
258
                
259
260
261
                vectorZ[0]              += vectorX[0];
                vectorZ[1]              += vectorX[1];
                vectorZ[2]              += vectorX[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
262
           
263
                sum                      = normVector3( vectorZ );
Mark Friedrichs's avatar
Mark Friedrichs committed
264
        
265
            } else if( axisType  == 2 || axisType == 3 ){ 
Mark Friedrichs's avatar
Mark Friedrichs committed
266
267
268
         
                // z-bisect
        
269
                multipoleParticleIndex   = multiPoleParticles[particleIndex].y; 
Mark Friedrichs's avatar
Mark Friedrichs committed
270
                if( multipoleParticleIndex >= 0 && multipoleParticleIndex < cSim.atoms ){
271
272
273
274
                    coordinatesAxisParticle  = particleCoord[multipoleParticleIndex];
                    vectorY[0]               = coordinatesAxisParticle.x - coordinatesThisParticle.x;
                    vectorY[1]               = coordinatesAxisParticle.y - coordinatesThisParticle.y;
                    vectorY[2]               = coordinatesAxisParticle.z - coordinatesThisParticle.z;
275
            
276
277
                    sum                      = normVector3( vectorY );
                    sum                      = normVector3( vectorX );
278
            
279
                    if( axisType  == 2 ){
Mark Friedrichs's avatar
Mark Friedrichs committed
280
            
281
282
283
284
                        vectorX[0]          += vectorY[0];
                        vectorX[1]          += vectorY[1];
                        vectorX[2]          += vectorY[2];
                        sum                  = normVector3( vectorX );
Mark Friedrichs's avatar
Mark Friedrichs committed
285
286
287
288
289
             
                    } else { 
             
                        // 3-fold
                
290
291
292
293
                        vectorZ[0]          += vectorX[0] + vectorY[0];
                        vectorZ[1]          += vectorX[1] + vectorY[1];
                        vectorZ[2]          += vectorX[2] + vectorY[2];
                        sum                  = normVector3( vectorZ );
Mark Friedrichs's avatar
Mark Friedrichs committed
294
295
296
297
                    }
                }
         
            } else if( axisType >= 4 ){ 
298
        
299
300
301
                vectorX[0]              = 0.1f;
                vectorX[1]              = 0.1f;
                vectorX[2]              = 0.1f;
302
            }
Mark Friedrichs's avatar
Mark Friedrichs committed
303
            
304
            // x  = x - (x.z)z
Mark Friedrichs's avatar
Mark Friedrichs committed
305
        
306
            float dot          = vectorZ[0]*vectorX[0] + vectorZ[1]*vectorX[1] + vectorZ[2]*vectorX[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
307
                
308
309
310
            vectorX[0]        -= dot*vectorZ[0];
            vectorX[1]        -= dot*vectorZ[1];
            vectorX[2]        -= dot*vectorZ[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
311
             
312
            sum                = normVector3( vectorX );
Mark Friedrichs's avatar
Mark Friedrichs committed
313
        
314
315
316
            vectorY[0]         = (vectorZ[1]*vectorX[2]) - (vectorZ[2]*vectorX[1]);
            vectorY[1]         = (vectorZ[2]*vectorX[0]) - (vectorZ[0]*vectorX[2]);
            vectorY[2]         = (vectorZ[0]*vectorX[1]) - (vectorZ[1]*vectorX[0]);
Mark Friedrichs's avatar
Mark Friedrichs committed
317
318
319
320
         
            // use identity rotation matrix for unrecognized axis types
        
            if( axisType < 0 || axisType > 4 ){
321
        
322
323
324
                vectorX[0]  = 1.0f;
                vectorX[1]  = 0.0f;
                vectorX[2]  = 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
325
        
326
327
328
                vectorY[0]  = 0.0f;
                vectorY[1]  = 1.0f;
                vectorY[2]  = 0.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
329
        
330
331
332
                vectorZ[0]  = 0.0f;
                vectorZ[1]  = 0.0f;
                vectorZ[2]  = 1.0f;
Mark Friedrichs's avatar
Mark Friedrichs committed
333
334
            }
        
335
            unsigned int offset            = 3*particleIndex;
336
    
Mark Friedrichs's avatar
Mark Friedrichs committed
337
            float molDipole[3];
338
339
340
            molDipole[0]                   = labFrameDipole[offset];
            molDipole[1]                   = labFrameDipole[offset+1];
            molDipole[2]                   = labFrameDipole[offset+2];
341
            
Mark Friedrichs's avatar
Mark Friedrichs committed
342
            // set out-of-range elements to 0.0f
343
         
344
345
346
            labFrameDipole[offset]         = molDipole[0]*vectorX[0] + molDipole[1]*vectorY[0] + molDipole[2]*vectorZ[0];
            labFrameDipole[offset+1]       = molDipole[0]*vectorX[1] + molDipole[1]*vectorY[1] + molDipole[2]*vectorZ[1];
            labFrameDipole[offset+2]       = molDipole[0]*vectorX[2] + molDipole[1]*vectorY[2] + molDipole[2]*vectorZ[2];
Mark Friedrichs's avatar
Mark Friedrichs committed
347
348
349
350
            
            // ---------------------------------------------------------------------------------------
            
            float mPole[3][3];
351
            offset                         = 9*particleIndex;
Mark Friedrichs's avatar
Mark Friedrichs committed
352
            
353
354
355
            mPole[0][0]                    = labFrameQuadrupole[offset];
            mPole[0][1]                    = labFrameQuadrupole[offset+1];
            mPole[0][2]                    = labFrameQuadrupole[offset+2];
Mark Friedrichs's avatar
Mark Friedrichs committed
356
        
357
358
359
            mPole[1][0]                    = labFrameQuadrupole[offset+3];
            mPole[1][1]                    = labFrameQuadrupole[offset+4];
            mPole[1][2]                    = labFrameQuadrupole[offset+5];
Mark Friedrichs's avatar
Mark Friedrichs committed
360
        
361
362
363
            mPole[2][0]                    = labFrameQuadrupole[offset+6];
            mPole[2][1]                    = labFrameQuadrupole[offset+7];
            mPole[2][2]                    = labFrameQuadrupole[offset+8];
Mark Friedrichs's avatar
Mark Friedrichs committed
364
        
365
366
367
            labFrameQuadrupole[offset+8]   = vectorX[2]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
            labFrameQuadrupole[offset+8]  += vectorY[2]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
            labFrameQuadrupole[offset+8]  += vectorZ[2]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
368
    
369
370
371
            labFrameQuadrupole[offset+4]   = vectorX[1]*(vectorX[1]*mPole[0][0] + vectorY[1]*mPole[0][1] + vectorZ[1]*mPole[0][2]);
            labFrameQuadrupole[offset+4]  += vectorY[1]*(vectorX[1]*mPole[1][0] + vectorY[1]*mPole[1][1] + vectorZ[1]*mPole[1][2]);
            labFrameQuadrupole[offset+4]  += vectorZ[1]*(vectorX[1]*mPole[2][0] + vectorY[1]*mPole[2][1] + vectorZ[1]*mPole[2][2]);
372
    
373
374
375
            labFrameQuadrupole[offset+5]   = vectorX[1]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
            labFrameQuadrupole[offset+5]  += vectorY[1]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
            labFrameQuadrupole[offset+5]  += vectorZ[1]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
376
    
377
378
379
            labFrameQuadrupole[offset]     = vectorX[0]*(vectorX[0]*mPole[0][0] + vectorY[0]*mPole[0][1] + vectorZ[0]*mPole[0][2]);
            labFrameQuadrupole[offset]    += vectorY[0]*(vectorX[0]*mPole[1][0] + vectorY[0]*mPole[1][1] + vectorZ[0]*mPole[1][2]);
            labFrameQuadrupole[offset]    += vectorZ[0]*(vectorX[0]*mPole[2][0] + vectorY[0]*mPole[2][1] + vectorZ[0]*mPole[2][2]);
380
    
381
382
383
            labFrameQuadrupole[offset+1]   = vectorX[0]*(vectorX[1]*mPole[0][0] + vectorY[1]*mPole[0][1] + vectorZ[1]*mPole[0][2]);
            labFrameQuadrupole[offset+1]  += vectorY[0]*(vectorX[1]*mPole[1][0] + vectorY[1]*mPole[1][1] + vectorZ[1]*mPole[1][2]);
            labFrameQuadrupole[offset+1]  += vectorZ[0]*(vectorX[1]*mPole[2][0] + vectorY[1]*mPole[2][1] + vectorZ[1]*mPole[2][2]);
384
    
385
386
387
            labFrameQuadrupole[offset+2]   = vectorX[0]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
            labFrameQuadrupole[offset+2]  += vectorY[0]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
            labFrameQuadrupole[offset+2]  += vectorZ[0]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
Mark Friedrichs's avatar
Mark Friedrichs committed
388
     
389
390
391
            labFrameQuadrupole[offset+3]   = labFrameQuadrupole[offset+1];
            labFrameQuadrupole[offset+6]   = labFrameQuadrupole[offset+2];
            labFrameQuadrupole[offset+7]   = labFrameQuadrupole[offset+5];
392
    
Mark Friedrichs's avatar
Mark Friedrichs committed
393
        }
Mark Friedrichs's avatar
Mark Friedrichs committed
394

395
        particleIndex                 += gridDim.x*blockDim.x;
396
    }
397
     
Mark Friedrichs's avatar
Mark Friedrichs committed
398
399
400
401
402
403
404
}

void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
{

   // ---------------------------------------------------------------------------------------

405
    gpuContext gpu     = amoebaGpu->gpuContext;
Mark Friedrichs's avatar
Mark Friedrichs committed
406

407
408
    int numBlocks      = gpu->sim.blocks;
    int numThreads     = gpu->sim.threads_per_block;
Mark Friedrichs's avatar
Mark Friedrichs committed
409

410
411
412
413
414
415
416
    // copy molecular moments to lab frame moment arrays
    // check if chiral center requires moments to have sign flipped
    // compute lab frame moments

    cudaMemcpy( amoebaGpu->psLabFrameDipole->_pDevData,      amoebaGpu->psMolecularDipole->_pDevData,    3*gpu->sim.paddedNumberOfAtoms*sizeof( float ), cudaMemcpyDeviceToDevice ); 
    cudaMemcpy( amoebaGpu->psLabFrameQuadrupole->_pDevData, amoebaGpu->psMolecularQuadrupole->_pDevData, 9*gpu->sim.paddedNumberOfAtoms*sizeof( float ), cudaMemcpyDeviceToDevice ); 

417
418
419
420
    kCudaComputeCheckChiral_kernel<<< numBlocks, numThreads>>> ( );
    LAUNCHERROR("kCudaComputeCheckChiral");

    kCudaComputeLabFrameMoments_kernel<<< numBlocks, numThreads>>> ( );
Mark Friedrichs's avatar
Mark Friedrichs committed
421
    LAUNCHERROR("kCudaComputeLabFrameMoments");
Mark Friedrichs's avatar
Mark Friedrichs committed
422
423
424

}

425
void kSetupAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaGeneralizedKirkwood ) 
Mark Friedrichs's avatar
Mark Friedrichs committed
426
{
427
    std::string methodName  = "kSetupAmoebaMultipoleForces";
Mark Friedrichs's avatar
Mark Friedrichs committed
428
429
430
431
432

    // compute lab frame moments

    cudaComputeAmoebaLabFrameMoments( amoebaGpu );

Mark Friedrichs's avatar
Mark Friedrichs committed
433
    if( 0 ){
434
        gpuContext gpu                        = amoebaGpu->gpuContext;
Mark Friedrichs's avatar
Mark Friedrichs committed
435
436
437
        std::vector<int> fileId;
        //fileId.push_back( 0 );
        VectorOfDoubleVectors outputVector;
438
439
440
        //cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4,              outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psLabFrameDipole,     outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        cudaLoadCudaFloatArray( gpu->natoms,  9, amoebaGpu->psLabFrameQuadrupole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
Mark Friedrichs's avatar
Mark Friedrichs committed
441
442
443
        cudaWriteVectorOfDoubleVectorsToFile( "CudaLabMoments", fileId, outputVector );
    }   

Mark Friedrichs's avatar
Mark Friedrichs committed
444
445
446
447
448
449
    // compute fixed E-field and mutual induced field 

    if( hasAmoebaGeneralizedKirkwood ){
        cudaComputeAmoebaFixedEAndGkFields( amoebaGpu );
        cudaComputeAmoebaMutualInducedAndGkField( amoebaGpu );
    } else {
450

451
        if( amoebaGpu->multipoleNonbondedMethod  == AMOEBA_NO_CUTOFF ){
452

453
454
            cudaComputeAmoebaFixedEField( amoebaGpu );
            cudaComputeAmoebaMutualInducedField( amoebaGpu );
455

456
        } else {
457

458
            gpuContext gpu  = amoebaGpu->gpuContext;
Mark Friedrichs's avatar
Mark Friedrichs committed
459
460
461
462
            kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
            LAUNCHERROR("kFindBlockBoundsPeriodic");
            kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
            LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
463
464
465
466
467
468
469
470
471

            compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);

            //compactStream( gpu->compactPlan, 
            //               gpu->sim.pInteractingWorkUnit, unsigned int* dOut
            //               amoebaGpu->psWorkUnit->_pDevData, const unsigned int* dIn
            //               gpu->sim.pInteractionFlag,        const unsigned int* dValid
            //               gpu->sim.workUnits,               gpu
            //               gpu->sim.pInteractionCount);
Mark Friedrichs's avatar
Mark Friedrichs committed
472
473
474
            kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                    sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
            LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
475

476
            cudaComputeAmoebaPmeFixedEField( amoebaGpu );
477
            cudaComputeAmoebaPmeMutualInducedField( amoebaGpu );
478
        }
Mark Friedrichs's avatar
Mark Friedrichs committed
479
480
481
482
    }

    // check if induce dipole calculation converged -- abort if it did not

483
    if( amoebaGpu->mutualInducedDone  == 0 ){
484
       throw OpenMM::OpenMMException("Induced dipole calculation did not converge" );
Mark Friedrichs's avatar
Mark Friedrichs committed
485
486
    }

487
488
489
490
491
492
493
}

void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaGeneralizedKirkwood ) 
{

    kSetupAmoebaMultipoleForces(amoebaGpu, hasAmoebaGeneralizedKirkwood ); 

Mark Friedrichs's avatar
Mark Friedrichs committed
494
495
    // calculate electrostatic forces

496
    if( amoebaGpu->multipoleNonbondedMethod  == AMOEBA_NO_CUTOFF ){
497
        cudaComputeAmoebaElectrostatic( amoebaGpu, (hasAmoebaGeneralizedKirkwood ? 0 : 1) );
498
    } else {
Mark Friedrichs's avatar
Mark Friedrichs committed
499
        cudaComputeAmoebaPmeElectrostatic( amoebaGpu );
500
    }
501

Mark Friedrichs's avatar
Mark Friedrichs committed
502
}