kCalculateAmoebaCudaPmeMutualInducedField.cu 34.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
//-----------------------------------------------------------------------------------------

//-----------------------------------------------------------------------------------------

#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"

#include <stdio.h>

using namespace std;

static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;

void SetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
{
    cudaError_t status;
    gpuContext gpu = amoebaGpu->gpuContext;
    status         = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));    
    RTERROR(status, "SetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");
    status         = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));    
    RTERROR(status, "SetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}

void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
{
    cudaError_t status;
    gpuContext gpu = amoebaGpu->gpuContext;
    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));    
    RTERROR(status, "GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");
    status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));    
    RTERROR(status, "GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}

Mark Friedrichs's avatar
Mark Friedrichs committed
36
37
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
38

Mark Friedrichs's avatar
Mark Friedrichs committed
39
#undef INCLUDE_MI_FIELD_BUFFERS
Mark Friedrichs's avatar
Mark Friedrichs committed
40
//#define INCLUDE_MI_FIELD_BUFFERS 
41
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
Mark Friedrichs's avatar
Mark Friedrichs committed
42
#ifdef INCLUDE_MI_FIELD_BUFFERS
Mark Friedrichs's avatar
Mark Friedrichs committed
43
44
45
46
47
48
49
50
51
52
__device__ void sumTempBuffer( MutualInducedParticle& atomI, MutualInducedParticle& atomJ ){

    atomI.tempBuffer[0]  += atomJ.tempBuffer[0];
    atomI.tempBuffer[1]  += atomJ.tempBuffer[1];
    atomI.tempBuffer[2]  += atomJ.tempBuffer[2];

    atomI.tempBufferP[0] += atomJ.tempBufferP[0];
    atomI.tempBufferP[1] += atomJ.tempBufferP[1];
    atomI.tempBufferP[2] += atomJ.tempBufferP[2];
}
Mark Friedrichs's avatar
Mark Friedrichs committed
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#endif

// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field

__device__ void setupMutualInducedFieldPairIxn_kernel( const MutualInducedParticle& atomI, const MutualInducedParticle& atomJ,
                                                       const float uscale, float4* delta, float* preFactor2 ) {

    // compute thedelta->xeal space portion of the Ewald summation
  
    delta->x                = atomJ.x - atomI.x;
    delta->y                = atomJ.y - atomI.y;
    delta->z                = atomJ.z - atomI.z;

    // pdelta->xiodic boundary conditions

    delta->x               -= floor(delta->x*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
    delta->y               -= floor(delta->y*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
    delta->z               -= floor(delta->z*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;

    float r2                = (delta->x*delta->x) + (delta->y*delta->y) + (delta->z*delta->z); 
    if( r2 <= cSim.nonbondedCutoffSqr ){
        float r           = sqrtf(r2);

        // calculate the error function damping terms

        float ralpha      = cSim.alphaEwald*r;

        float bn0         = erfc(ralpha)/r;
        float alsq2       = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
        float alsq2n      = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
        float exp2a       = exp(-(ralpha*ralpha));
        alsq2n           *= alsq2;
        float bn1         = (bn0+alsq2n*exp2a)/r2;

        alsq2n           *= alsq2;
        float bn2         = (3.0f*bn1+alsq2n*exp2a)/r2;

        // compute the error function scaled and unscaled terms

        float scale3      = 1.0f;
        float scale5      = 1.0f;
        float damp        = atomI.damp*atomJ.damp;
        if( damp != 0.0f ){

            float ratio  = (r/damp);
                  ratio  = ratio*ratio*ratio;
            float pgamma = atomI.thole < atomJ.thole ? atomI.thole : atomJ.thole;
                  damp   = -pgamma*ratio;

            if( damp > -50.0f) {
                float expdamp = exp(damp);
                scale3        = 1.0f - expdamp;
                scale5        = 1.0f - expdamp*(1.0f-damp);
            }
        }
        float dsc3        = uscale*scale3;
        float dsc5        = uscale*scale5;

        float r3          = (r*r2);
        float r5          = (r3*r2);
        float rr3         = (1.0f-dsc3)/r3;
        float rr5         = 3.0f*(1.0f-dsc5)/r5;

        delta->w          = rr3 - bn1;
        *preFactor2       = bn2 - rr5;
    } else {
        delta->w = *preFactor2 = 0.0f;
    }
}

__device__ void calculateMutualInducedFieldPairIxn_kernel( const float inducedDipole[3], const float4 delta, const float preFactor2, float fieldSum[3] ) {

    float preFactor3  = preFactor2*(inducedDipole[0]*delta.x   + inducedDipole[1]*delta.y  + inducedDipole[2]*delta.z);

    fieldSum[0]      += preFactor3*delta.x + delta.w*inducedDipole[0];
    fieldSum[1]      += preFactor3*delta.y + delta.w*inducedDipole[1];
    fieldSum[2]      += preFactor3*delta.z + delta.w*inducedDipole[2];
}

__device__ void calculateMutualInducedFieldPairIxnNoAdd_kernel( const float inducedDipole[3], const float4 delta, const float preFactor2, float fieldSum[3] ) {

    float preFactor3  = preFactor2*(inducedDipole[0]*delta.x   + inducedDipole[1]*delta.y  + inducedDipole[2]*delta.z);

    fieldSum[0]       = preFactor3*delta.x + delta.w*inducedDipole[0];
    fieldSum[1]       = preFactor3*delta.y + delta.w*inducedDipole[1];
    fieldSum[2]       = preFactor3*delta.z + delta.w*inducedDipole[2];
}
140
141
142
143

// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field

__device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInducedParticle& atomI, MutualInducedParticle& atomJ,
144
                                                                    float uscale, float4 fields[3]
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#ifdef AMOEBA_DEBUG
                                                            , float4* pullBack
#endif

 ){

    // compute the real space portion of the Ewald summation
  
    float xr          = atomJ.x - atomI.x;
    float yr          = atomJ.y - atomI.y;
    float zr          = atomJ.z - atomI.z;

    // periodic boundary conditions

    xr               -= floor(xr*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
    yr               -= floor(yr*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
    zr               -= floor(zr*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;

    float r2          = xr*xr + yr* yr + zr*zr;
Peter Eastman's avatar
Peter Eastman committed
164
165
    if( r2 <= cSim.nonbondedCutoffSqr ){
        float r           = sqrtf(r2);
166

Peter Eastman's avatar
Peter Eastman committed
167
        // calculate the error function damping terms
168

Peter Eastman's avatar
Peter Eastman committed
169
        float ralpha      = cSim.alphaEwald*r;
170

Mark Friedrichs's avatar
Mark Friedrichs committed
171
        float bn0         = erfc(ralpha)/r;
Peter Eastman's avatar
Peter Eastman committed
172
173
174
175
        float alsq2       = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
        float alsq2n      = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
        float exp2a       = exp(-(ralpha*ralpha));
        alsq2n           *= alsq2;
Mark Friedrichs's avatar
Mark Friedrichs committed
176
        float bn1         = (bn0+alsq2n*exp2a)/r2;
177

Peter Eastman's avatar
Peter Eastman committed
178
        alsq2n           *= alsq2;
Mark Friedrichs's avatar
Mark Friedrichs committed
179
        float bn2         = (3.0f*bn1+alsq2n*exp2a)/r2;
180

Peter Eastman's avatar
Peter Eastman committed
181
        // compute the error function scaled and unscaled terms
182

Peter Eastman's avatar
Peter Eastman committed
183
184
185
186
        float scale3      = 1.0f;
        float scale5      = 1.0f;
        float damp        = atomI.damp*atomJ.damp;
        if( damp != 0.0f ){
187

Peter Eastman's avatar
Peter Eastman committed
188
189
190
191
            float ratio  = (r/damp);
                  ratio  = ratio*ratio*ratio;
            float pgamma = atomI.thole < atomJ.thole ? atomI.thole : atomJ.thole;
                  damp   = -pgamma*ratio;
192

Peter Eastman's avatar
Peter Eastman committed
193
194
195
196
197
            if( damp > -50.0f) {
                float expdamp = exp(damp);
                scale3        = 1.0f - expdamp;
                scale5        = 1.0f - expdamp*(1.0f-damp);
            }
198
        }
Peter Eastman's avatar
Peter Eastman committed
199
200
        float dsc3        = uscale*scale3;
        float dsc5        = uscale*scale5;
201

Peter Eastman's avatar
Peter Eastman committed
202
203
204
        float r3          = (r*r2);
        float r5          = (r3*r2);
        float rr3         = (1.0f-dsc3)/r3;
Mark Friedrichs's avatar
Mark Friedrichs committed
205
        float rr5         = 3.0f*(1.0f-dsc5)/r5;
206

Mark Friedrichs's avatar
Mark Friedrichs committed
207
208
        float preFactor1  = rr3 - bn1;
        float preFactor2  = bn2 - rr5;
209

Mark Friedrichs's avatar
Mark Friedrichs committed
210
211
        float dukr        = atomJ.inducedDipole[0]*xr      + atomJ.inducedDipole[1]*yr      + atomJ.inducedDipole[2]*zr;
        float preFactor3  = preFactor2*dukr;
212

Mark Friedrichs's avatar
Mark Friedrichs committed
213
214
215
        fields[0].x       = preFactor3*xr + preFactor1*atomJ.inducedDipole[0];
        fields[1].x       = preFactor3*yr + preFactor1*atomJ.inducedDipole[1];
        fields[2].x       = preFactor3*zr + preFactor1*atomJ.inducedDipole[2];
216
217


Mark Friedrichs's avatar
Mark Friedrichs committed
218
219
        float duir        = atomI.inducedDipole[0]*xr      + atomI.inducedDipole[1]*yr      + atomI.inducedDipole[2]*zr;
        preFactor3        = preFactor2*duir;
220

Mark Friedrichs's avatar
Mark Friedrichs committed
221
222
223
        fields[0].y       = preFactor3*xr + preFactor1*atomI.inducedDipole[0];
        fields[1].y       = preFactor3*yr + preFactor1*atomI.inducedDipole[1];
        fields[2].y       = preFactor3*zr + preFactor1*atomI.inducedDipole[2];
224
225


Mark Friedrichs's avatar
Mark Friedrichs committed
226
227
        float pukr        = atomJ.inducedDipolePolar[0]*xr + atomJ.inducedDipolePolar[1]*yr + atomJ.inducedDipolePolar[2]*zr;
        preFactor3        = preFactor2*pukr;
228

Mark Friedrichs's avatar
Mark Friedrichs committed
229
230
231
        fields[0].z       = preFactor3*xr + preFactor1*atomJ.inducedDipolePolar[0];
        fields[1].z       = preFactor3*yr + preFactor1*atomJ.inducedDipolePolar[1];
        fields[2].z       = preFactor3*zr + preFactor1*atomJ.inducedDipolePolar[2];
232
233


Mark Friedrichs's avatar
Mark Friedrichs committed
234
235
236
237
238
        float puir        = atomI.inducedDipolePolar[0]*xr + atomI.inducedDipolePolar[1]*yr + atomI.inducedDipolePolar[2]*zr;
        preFactor3        = preFactor2*puir;
        fields[0].w       = preFactor3*xr + preFactor1*atomI.inducedDipolePolar[0];
        fields[1].w       = preFactor3*yr + preFactor1*atomI.inducedDipolePolar[1];
        fields[2].w       = preFactor3*zr + preFactor1*atomI.inducedDipolePolar[2];
239
240
241

    } else {

242
243
244
245
        fields[0].x       = 0.0f;
        fields[0].y       = 0.0f;
        fields[0].z       = 0.0f;
        fields[0].w       = 0.0f;
246
    
247
248
249
250
        fields[1].x       = 0.0f;
        fields[1].y       = 0.0f;
        fields[1].z       = 0.0f;
        fields[1].w       = 0.0f;
251
    
252
253
254
255
        fields[2].x       = 0.0f;
        fields[2].y       = 0.0f;
        fields[2].z       = 0.0f;
        fields[2].w       = 0.0f;
256
    }
Mark Friedrichs's avatar
Mark Friedrichs committed
257
/*
258
259
260
261
262
263
264
#ifdef AMOEBA_DEBUG
    pullBack[0].x = xr;
    pullBack[0].y = yr;
    pullBack[0].z = zr;
    pullBack[0].w = r2;

    pullBack[1].x = alsq2;
265
266
    pullBack[1].y = bn0;
    pullBack[1].z = bn2;
267
268
269
270
271
272
273
274
275
276
    pullBack[1].w = exp2a;

    pullBack[1].x = atomJ.x - atomI.x;
    pullBack[1].y = atomJ.y - atomI.y;
    pullBack[1].z = atomJ.z - atomI.z;
    pullBack[1].w = (atomJ.x - atomI.x)*(atomJ.x - atomI.x) + (atomJ.y - atomI.y)*(atomJ.y - atomI.y)+ (atomJ.z - atomI.z)*(atomJ.z - atomI.z);
    pullBack[1].x = scale3;
    pullBack[1].y = scale5;
    pullBack[1].z = scale7;
#endif
Mark Friedrichs's avatar
Mark Friedrichs committed
277
*/
278
279
280
281
}

// Include versions of the kernels for N^2 calculations.

Mark Friedrichs's avatar
Mark Friedrichs committed
282
#define METHOD_NAME(a, b) a##Cutoff##b
283
284
285
#include "kCalculateAmoebaCudaPmeMutualInducedField.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
Mark Friedrichs's avatar
Mark Friedrichs committed
286
#define METHOD_NAME(a, b) a##CutoffByWarp##b
287
288
289
290
291
#include "kCalculateAmoebaCudaPmeMutualInducedField.h"

__global__ 
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
292
#elif (__CUDA_ARCH__ >= 120)
293
294
295
296
297
298
299
300
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
static void kInitializeMutualInducedField_kernel(
                   int numberOfAtoms,
                   float* fixedEField,
                   float* fixedEFieldPolar,
301
                   float* polarizability )
302
303
{

Mark Friedrichs's avatar
Mark Friedrichs committed
304
    int pos = blockIdx.x*blockDim.x + threadIdx.x;
305
306
307
308
    while( pos < 3*cSim.atoms )
    {   
        fixedEField[pos]         *= polarizability[pos];
        fixedEFieldPolar[pos]    *= polarizability[pos];
309

310
311
        pos                      += blockDim.x*gridDim.x;
    }
312
313
314
315
316
317

}

__global__ 
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
318
#elif (__CUDA_ARCH__ >= 120)
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
static void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* arrayOfDeltas1, float* arrayOfDeltas2, float* epsilon )
{
    extern __shared__ float2 delta[];

    delta[threadIdx.x].x    = 0.0f;
    delta[threadIdx.x].y    = 0.0f;

    unsigned int pos = threadIdx.x;

    // load deltas

    while( pos < numberOfEntries )
    {   
        delta[threadIdx.x].x  += arrayOfDeltas1[pos];
        delta[threadIdx.x].y  += arrayOfDeltas2[pos];
        pos                   += blockDim.x*gridDim.x;
    }   
    __syncthreads();

    // sum the deltas

    for (int offset = 1; offset < blockDim.x; offset *= 2 )
    {   
        if (threadIdx.x + offset < blockDim.x && (threadIdx.x & (2*offset-1)) == 0)
        {
            delta[threadIdx.x].x   += delta[threadIdx.x+offset].x;
            delta[threadIdx.x].y   += delta[threadIdx.x+offset].y;
        }
        __syncthreads();
    }   

    // set epsilons

    if (threadIdx.x == 0)
    {   
        epsilon[0]  = delta[0].x > delta[0].y ? delta[0].x : delta[0].y;
359
        epsilon[0]  = 48.033324f*sqrtf( epsilon[0]/( (float) (numberOfEntries/3)) );
360
#ifdef AMOEBA_DEBUG
361
362
        epsilon[1]  = 48.033324f*sqrtf( delta[0].x/( (float) (numberOfEntries/3)) );
        epsilon[2]  = 48.033324f*sqrtf( delta[0].y/( (float) (numberOfEntries/3)) );
363
364
365
366
367
368
369
370
371
372
373
374
#endif
    }   
}

/**

   matrixProduct/matrixProductP contains epsilon**2 on output

*/
__global__ 
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
375
#elif (__CUDA_ARCH__ >= 120)
376
377
378
379
380
381
382
383
384
385
386
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
static void kSorUpdateMutualInducedField_kernel(
                   int numberOfEntries,    float* polarizability,
                   float* inducedDipole, float* inducedDipoleP,
                   float* fixedEField,   float* fixedEFieldP,
                   float* matrixProduct, float* matrixProductP )
{

Mark Friedrichs's avatar
Mark Friedrichs committed
387
    int pos = blockIdx.x*blockDim.x + threadIdx.x;
388
389
    while( pos < 3*cSim.atoms )
    {   
390

391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
        float previousDipole           = inducedDipole[pos];
        float previousDipoleP          = inducedDipoleP[pos];
    
        // add self terms to fields
    
        const float term               = (4.0f/3.0f)*(cSim.alphaEwald*cSim.alphaEwald*cSim.alphaEwald)/cAmoebaSim.sqrtPi;
        matrixProduct[pos]            +=  term*previousDipole;
        matrixProductP[pos]           +=  term*previousDipoleP;
    
        inducedDipole[pos]             = fixedEField[pos]     + polarizability[pos]*matrixProduct[pos];
        inducedDipoleP[pos]            = fixedEFieldP[pos]    + polarizability[pos]*matrixProductP[pos];
    
        const float polarSOR           = 0.70f;
        inducedDipole[pos]             = previousDipole   + polarSOR*( inducedDipole[pos]   - previousDipole  );   
        inducedDipoleP[pos]            = previousDipoleP  + polarSOR*( inducedDipoleP[pos]  - previousDipoleP );
    
        matrixProduct[pos]             = ( inducedDipole[pos]  - previousDipole  )*( inducedDipole[pos]  - previousDipole  );
        matrixProductP[pos]            = ( inducedDipoleP[pos] - previousDipoleP )*( inducedDipoleP[pos] - previousDipoleP );
409

410
411
        pos                           += blockDim.x*gridDim.x;
    }
412
413
414
415
416
417
418
419

}

// reduce psWorkArray_3_1
// reduce psWorkArray_3_2

static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<float>* outputArray, CUDAStream<float>* outputPolarArray )
{
420
421
422
    gpuContext gpu = amoebaGpu->gpuContext;
    kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
                               gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
423
                               amoebaGpu->psWorkArray_3_1->_pDevData, outputArray->_pDevData, 0 );
424
425
    LAUNCHERROR("kReducePmeMI_Fields1");

426
427
    kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
                               gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
428
                               amoebaGpu->psWorkArray_3_2->_pDevData, outputPolarArray->_pDevData, 0 );
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
    LAUNCHERROR("kReducePmeMI_Fields2");
}

/**---------------------------------------------------------------------------------------

   Compute mutual induce field

   @param amoebaGpu        amoebaGpu context

   --------------------------------------------------------------------------------------- */

static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuContext amoebaGpu,
                                                                  CUDAStream<float>* outputArray, CUDAStream<float>* outputPolarArray )
{
  
Mark Friedrichs's avatar
Mark Friedrichs committed
444
445
  static unsigned int threadsPerBlock  = 0;
  gpuContext gpu                       = amoebaGpu->gpuContext;
446
447

#ifdef AMOEBA_DEBUG
Mark Friedrichs's avatar
Mark Friedrichs committed
448
    static int iteration          = 1;
Mark Friedrichs's avatar
Mark Friedrichs committed
449
    int targetAtom                = 546;
450
451
    static const char* methodName = "cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply";
    if( 1 && amoebaGpu->log ){
Mark Friedrichs's avatar
Mark Friedrichs committed
452
        (void) fprintf( amoebaGpu->log, "%s\n", methodName );
453
454
455
        (void) fflush( amoebaGpu->log );
    }
    int paddedNumberOfAtoms                    = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
456
    int maxSlots                               = 10;
Mark Friedrichs's avatar
Mark Friedrichs committed
457
458
    CUDAStream<float4>* debugArray             = new CUDAStream<float4>(maxSlots*paddedNumberOfAtoms, 1, "DebugArray");
    memset( debugArray->_pSysData,      0, sizeof( float )*4*maxSlots*paddedNumberOfAtoms);
459
460
461
462
463
    debugArray->Upload();
#endif

    kClearFields_3( amoebaGpu, 2 );

Mark Friedrichs's avatar
Mark Friedrichs committed
464
465
466
467
468
469
470
471
472
473
    // on first pass, set threads/block

    if( threadsPerBlock == 0 ){  
        unsigned int maxThreads;
        if (gpu->sm_version >= SM_20)
            maxThreads = 384; 
        else if (gpu->sm_version >= SM_12)
            maxThreads = 128; 
        else
            maxThreads = 64; 
Mark Friedrichs's avatar
Mark Friedrichs committed
474
        threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(MutualInducedParticle), gpu->sharedMemoryPerBlock ), maxThreads);
Mark Friedrichs's avatar
Mark Friedrichs committed
475
476
    }    

Mark Friedrichs's avatar
Mark Friedrichs committed
477
#ifdef AMOEBA_DEBUG
478
    if( amoebaGpu->log ){
Mark Friedrichs's avatar
Mark Friedrichs committed
479
480
        (void) fprintf( amoebaGpu->log, "cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u\n",
                        gpu->sim.nonbond_blocks, threadsPerBlock, gpu->bOutputBufferPerWarp,
481
482
483
484
                        sizeof(MutualInducedParticle), sizeof(MutualInducedParticle)*threadsPerBlock,
                        (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
        (void) fflush( amoebaGpu->log );
    }
Mark Friedrichs's avatar
Mark Friedrichs committed
485
#endif
486
487
488

    if (gpu->bOutputBufferPerWarp){
        kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>(
Mark Friedrichs's avatar
Mark Friedrichs committed
489
                                                                 gpu->sim.pInteractingWorkUnit,
Mark Friedrichs's avatar
Mark Friedrichs committed
490
491
                                                                 amoebaGpu->psWorkArray_3_1->_pDevData,
                                                                 amoebaGpu->psWorkArray_3_2->_pDevData );
492
493
494

    } else {

495
        kCalculateAmoebaPmeMutualInducedFieldCutoff_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>(
Mark Friedrichs's avatar
Mark Friedrichs committed
496
                                                                 gpu->sim.pInteractingWorkUnit,
Mark Friedrichs's avatar
Mark Friedrichs committed
497
498
                                                                 amoebaGpu->psWorkArray_3_1->_pDevData,
                                                                 amoebaGpu->psWorkArray_3_2->_pDevData );
499
500
501
502
503
504
505
    }
    LAUNCHERROR("kCalculateAmoebaPmeMutualInducedField");

    kReduceMutualInducedFields( amoebaGpu, outputArray, outputPolarArray );

#ifdef AMOEBA_DEBUG
    if( amoebaGpu->log && iteration == 1 ){
Mark Friedrichs's avatar
Mark Friedrichs committed
506
507
        (void) fprintf( amoebaGpu->log, "Finished maxtrixMultiply kernel execution %d -- Direct only -- self added in kSorUpdateMutualInducedField_kernel\n",
                        iteration ); (void) fflush( amoebaGpu->log );
508
509
        outputArray->Download();
        outputPolarArray->Download();
Mark Friedrichs's avatar
Mark Friedrichs committed
510
511
//        debugArray->Download();
        int maxPrint = 5000000;
512
513
514
515
516
517
518
519
        for( int ii = 0; ii < gpu->natoms; ii++ ){
            (void) fprintf( amoebaGpu->log, "%5d ", ii); 
 
             int indexOffset     = ii*3;
     
            // MI
 
            (void) fprintf( amoebaGpu->log,"Mult[%16.9e %16.9e %16.9e] ",
Mark Friedrichs's avatar
Mark Friedrichs committed
520
521
522
                            outputArray->_pSysData[indexOffset],
                            outputArray->_pSysData[indexOffset+1],
                            outputArray->_pSysData[indexOffset+2] );
523
524
525
526
     
            // MI polar
 
            (void) fprintf( amoebaGpu->log,"MultP[%16.9e %16.9e %16.9e]\n",
Mark Friedrichs's avatar
Mark Friedrichs committed
527
528
529
                            outputPolarArray->_pSysData[indexOffset],
                            outputPolarArray->_pSysData[indexOffset+1],
                            outputPolarArray->_pSysData[indexOffset+2] );
530
531
532
533
534
535
536
537
538
539
540
541
            if( ii == maxPrint && (gpu->natoms - maxPrint) > ii ){
                ii = gpu->natoms - maxPrint;
            }

        }
/*
        int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
        for( int jj = 0; jj < gpu->natoms; jj++ ){
            int debugIndex = jj; 
            (void) fprintf( amoebaGpu->log,"%5d PmeMIMult\n", jj );
            for( int kk = 0; kk < 7; kk++ ){
                (void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n",
Mark Friedrichs's avatar
Mark Friedrichs committed
542
543
                                debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
                                debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w );
544
545
546
547
548
549
550
551
552
553
                debugIndex += paddedNumberOfAtoms;
            }
            (void) fprintf( amoebaGpu->log,"\n" );

        }
*/
        (void) fflush( amoebaGpu->log );
        iteration++;

     }
Mark Friedrichs's avatar
Mark Friedrichs committed
554
//     delete debugArray;
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
#endif

}

/**---------------------------------------------------------------------------------------

   Compute mutual induce field

   @param amoebaGpu        amoebaGpu context

   --------------------------------------------------------------------------------------- */

static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu )
{
  
   // ---------------------------------------------------------------------------------------

572
//#define AMOEBA_DEBUG
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
#ifdef AMOEBA_DEBUG
    static const char* methodName = "cudaComputeAmoebaPmeMutualInducedFieldBySOR";
    static int timestep = 0;
    std::vector<int> fileId;
    timestep++;
    fileId.resize( 2 );
    fileId[0] = timestep;
    fileId[1] = 1;
#endif

   // ---------------------------------------------------------------------------------------

    int done;
    int iteration;

     gpuContext gpu    = amoebaGpu->gpuContext;

   // ---------------------------------------------------------------------------------------

    // set  E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
    // initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability

595
    kInitializeMutualInducedField_kernel<<< gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block >>>(
596
         gpu->natoms,
Mark Friedrichs's avatar
Mark Friedrichs committed
597
598
         amoebaGpu->psE_Field->_pDevData,
         amoebaGpu->psE_FieldPolar->_pDevData,
599
         amoebaGpu->psPolarizability->_pDevData );
600
601
    LAUNCHERROR("AmoebaPmeMutualInducedFieldSetup");  

602
603
604
    cudaMemcpy( amoebaGpu->psInducedDipole->_pDevData,        amoebaGpu->psE_Field->_pDevData,       3*gpu->sim.paddedNumberOfAtoms*sizeof( float ), cudaMemcpyDeviceToDevice );
    cudaMemcpy( amoebaGpu->psInducedDipolePolar->_pDevData,   amoebaGpu->psE_FieldPolar->_pDevData,  3*gpu->sim.paddedNumberOfAtoms*sizeof( float ), cudaMemcpyDeviceToDevice );

605
606
607
#ifdef AMOEBA_DEBUG
    if( amoebaGpu->log ){

Mark Friedrichs's avatar
Mark Friedrichs committed
608
609
        std::vector<int> fileId;
        VectorOfDoubleVectors outputVector;
610
611
612
613
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psE_Field,            outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psE_FieldPolar,       outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipole,      outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
614
        cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeEFieldPolarity", fileId, outputVector );
615
616
617
    }   
#endif

618
619
620
621
622
623
624
625
626
627
    // if polarization type is direct, set flags signalling done and return

    if( amoebaGpu->amoebaSim.polarizationType )
    {
        amoebaGpu->mutualInducedDone          = 1;
        amoebaGpu->mutualInducedConverged     = 1;
        kCalculateAmoebaPMEInducedDipoleField( amoebaGpu );
        return;
    }

628
629
630
631
632
633
634
635
636
637
    // ---------------------------------------------------------------------------------------
 
    done      = 0;
    iteration = 1;

    while( !done ){

        // matrix multiply

        cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpu, amoebaGpu->psWorkVector[0],  amoebaGpu->psWorkVector[1] );
638
        kCalculateAmoebaPMEInducedDipoleField( amoebaGpu );
639
640
641

        // post matrix multiply

Mark Friedrichs's avatar
Mark Friedrichs committed
642
643
644
645
646
647
648
649
650
651
652
653
        if( 0 ){
            gpuContext gpu = amoebaGpu->gpuContext;
            std::vector<int> fileId;
            fileId.push_back( iteration );
            VectorOfDoubleVectors outputVector;
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psWorkVector[0], outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psWorkVector[1], outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
            cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeDirectMIPre", fileId, outputVector );
        }

654
        kSorUpdateMutualInducedField_kernel<<< gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block >>>(
Mark Friedrichs's avatar
Mark Friedrichs committed
655
656
657
658
           gpu->natoms, amoebaGpu->psPolarizability->_pDevData,
           amoebaGpu->psInducedDipole->_pDevData, amoebaGpu->psInducedDipolePolar->_pDevData,
           amoebaGpu->psE_Field->_pDevData,       amoebaGpu->psE_FieldPolar->_pDevData,
           amoebaGpu->psWorkVector[0]->_pDevData, amoebaGpu->psWorkVector[1]->_pDevData );
659
660
        LAUNCHERROR("kSorUpdatePmeMutualInducedField");  

Mark Friedrichs's avatar
Mark Friedrichs committed
661
662
663
664
665
        if( 0 ){
            gpuContext gpu = amoebaGpu->gpuContext;
            std::vector<int> fileId;
            fileId.push_back( iteration );
            VectorOfDoubleVectors outputVector;
Mark Friedrichs's avatar
Mark Friedrichs committed
666
667
668
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psPolarizability, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
Mark Friedrichs's avatar
Mark Friedrichs committed
669
670
671
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
            cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeDirectMI", fileId, outputVector );
Mark Friedrichs's avatar
Mark Friedrichs committed
672
            //exit(0);
Mark Friedrichs's avatar
Mark Friedrichs committed
673
        }
Mark Friedrichs's avatar
Mark Friedrichs committed
674

675
676
677
        // get total epsilon -- performing sums on gpu

        kReduceMutualInducedFieldDelta_kernel<<<1, amoebaGpu->epsilonThreadsPerBlock, 2*sizeof(float)*amoebaGpu->epsilonThreadsPerBlock>>>(
Mark Friedrichs's avatar
Mark Friedrichs committed
678
679
           3*gpu->natoms, amoebaGpu->psWorkVector[0]->_pDevData, amoebaGpu->psWorkVector[1]->_pDevData,
           amoebaGpu->psCurrentEpsilon->_pDevData );
680
681
        LAUNCHERROR("kReducePmeMutualInducedFieldDelta");

682
        if( 0 && amoebaGpu->log ){ // trackMutualInducedIterations
683
684
685
            trackMutualInducedIterations( amoebaGpu, iteration);
        }

686
        // Debye=48.033324f
687
        amoebaGpu->psCurrentEpsilon->Download();
Mark Friedrichs's avatar
Mark Friedrichs committed
688
        float currentEpsilon                     = amoebaGpu->psCurrentEpsilon->_pSysData[0];
689
690
691
692
693
694
695
696
697
698
699
700
701
        amoebaGpu->mutualInducedCurrentEpsilon   = currentEpsilon;

        if( iteration > amoebaGpu->mutualInducedMaxIterations || amoebaGpu->mutualInducedCurrentEpsilon < amoebaGpu->mutualInducedTargetEpsilon ){ 
            done = 1;
        }

#ifdef AMOEBA_DEBUG
        if( amoebaGpu->log ){
           amoebaGpu->psInducedDipole->Download();
           amoebaGpu->psInducedDipolePolar->Download();
#if 1
           (void) fprintf( amoebaGpu->log, "%s iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d\n",
                           methodName, iteration, amoebaGpu->mutualInducedCurrentEpsilon,
Mark Friedrichs's avatar
Mark Friedrichs committed
702
703
                           amoebaGpu->psCurrentEpsilon->_pSysData[1], 
                           amoebaGpu->psCurrentEpsilon->_pSysData[2], done );
704
705
706
#else
           (void) fprintf( amoebaGpu->log, "%s iteration=%3d eps %14.6e %14.6e crrntEps=%14.6e %14.6e %14.6e %14.6e done=%d\n",
                           methodName, iteration, sum1, sum2, amoebaGpu->mutualInducedCurrentEpsilon,
Mark Friedrichs's avatar
Mark Friedrichs committed
707
708
709
                           amoebaGpu->psCurrentEpsilon->_pSysData[0], 
                           amoebaGpu->psCurrentEpsilon->_pSysData[1], 
                           amoebaGpu->psCurrentEpsilon->_pSysData[2], done );
710
711
712
#endif
           (void) fflush( amoebaGpu->log );

Mark Friedrichs's avatar
Mark Friedrichs committed
713
714
715
716
717
            if( 0 ){
                gpuContext gpu = amoebaGpu->gpuContext;
                std::vector<int> fileId;
                fileId.push_back( iteration );
                VectorOfDoubleVectors outputVector;
718
719
720
721
                cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
                cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
                cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
                cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
Mark Friedrichs's avatar
Mark Friedrichs committed
722
723
724
                cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
            }
/*
725
            int offset   = 0;
Mark Friedrichs's avatar
Mark Friedrichs committed
726
            int maxPrint = 10;
727
728
729
730
            for( int ii = 0; ii < gpu->natoms; ii++ ){
                (void) fprintf( amoebaGpu->log, "%4d ", ii ); 
    
                (void) fprintf( amoebaGpu->log," Mi[%14.6e %14.6e %14.6e] ",
Mark Friedrichs's avatar
Mark Friedrichs committed
731
732
733
                                amoebaGpu->psInducedDipole->_pSysData[offset],
                                amoebaGpu->psInducedDipole->_pSysData[offset+1],
                                amoebaGpu->psInducedDipole->_pSysData[offset+2] );
734
                (void) fprintf( amoebaGpu->log,"Mip[%14.6e %14.6e %14.6e]\n",
Mark Friedrichs's avatar
Mark Friedrichs committed
735
736
737
                                amoebaGpu->psInducedDipolePolar->_pSysData[offset],
                                amoebaGpu->psInducedDipolePolar->_pSysData[offset+1],
                                amoebaGpu->psInducedDipolePolar->_pSysData[offset+2] );
738
739
740
741
742
743
744
745
                if( ii == maxPrint && (ii < (gpu->natoms - maxPrint) ) ){
                    ii =  (gpu->natoms - maxPrint);
                    offset = 3*(ii+1);
                } else {
                    offset += 3;
                }
            }   
            (void) fflush( amoebaGpu->log );
Mark Friedrichs's avatar
Mark Friedrichs committed
746
*/
Mark Friedrichs's avatar
Mark Friedrichs committed
747

Mark Friedrichs's avatar
Mark Friedrichs committed
748
749
750
751
            if( 0 ){
                std::vector<int> fileId;
                fileId.push_back( iteration );
                VectorOfDoubleVectors outputVector;
752
753
754
                cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4,                    outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
                cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipole,      outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
                cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
Mark Friedrichs's avatar
Mark Friedrichs committed
755
756
                cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
            }
Mark Friedrichs's avatar
Mark Friedrichs committed
757

758
        }
759

Mark Friedrichs's avatar
Mark Friedrichs committed
760
761
762
763
764
        (void) fprintf( amoebaGpu->log, "MI iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d\n",
                        iteration, amoebaGpu->mutualInducedCurrentEpsilon,
                        amoebaGpu->psCurrentEpsilon->_pSysData[1], 
                        amoebaGpu->psCurrentEpsilon->_pSysData[2], done );
        (void) fflush( amoebaGpu->log );
Mark Friedrichs's avatar
Mark Friedrichs committed
765

Mark Friedrichs's avatar
Mark Friedrichs committed
766
767
768
769
770
771
772
773

        if( amoebaGpu->log ){
        (void) fprintf( amoebaGpu->log, "MI iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d\n",
                        iteration, amoebaGpu->mutualInducedCurrentEpsilon,
                        amoebaGpu->psCurrentEpsilon->_pSysData[1], 
                        amoebaGpu->psCurrentEpsilon->_pSysData[2], done );
        (void) fflush( amoebaGpu->log );
        }
Mark Friedrichs's avatar
Mark Friedrichs committed
774
#endif
Mark Friedrichs's avatar
Mark Friedrichs committed
775

776
        // exit if nan
Mark Friedrichs's avatar
Mark Friedrichs committed
777
778

        if( 0 && amoebaGpu->mutualInducedCurrentEpsilon != amoebaGpu->mutualInducedCurrentEpsilon ){
Mark Friedrichs's avatar
Mark Friedrichs committed
779
            (void) fprintf( stderr, "PME MI iteration=%3d eps is nan -- exiting.\n", iteration );
780
781
            exit(0);
        }
Mark Friedrichs's avatar
Mark Friedrichs committed
782

783
784
785
786
787
788
        iteration++;
    }

    amoebaGpu->mutualInducedDone             = done;
    amoebaGpu->mutualInducedConverged        = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1;

Mark Friedrichs's avatar
Mark Friedrichs committed
789
    if( 0 ){
790
791
792
        std::vector<int> fileId;
        //fileId.push_back( 0 );
        VectorOfDoubleVectors outputVector;
Mark Friedrichs's avatar
Mark Friedrichs committed
793
        cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4,                    outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
794
795
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipole,      outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
796
797
798
        cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
     }

Mark Friedrichs's avatar
Mark Friedrichs committed
799
800
801
802
803
804
    if( 0 ){
        static int iteration = 0;
        checkForNans( gpu->natoms,  3, amoebaGpu->psInducedDipole, gpu->psAtomIndex->_pSysData,    ++iteration, "CudaPmeMI", stderr );
        checkForNans( gpu->natoms,  3, amoebaGpu->psInducedDipolePolar, gpu->psAtomIndex->_pSysData, iteration, "CudaPmeMIPolar", stderr );
     }

805
806
807
808
809
810
811
812
813
   // ---------------------------------------------------------------------------------------
}

void cudaComputeAmoebaPmeMutualInducedField( amoebaGpuContext amoebaGpu )
{
    if( amoebaGpu->mutualInducedIterativeMethod == 0 ){
        cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpu );
    }
}