PME.cpp 25.8 KB
Newer Older
1
/*
2
 * Reference implementation of PME reciprocal space interactions.
3
 *
4
5
6
 * Copyright (c) 2009, Erik Lindahl, Rossen Apostolov, Szilard Pall
 * All rights reserved.
 * Contact: lindahl@cbr.su.se Stockholm University, Sweden.
7
8
 *
 * Redistribution and use in source and binary forms, with or without
9
10
 * modification, are permitted provided that the following conditions are met:
 *
11
12
13
14
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer. Redistributions in binary
 * form must reproduce the above copyright notice, this list of conditions and
 * the following disclaimer in the documentation and/or other materials provided
15
 * with the distribution.
16
17
 * Neither the name of the author/university nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
18
19
 * specific prior written permission.
 *
20
21
22
23
24
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
26
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
30
31
32
33
34
35
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

36
#include "PME.h"
37
38
39
40
41
42
43
44
#include "fftpack.h"


typedef int    ivec[3];


struct pme
{
Peter Eastman's avatar
Peter Eastman committed
45
46
    int          natoms;
    RealOpenMM       ewaldcoeff;
47

Peter Eastman's avatar
Peter Eastman committed
48
    t_complex *  grid;                 /* Memory for the grid we spread charges on.
49
50
51
                                        * Element (i,j,k) is accessed as:
                                        * grid[i*ngrid[1]*ngrid[2] + j*ngrid[2] + k]
                                        */
Peter Eastman's avatar
Peter Eastman committed
52
53
    int          ngrid[3];             /* Total grid dimensions (all data is complex!) */
    fftpack_t    fftplan;              /* Handle to fourier transform setup  */
54

Peter Eastman's avatar
Peter Eastman committed
55
    int          order;                /* PME interpolation order. Almost always 4 */
56

57
    /* Data for bspline interpolation, see the Essman PME paper */
Peter Eastman's avatar
Peter Eastman committed
58
59
60
    RealOpenMM *     bsplines_moduli[3];   /* 3 pointers, to x/y/z bspline moduli, each of length ngrid[x/y/z]   */
    RealOpenMM *     bsplines_theta[3];    /* each of x/y/z has length order*natoms */
    RealOpenMM *     bsplines_dtheta[3];   /* each of x/y/z has length order*natoms */
61

Peter Eastman's avatar
Peter Eastman committed
62
    ivec *       particleindex;        /* Array of length natoms. Each element is
63
64
65
                                        * an ivec (3 ints) that specify the grid
                                        * indices for that particular atom. Updated every step!
                                        */
66
    rvec *       particlefraction;     /* Array of length natoms. Fractional offset in the grid for
67
68
                                        * each atom in all three dimensions.
                                        */
69

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
    /* Further explanation of index/fraction:
     *
     * Assume we have a cell of size 10*10*10nm, and a total grid dimension of 100*100*100 cells.
     * In other words, each cell is 0.1*0.1*0.1 nm.
     *
     * If particle i has coordinates { 0.543 , 6.235 , -0.73 }, we will get:
     *
     * particleindex[i]    = { 5 , 62 , 92 }         (-0.73 + 10 = 9.27, we always apply PBC for grid calculations!)
     * particlefraction[i] = { 0.43 , 0.35 , 0.7 }   ( this is the fraction of the cell length where the atom is)
     *
     * (The reason for precaculating / storing these is that it gets a bit more complex for triclinic cells :-)
     *
     * In the current code version we might assume that a particle is not more than a whole box length away from
     * the central cell, i.e., in this case we would assume all coordinates fall in -10 nm < x,y,z < 20 nm.
     */
85

Peter Eastman's avatar
Peter Eastman committed
86
    RealOpenMM       epsilon_r;             /* Dielectric coefficient to use, typically 1.0 */
87
};
88
89
90
91
92
93
94


/* Internal setup routines */



/* Only called once from init_pme(), performance does not matter! */
95
static void
96
97
pme_calculate_bsplines_moduli(pme_t pme)
{
Peter Eastman's avatar
Peter Eastman committed
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
    int       nmax;
    int       i,j,k,l,d;
    int       order;
    int       ndata;
    RealOpenMM *  data;
    RealOpenMM *  ddata;
    RealOpenMM *  bsplines_data;
    RealOpenMM    div;
    RealOpenMM    sc,ss,arg;

    nmax = 0;
    for(d=0;d<3;d++)
    {
        nmax = (pme->ngrid[d] > nmax) ? pme->ngrid[d] : nmax;
        pme->bsplines_moduli[d] = (RealOpenMM *) malloc(sizeof(RealOpenMM)*pme->ngrid[d]);
113
    }
114

Peter Eastman's avatar
Peter Eastman committed
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
    order = pme->order;

    /* temp storage in this routine */
    data          = (RealOpenMM *) malloc(sizeof(RealOpenMM)*order);
    ddata         = (RealOpenMM *) malloc(sizeof(RealOpenMM)*order);
    bsplines_data = (RealOpenMM *) malloc(sizeof(RealOpenMM)*nmax);

    data[order-1]=0;
    data[1]=0;
    data[0]=1;

    for(k=3;k<order;k++)
    {
        div=(RealOpenMM) (1.0/(k-1.0));
        data[k-1]=0;
        for(l=1;l<(k-1);l++)
        {
            data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
        }
        data[0]=div*data[0];
    }

    /* differentiate */
    ddata[0]=-data[0];
    for(k=1;k<order;k++)
    {
        ddata[k]=data[k-1]-data[k];
    }

    div=(RealOpenMM) (1.0/(order-1));
    data[order-1]=0;

    for(l=1;l<(order-1);l++)
    {
        data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
    }
    data[0]=div*data[0];

    for(i=0;i<nmax;i++)
    {
        bsplines_data[i]=0;
    }
    for(i=1;i<=order;i++)
    {
        bsplines_data[i]=data[i-1];
    }

    /* Evaluate the actual bspline moduli for X/Y/Z */
    for(d=0;d<3;d++)
    {
        ndata = pme->ngrid[d];
        for(i=0;i<ndata;i++)
        {
            sc=ss=0;
            for(j=0;j<ndata;j++)
            {
                arg=(RealOpenMM) ((2.0*M_PI*i*j)/ndata);
                sc+=bsplines_data[j]*cos(arg);
                ss+=bsplines_data[j]*sin(arg);
            }
            pme->bsplines_moduli[d][i]=sc*sc+ss*ss;
        }
        for(i=0;i<ndata;i++)
        {
            if(pme->bsplines_moduli[d][i]<1.0e-7)
            {
                pme->bsplines_moduli[d][i]=(pme->bsplines_moduli[d][i-1]+pme->bsplines_moduli[d][i+1])/2;
            }
        }
    }

    /* Release temp storage */
    free(data);
    free(ddata);
    free(bsplines_data);
190
191
192
193
194
195
}



static void
pme_update_grid_index_and_fraction(pme_t    pme,
Peter Eastman's avatar
Peter Eastman committed
196
197
                                   RealOpenMM ** atomCoordinates,
                                   const RealOpenMM   periodicBoxSize[3])
198
{
Peter Eastman's avatar
Peter Eastman committed
199
200
201
202
203
204
205
206
207
208
    int    i;
    int    d;
    RealOpenMM t;
    int    ti;

    for(i=0;i<pme->natoms;i++)
    {
        for(d=0;d<3;d++)
        {
            /* Index calculation (Look mom, no conditionals!):
209
210
211
212
             *
             * Both for Cuda and modern CPUs it is nice to avoid conditionals, but we still need to apply periodic boundary conditions.
             * Instead of having loops to add/subtract the box dimension, we do it this way:
             *
Peter Eastman's avatar
Peter Eastman committed
213
             * 1. First add the box size, to make sure this atom coordinate isnt -0.1 or something.
214
             *    After this we assume all fractional box positions are *positive*.
215
             *    The reason for this is that we always want to round coordinates _down_ to get
216
217
218
             *    their grid index, and when taking the integer part of -3.4 we would get -3, not -4 as we want.
             *    Since we anyway need the grid indices to fall in the central box, it is more convenient
             *    to first manipulate the coordinates to be positive.
Peter Eastman's avatar
Peter Eastman committed
219
             * 2. Convert to integer grid index
220
221
222
223
224
225
226
227
             *    Since we have added a whole box unit in step 1, this index might actually be larger than
             *    the grid dimension. Examples, assuming 10*10*10nm box and grid dimension 100*100*100 (spacing 0.1 nm):
             *
             *    coordinate is { 0.543 , 6.235 , -0.73 }
             *
             *    x[i][d]/box[d]                      becomes   { 0.0543 , 0.6235 , -0.073 }
             *    (x[i][d]/box[d] + 1.0)              becomes   { 1.0543 , 1.6235 , 0.927 }
             *    (x[i][d]/box[d] + 1.0)*ngrid[d]     becomes   { 105.43 , 162.35 , 92.7 }
228
             *
229
230
231
232
             *    integer part is now { 105 , 162 , 92 }
             *
             *    The fraction is calculates as t-ti, which becomes { 0.43 , 0.35 , 0.7 }
             *
Peter Eastman's avatar
Peter Eastman committed
233
             * 3. Take the first integer index part (which can be larger than the grid) modulo the grid dimension
234
             *
235
             *    Now we get { 5 , 62 , 92 }
236
237
238
239
240
241
242
             *
             *    Voila, both index and fraction, entirely without conditionals. The one limitation here is that
             *    we only add one box length, so if the particle had a coordinate <=-10.0, we would be screwed.
             *    In principle we can of course add 100.0, but that just moves the problem, it doesnt solve it.
             *    In practice, MD programs will apply PBC to reset particles inside the central box to avoid
             *    numerical problems, so this shouldnt cause any problems.
             *    (And, by adding 100.0 box lengths, we would lose a bit of numerical accuracy here!)
Peter Eastman's avatar
Peter Eastman committed
243
             */
244
245
246
            RealOpenMM coord = atomCoordinates[i][d];
            coord -= floor(coord/periodicBoxSize[d])*periodicBoxSize[d];
            t  = (coord / periodicBoxSize[d])*pme->ngrid[d];
Peter Eastman's avatar
Peter Eastman committed
247
248
249
250
251
252
            ti = (int) t;

            pme->particlefraction[i][d] = t - ti;
            pme->particleindex[i][d]    = ti % pme->ngrid[d];
        }
    }
253
254
255
}


256
/* Ugly bspline calculation taken from Tom Dardens reference equations.
257
258
259
260
 * This probably very sub-optimal in Cuda? Separate kernel?
 *
 * In practice, it might help to require order=4 for the cuda port.
 */
261
static void
262
263
pme_update_bsplines(pme_t    pme)
{
Peter Eastman's avatar
Peter Eastman committed
264
265
266
267
268
    int       i,j,k,l;
    int       order;
    RealOpenMM    dr,div;
    RealOpenMM *  data;
    RealOpenMM *  ddata;
269

Peter Eastman's avatar
Peter Eastman committed
270
    order = pme->order;
271
272

    for(i=0; (i<pme->natoms); i++)
Peter Eastman's avatar
Peter Eastman committed
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
    {
        for(j=0; j<3; j++)
        {
            /* dr is relative offset from lower cell limit */
            dr = pme->particlefraction[i][j];

            data  = &(pme->bsplines_theta[j][i*order]);
            ddata = &(pme->bsplines_dtheta[j][i*order]);
            data[order-1] = 0;
            data[1]       = dr;
            data[0]       = 1-dr;

            for(k=3; k<order; k++)
            {
                div = (RealOpenMM) (1.0/(k-1.0));
                data[k-1] = div*dr*data[k-2];
                for(l=1; l<(k-1); l++)
                {
                    data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)*data[k-l-1]);
                }
                data[0] = div*(1-dr)*data[0];
            }

            /* differentiate */
            ddata[0] = -data[0];

            for(k=1; k<order; k++)
            {
                ddata[k] = data[k-1]-data[k];
            }

            div           = (RealOpenMM) (1.0/(order-1));
            data[order-1] = div*dr*data[order-2];

            for(l=1; l<(order-1); l++)
            {
                data[order-l-1] = div*((dr+l)*data[order-l-2]+(order-l-dr)*data[order-l-1]);
            }
            data[0] = div*(1-dr)*data[0];
        }
313
314
315
316
317
318
    }
}


static void
pme_grid_spread_charge(pme_t      pme,
Peter Eastman's avatar
Peter Eastman committed
319
                       RealOpenMM **   atomParameters)
320
321
{
        static const int   QIndex = 2; // atom charges are stored in atomParameters[atomID][2]
Peter Eastman's avatar
Peter Eastman committed
322
323
324
325
326
327
328
329
330
331
332
333
    int       order;
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
    int       xindex,yindex,zindex;
    int       index;
    RealOpenMM    q;
    RealOpenMM *  thetax;
    RealOpenMM *  thetay;
    RealOpenMM *  thetaz;

    order = pme->order;
334

335
    /* Reset the grid */
Peter Eastman's avatar
Peter Eastman committed
336
337
338
339
    for(i=0;i<pme->ngrid[0]*pme->ngrid[1]*pme->ngrid[2];i++)
    {
        pme->grid[i].re = pme->grid[i].im = 0;
    }
340

Peter Eastman's avatar
Peter Eastman committed
341
342
343
    for(i=0;i<pme->natoms;i++)
    {
        q = atomParameters[i][QIndex];
344

345
        /* Grid index for the actual atom position */
Peter Eastman's avatar
Peter Eastman committed
346
347
348
        x0index = pme->particleindex[i][0];
        y0index = pme->particleindex[i][1];
        z0index = pme->particleindex[i][2];
349

350
        /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
Peter Eastman's avatar
Peter Eastman committed
351
352
353
        thetax  = &(pme->bsplines_theta[0][i*order]);
        thetay  = &(pme->bsplines_theta[1][i*order]);
        thetaz  = &(pme->bsplines_theta[2][i*order]);
354

355
356
357
        /* Loop over norder*norder*norder (typically 4*4*4) neighbor cells.
         *
         * As a neat optimization, we only spread in the forward direction, but apply PBC!
358
         *
359
         * Since we are going to do an FFT on the grid, it doesnt matter where the data is,
360
         * in frequency space the result will be the same.
361
362
363
364
365
         *
         * So, the influence function (bsplines) will probably be something like (0.15,0.35,0.35,0.15),
         * with largest weight 2-3 steps forward (you dont need to understand that for the implementation :-)
         * Effectively, you can look at this as translating the entire grid.
         *
366
         * Why do we do this stupid thing?
367
         *
368
         * 1) The loops get much simpler
369
370
371
         * 2) Just looking forward will hopefully get us more cache hits
         * 3) When we parallelize things, we only need to communicate in one direction instead of two!
         */
372

Peter Eastman's avatar
Peter Eastman committed
373
374
        for(ix=0;ix<order;ix++)
        {
375
            /* Calculate index, apply PBC so we spread to index 0/1/2 when a particle is close to the upper limit of the grid */
Peter Eastman's avatar
Peter Eastman committed
376
            xindex = (x0index + ix) % pme->ngrid[0];
377

Peter Eastman's avatar
Peter Eastman committed
378
379
380
            for(iy=0;iy<order;iy++)
            {
                yindex = (y0index + iy) % pme->ngrid[1];
381

Peter Eastman's avatar
Peter Eastman committed
382
383
384
385
                for(iz=0;iz<order;iz++)
                {
                    /* Can be optimized, but we keep it simple here */
                    zindex               = (z0index + iz) % pme->ngrid[2];
386
                    /* Calculate index in the charge grid */
Peter Eastman's avatar
Peter Eastman committed
387
                    index                = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
388
                    /* Add the charge times the bspline spread/interpolation factors to this grid position */
Peter Eastman's avatar
Peter Eastman committed
389
390
391
392
393
                    pme->grid[index].re += q*thetax[ix]*thetay[iy]*thetaz[iz];
                }
            }
        }
    }
394
395
396
397
398
399
}



static void
pme_reciprocal_convolution(pme_t     pme,
Peter Eastman's avatar
Peter Eastman committed
400
401
402
                           const RealOpenMM    periodicBoxSize[3],
                           RealOpenMM *  energy,
                           RealOpenMM    pme_virial[3][3])
403
{
Peter Eastman's avatar
Peter Eastman committed
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
    int kx,ky,kz;
    int nx,ny,nz;
    RealOpenMM mx,my,mz;
    RealOpenMM mhx,mhy,mhz,m2;
    RealOpenMM one_4pi_eps;
    RealOpenMM virxx,virxy,virxz,viryy,viryz,virzz;
    RealOpenMM bx,by,bz;
    RealOpenMM d1,d2;
    RealOpenMM eterm,vfactor,struct2,ets2;
    RealOpenMM esum;
    RealOpenMM factor;
    RealOpenMM denom;
    RealOpenMM boxfactor;
    RealOpenMM maxkx,maxky,maxkz;

    t_complex *ptr;

    nx = pme->ngrid[0];
    ny = pme->ngrid[1];
    nz = pme->ngrid[2];

    one_4pi_eps = (RealOpenMM) (ONE_4PI_EPS0/pme->epsilon_r);
    factor = (RealOpenMM) (M_PI*M_PI/(pme->ewaldcoeff*pme->ewaldcoeff));
    boxfactor = (RealOpenMM) (M_PI*periodicBoxSize[0]*periodicBoxSize[1]*periodicBoxSize[2]);

    esum = 0;
    virxx = 0;
    virxy = 0;
    virxz = 0;
    viryy = 0;
    viryz = 0;
    virzz = 0;

    maxkx = (RealOpenMM) ((nx+1)/2);
    maxky = (RealOpenMM) ((ny+1)/2);
    maxkz = (RealOpenMM) ((nz+1)/2);
440
441
442
443

    for(kx=0;kx<nx;kx++)
    {
        /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
Peter Eastman's avatar
Peter Eastman committed
444
        mx  = (RealOpenMM) ((kx<maxkx) ? kx : (kx-nx));
445
446
447
448
449
450
        mhx = mx/periodicBoxSize[0];
        bx  = boxfactor*pme->bsplines_moduli[0][kx];

        for(ky=0;ky<ny;ky++)
        {
            /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
Peter Eastman's avatar
Peter Eastman committed
451
            my  = (RealOpenMM) ((ky<maxky) ? ky : (ky-ny));
452
453
            mhy = my/periodicBoxSize[1];
            by  = pme->bsplines_moduli[1][ky];
454

Peter Eastman's avatar
Peter Eastman committed
455
456
            for(kz=0;kz<nz;kz++)
            {
457
458
459
460
461
462
463
464
                /* If the net charge of the system is 0.0, there will not be any DC (direct current, zero frequency) component. However,
                 * we can still handle charged systems through a charge correction, in which case the DC
                 * component should be excluded from recprocal space. We will anyway run into problems below when dividing with the
                 * frequency if it is zero...
                 *
                 * In cuda you could probably work around this by setting something to 0.0 instead, but the short story is that we
                 * should skip the zero frequency case!
                 */
Peter Eastman's avatar
Peter Eastman committed
465
466
467
468
                if (kx==0 && ky==0 && kz==0)
                {
                    continue;
                }
469
470

                /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
Peter Eastman's avatar
Peter Eastman committed
471
472
                mz        = (RealOpenMM) ((kz<maxkz) ? kz : (kz-nz));
                mhz       = mz/periodicBoxSize[2];
473
474

                /* Pointer to the grid cell in question */
Peter Eastman's avatar
Peter Eastman committed
475
                ptr       = pme->grid + kx*ny*nz + ky*nz + kz;
476
477

                /* Get grid data for this frequency */
Peter Eastman's avatar
Peter Eastman committed
478
479
                d1        = ptr->re;
                d2        = ptr->im;
480

481
                /* Calculate the convolution - see the Essman/Darden paper for the equation! */
Peter Eastman's avatar
Peter Eastman committed
482
483
484
                m2        = mhx*mhx+mhy*mhy+mhz*mhz;
                bz        = pme->bsplines_moduli[2][kz];
                denom     = m2*bx*by*bz;
485

Peter Eastman's avatar
Peter Eastman committed
486
                eterm     = one_4pi_eps*exp(-factor*m2)/denom;
487
488

                /* write back convolution data to grid */
Peter Eastman's avatar
Peter Eastman committed
489
490
                ptr->re   = d1*eterm;
                ptr->im   = d2*eterm;
491

Peter Eastman's avatar
Peter Eastman committed
492
                struct2   = (d1*d1+d2*d2);
493

Peter Eastman's avatar
Peter Eastman committed
494
495
496
                /* Long-range PME contribution to the energy for this frequency */
                ets2      = eterm*struct2;
                esum     += ets2;
497

Peter Eastman's avatar
Peter Eastman committed
498
499
500
                /* PME long-range contribution to atomic virial. Since it is symmetric, we only calculate half the matrix inside this loop. */
                vfactor   = (factor*m2+1)*2/m2;
                virxx    += ets2*(vfactor*mhx*mhx-1);
501
502
                virxy    += ets2*vfactor*mhx*mhy;
                virxz    += ets2*vfactor*mhx*mhz;
Peter Eastman's avatar
Peter Eastman committed
503
                viryy    += ets2*(vfactor*mhy*mhy-1);
504
                viryz    += ets2*vfactor*mhy*mhz;
Peter Eastman's avatar
Peter Eastman committed
505
506
507
508
509
510
511
512
513
514
                virzz    += ets2*(vfactor*mhz*mhz-1);
            }
        }
    }
    pme_virial[0][0] = (RealOpenMM) (0.25*virxx);
    pme_virial[1][1] = (RealOpenMM) (0.25*viryy);
    pme_virial[2][2] = (RealOpenMM) (0.25*virzz);
    pme_virial[0][1] = pme_virial[1][0] = (RealOpenMM) (0.25*virxy);
    pme_virial[0][2] = pme_virial[2][0] = (RealOpenMM) (0.25*virxz);
    pme_virial[1][2] = pme_virial[2][1] = (RealOpenMM) (0.25*viryz);
515

516
    /* The factor 0.5 is nothing special, but it is better to have it here than inside the loop :-) */
517
    *energy = (RealOpenMM) (0.5*esum);
518
519
520
521
522
}


static void
pme_grid_interpolate_force(pme_t      pme,
Peter Eastman's avatar
Peter Eastman committed
523
524
525
                           const RealOpenMM     periodicBoxSize[3],
                           RealOpenMM **   atomParameters,
                           RealOpenMM **   forces)
526
527
{
        static const int   QIndex = 2; // atom charges are stored in atomParameters[atomID][2]
Peter Eastman's avatar
Peter Eastman committed
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
    int       xindex,yindex,zindex;
    int       index;
    int       order;
    RealOpenMM    q;
    RealOpenMM *  thetax;
    RealOpenMM *  thetay;
    RealOpenMM *  thetaz;
    RealOpenMM *  dthetax;
    RealOpenMM *  dthetay;
    RealOpenMM *  dthetaz;
    RealOpenMM    tx,ty,tz;
    RealOpenMM    dtx,dty,dtz;
    RealOpenMM    fx,fy,fz;
    RealOpenMM    gridvalue;
    int       nx,ny,nz;

    nx    = pme->ngrid[0];
    ny    = pme->ngrid[1];
    nz    = pme->ngrid[2];

    order = pme->order;
552

553
    /* This is almost identical to the charge spreading routine! */
554

Peter Eastman's avatar
Peter Eastman committed
555
556
557
    for(i=0;i<pme->natoms;i++)
    {
        fx = fy = fz = 0;
558

Peter Eastman's avatar
Peter Eastman committed
559
        q = atomParameters[i][QIndex];
560

561
        /* Grid index for the actual atom position */
Peter Eastman's avatar
Peter Eastman committed
562
563
564
        x0index = pme->particleindex[i][0];
        y0index = pme->particleindex[i][1];
        z0index = pme->particleindex[i][2];
565

566
        /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
Peter Eastman's avatar
Peter Eastman committed
567
568
569
570
571
572
        thetax  = &(pme->bsplines_theta[0][i*order]);
        thetay  = &(pme->bsplines_theta[1][i*order]);
        thetaz  = &(pme->bsplines_theta[2][i*order]);
        dthetax = &(pme->bsplines_dtheta[0][i*order]);
        dthetay = &(pme->bsplines_dtheta[1][i*order]);
        dthetaz = &(pme->bsplines_dtheta[2][i*order]);
573

Peter Eastman's avatar
Peter Eastman committed
574
        /* See pme_grid_spread_charge() for comments about the order here, and only interpolation in one direction */
575

576
577
578
        /* Since we will add order^3 (typically 4*4*4=64) terms to the force on each particle, we use temporary fx/fy/fz
         * variables, and only add it to memory forces[] at the end.
         */
Peter Eastman's avatar
Peter Eastman committed
579
580
581
        for(ix=0;ix<order;ix++)
        {
            xindex = (x0index + ix) % pme->ngrid[0];
582
            /* Get both the bspline factor and its derivative with respect to the x coordinate! */
Peter Eastman's avatar
Peter Eastman committed
583
584
            tx     = thetax[ix];
            dtx    = dthetax[ix];
585

Peter Eastman's avatar
Peter Eastman committed
586
587
588
            for(iy=0;iy<order;iy++)
            {
                yindex = (y0index + iy) % pme->ngrid[1];
589
                /* bspline + derivative wrt y */
Peter Eastman's avatar
Peter Eastman committed
590
591
                ty     = thetay[iy];
                dty    = dthetay[iy];
592

Peter Eastman's avatar
Peter Eastman committed
593
594
595
596
                for(iz=0;iz<order;iz++)
                {
                    /* Can be optimized, but we keep it simple here */
                    zindex               = (z0index + iz) % pme->ngrid[2];
597
                    /* bspline + derivative wrt z */
Peter Eastman's avatar
Peter Eastman committed
598
599
600
                    tz                   = thetaz[iz];
                    dtz                  = dthetaz[iz];
                    index                = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
601

602
603
                    /* Get the fft+convoluted+ifft:d data from the grid, which must be real by definition */
                    /* Checking that the imaginary part is indeed zero might be a good check :-) */
Peter Eastman's avatar
Peter Eastman committed
604
605
606
607
608
609
610
611
612
                    gridvalue            = pme->grid[index].re;

                    /* The d component of the force is calculated by taking the derived bspline in dimension d, normal bsplines in the other two */
                    fx                  += dtx*ty*tz*gridvalue;
                    fy                  += tx*dty*tz*gridvalue;
                    fz                  += tx*ty*dtz*gridvalue;
                }
            }
        }
613
        /* Update memory force, note that we multiply by charge and some box stuff */
Peter Eastman's avatar
Peter Eastman committed
614
615
616
617
        forces[i][0] -= q*fx*nx/periodicBoxSize[0];
        forces[i][1] -= q*fy*ny/periodicBoxSize[1];
        forces[i][2] -= q*fz*nz/periodicBoxSize[2];
    }
618
619
620
621
622
623
}



/* EXPORTED ROUTINES */

624
int
625
pme_init(pme_t *       ppme,
Peter Eastman's avatar
Peter Eastman committed
626
627
628
629
         RealOpenMM        ewaldcoeff,
         int           natoms,
         const int           ngrid[3],
         int           pme_order,
630
631
632
633
         RealOpenMM        epsilon_r)
{
    pme_t pme;
    int   d;
634

635
636
637
638
639
    pme = (pme_t) malloc(sizeof(struct pme));

    pme->order       = pme_order;
    pme->epsilon_r   = epsilon_r;
    pme->ewaldcoeff  = ewaldcoeff;
Peter Eastman's avatar
Peter Eastman committed
640
    pme->natoms      = natoms;
641

642
    for(d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
643
644
645
646
647
    {
        pme->ngrid[d]            = ngrid[d];
        pme->bsplines_theta[d]   = (RealOpenMM *)malloc(sizeof(RealOpenMM)*pme_order*natoms);
        pme->bsplines_dtheta[d]  = (RealOpenMM *)malloc(sizeof(RealOpenMM)*pme_order*natoms);
    }
648

Peter Eastman's avatar
Peter Eastman committed
649
650
    pme->particlefraction = (rvec *)malloc(sizeof(rvec)*natoms);
    pme->particleindex    = (ivec *)malloc(sizeof(ivec)*natoms);
651

Peter Eastman's avatar
Peter Eastman committed
652
653
    /* Allocate charge grid storage */
    pme->grid        = (t_complex *)malloc(sizeof(t_complex)*ngrid[0]*ngrid[1]*ngrid[2]);
654

Peter Eastman's avatar
Peter Eastman committed
655
    fftpack_init_3d(&pme->fftplan,ngrid[0],ngrid[1],ngrid[2]);
656

Peter Eastman's avatar
Peter Eastman committed
657
658
    /* Setup bspline moduli (see Essman paper) */
    pme_calculate_bsplines_moduli(pme);
659

Peter Eastman's avatar
Peter Eastman committed
660
    *ppme = pme;
661

662
663
664
665
666
667
668
669
    return 0;
}





int pme_exec(pme_t       pme,
Peter Eastman's avatar
Peter Eastman committed
670
671
672
673
674
675
             RealOpenMM **   atomCoordinates,
             RealOpenMM **   forces,
             RealOpenMM **   atomParameters,
             const RealOpenMM      periodicBoxSize[3],
             RealOpenMM *    energy,
             RealOpenMM      pme_virial[3][3])
676
677
{
    /* Routine is called with coordinates in x, a box, and charges in q */
678

679
680
681
682
    /* Before we can do the actual interpolation, we need to recalculate and update
     * the indices for each particle in the charge grid (initialized in pme_init()),
     * and what its fractional offset in this grid cell is.
     */
683

Peter Eastman's avatar
Peter Eastman committed
684
    /* Update charge grid indices and fractional offsets for each atom.
685
     * The indices/fractions are stored internally in the pme datatype
686
     */
Peter Eastman's avatar
Peter Eastman committed
687
    pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxSize);
688

Peter Eastman's avatar
Peter Eastman committed
689
690
    /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
    pme_update_bsplines(pme);
691

Peter Eastman's avatar
Peter Eastman committed
692
693
    /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
    pme_grid_spread_charge(pme,atomParameters);
694

Peter Eastman's avatar
Peter Eastman committed
695
696
    /* do 3d-fft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid);
697

Peter Eastman's avatar
Peter Eastman committed
698
699
    /* solve in k-space */
    pme_reciprocal_convolution(pme,periodicBoxSize,energy,pme_virial);
700

Peter Eastman's avatar
Peter Eastman committed
701
702
    /* do 3d-invfft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid);
703

Peter Eastman's avatar
Peter Eastman committed
704
705
    /* Get the particle forces from the grid and bsplines in the pme structure */
    pme_grid_interpolate_force(pme,periodicBoxSize,atomParameters,forces);
706
707
708
709
710
711
712
713
714

    return 0;
}



int
pme_destroy(pme_t    pme)
{
Peter Eastman's avatar
Peter Eastman committed
715
    int d;
716

Peter Eastman's avatar
Peter Eastman committed
717
    free(pme->grid);
718

Peter Eastman's avatar
Peter Eastman committed
719
720
721
722
723
724
    for(d=0;d<3;d++)
    {
        free(pme->bsplines_moduli[d]);
        free(pme->bsplines_theta[d]);
        free(pme->bsplines_dtheta[d]);
    }
725

Peter Eastman's avatar
Peter Eastman committed
726
727
    free(pme->particlefraction);
    free(pme->particleindex);
728

Peter Eastman's avatar
Peter Eastman committed
729
    fftpack_destroy(pme->fftplan);
730

Peter Eastman's avatar
Peter Eastman committed
731
732
    /* destroy structure itself */
    free(pme);
733

Peter Eastman's avatar
Peter Eastman committed
734
    return 0;
735
}