ReferencePME.cpp 30 KB
Newer Older
1
/*
2
 * Reference implementation of PME reciprocal space interactions.
3
 *
4
5
6
 * Copyright (c) 2009, Erik Lindahl, Rossen Apostolov, Szilard Pall
 * All rights reserved.
 * Contact: lindahl@cbr.su.se Stockholm University, Sweden.
7
8
 *
 * Redistribution and use in source and binary forms, with or without
9
10
 * modification, are permitted provided that the following conditions are met:
 *
11
12
13
14
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer. Redistributions in binary
 * form must reproduce the above copyright notice, this list of conditions and
 * the following disclaimer in the documentation and/or other materials provided
15
 * with the distribution.
16
17
 * Neither the name of the author/university nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
18
19
 * specific prior written permission.
 *
20
21
22
23
24
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
26
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
30
31
32
33
34
35
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

36
#include "ReferencePME.h"
37
#include "fftpack.h"
peastman's avatar
peastman committed
38
#include "SimTKOpenMMRealType.h"
39

40
using std::vector;
41
42
43

typedef int    ivec[3];

44
namespace OpenMM {
45
46
47

struct pme
{
Peter Eastman's avatar
Peter Eastman committed
48
    int          natoms;
peastman's avatar
peastman committed
49
    double       ewaldcoeff;
50

Peter Eastman's avatar
Peter Eastman committed
51
    t_complex *  grid;                 /* Memory for the grid we spread charges on.
52
53
54
                                        * Element (i,j,k) is accessed as:
                                        * grid[i*ngrid[1]*ngrid[2] + j*ngrid[2] + k]
                                        */
Peter Eastman's avatar
Peter Eastman committed
55
56
    int          ngrid[3];             /* Total grid dimensions (all data is complex!) */
    fftpack_t    fftplan;              /* Handle to fourier transform setup  */
57

Peter Eastman's avatar
Peter Eastman committed
58
    int          order;                /* PME interpolation order. Almost always 4 */
59

60
    /* Data for bspline interpolation, see the Essman PME paper */
peastman's avatar
peastman committed
61
62
63
    double *     bsplines_moduli[3];   /* 3 pointers, to x/y/z bspline moduli, each of length ngrid[x/y/z]   */
    double *     bsplines_theta[3];    /* each of x/y/z has length order*natoms */
    double *     bsplines_dtheta[3];   /* each of x/y/z has length order*natoms */
64

Peter Eastman's avatar
Peter Eastman committed
65
    ivec *       particleindex;        /* Array of length natoms. Each element is
66
67
68
                                        * an ivec (3 ints) that specify the grid
                                        * indices for that particular atom. Updated every step!
                                        */
69
    rvec *       particlefraction;     /* Array of length natoms. Fractional offset in the grid for
70
71
                                        * each atom in all three dimensions.
                                        */
72

73
74
75
76
77
78
79
80
    /* Further explanation of index/fraction:
     *
     * Assume we have a cell of size 10*10*10nm, and a total grid dimension of 100*100*100 cells.
     * In other words, each cell is 0.1*0.1*0.1 nm.
     *
     * If particle i has coordinates { 0.543 , 6.235 , -0.73 }, we will get:
     *
     * particleindex[i]    = { 5 , 62 , 92 }         (-0.73 + 10 = 9.27, we always apply PBC for grid calculations!)
81
     * particlefraction[i] = { 0.43 , 0.35 , 0.7 }   (this is the fraction of the cell length where the atom is)
82
83
84
85
86
87
     *
     * (The reason for precaculating / storing these is that it gets a bit more complex for triclinic cells :-)
     *
     * In the current code version we might assume that a particle is not more than a whole box length away from
     * the central cell, i.e., in this case we would assume all coordinates fall in -10 nm < x,y,z < 20 nm.
     */
88

peastman's avatar
peastman committed
89
    double       epsilon_r;             /* Dielectric coefficient to use, typically 1.0 */
90
};
91
92
93
94
95
96
97


/* Internal setup routines */



/* Only called once from init_pme(), performance does not matter! */
98
static void
99
100
pme_calculate_bsplines_moduli(pme_t pme)
{
Peter Eastman's avatar
Peter Eastman committed
101
102
103
104
    int       nmax;
    int       i,j,k,l,d;
    int       order;
    int       ndata;
peastman's avatar
peastman committed
105
106
107
108
109
    double *  data;
    double *  ddata;
    double *  bsplines_data;
    double    div;
    double    sc,ss,arg;
Peter Eastman's avatar
Peter Eastman committed
110
111

    nmax = 0;
112
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
113
114
    {
        nmax = (pme->ngrid[d] > nmax) ? pme->ngrid[d] : nmax;
peastman's avatar
peastman committed
115
        pme->bsplines_moduli[d] = (double *) malloc(sizeof(double)*pme->ngrid[d]);
116
    }
117

Peter Eastman's avatar
Peter Eastman committed
118
119
120
    order = pme->order;

    /* temp storage in this routine */
peastman's avatar
peastman committed
121
122
123
    data          = (double *) malloc(sizeof(double)*order);
    ddata         = (double *) malloc(sizeof(double)*order);
    bsplines_data = (double *) malloc(sizeof(double)*nmax);
Peter Eastman's avatar
Peter Eastman committed
124
125
126
127
128

    data[order-1]=0;
    data[1]=0;
    data[0]=1;

129
    for (k=3;k<order;k++)
Peter Eastman's avatar
Peter Eastman committed
130
    {
peastman's avatar
peastman committed
131
        div=1.0/(k-1.0);
Peter Eastman's avatar
Peter Eastman committed
132
        data[k-1]=0;
133
        for (l=1;l<(k-1);l++)
Peter Eastman's avatar
Peter Eastman committed
134
135
136
137
138
139
140
141
        {
            data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
        }
        data[0]=div*data[0];
    }

    /* differentiate */
    ddata[0]=-data[0];
142
    for (k=1;k<order;k++)
Peter Eastman's avatar
Peter Eastman committed
143
144
145
146
    {
        ddata[k]=data[k-1]-data[k];
    }

peastman's avatar
peastman committed
147
    div=1.0/(order-1);
Peter Eastman's avatar
Peter Eastman committed
148
149
    data[order-1]=0;

150
    for (l=1;l<(order-1);l++)
Peter Eastman's avatar
Peter Eastman committed
151
152
153
154
155
    {
        data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
    }
    data[0]=div*data[0];

156
    for (i=0;i<nmax;i++)
Peter Eastman's avatar
Peter Eastman committed
157
158
159
    {
        bsplines_data[i]=0;
    }
160
    for (i=1;i<=order;i++)
Peter Eastman's avatar
Peter Eastman committed
161
162
163
164
165
    {
        bsplines_data[i]=data[i-1];
    }

    /* Evaluate the actual bspline moduli for X/Y/Z */
166
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
167
168
    {
        ndata = pme->ngrid[d];
169
        for (i=0;i<ndata;i++)
Peter Eastman's avatar
Peter Eastman committed
170
171
        {
            sc=ss=0;
172
            for (j=0;j<ndata;j++)
Peter Eastman's avatar
Peter Eastman committed
173
            {
peastman's avatar
peastman committed
174
                arg=(2.0*M_PI*i*j)/ndata;
Peter Eastman's avatar
Peter Eastman committed
175
176
177
178
179
                sc+=bsplines_data[j]*cos(arg);
                ss+=bsplines_data[j]*sin(arg);
            }
            pme->bsplines_moduli[d][i]=sc*sc+ss*ss;
        }
180
        for (i=0;i<ndata;i++)
Peter Eastman's avatar
Peter Eastman committed
181
        {
182
            if (pme->bsplines_moduli[d][i]<1.0e-7)
Peter Eastman's avatar
Peter Eastman committed
183
            {
184
                pme->bsplines_moduli[d][i]=(pme->bsplines_moduli[d][(i-1+ndata)%ndata]+pme->bsplines_moduli[d][(i+1)%ndata])/2;
Peter Eastman's avatar
Peter Eastman committed
185
186
187
188
189
190
191
192
            }
        }
    }

    /* Release temp storage */
    free(data);
    free(ddata);
    free(bsplines_data);
193
194
195
}


peastman's avatar
peastman committed
196
static void invert_box_vectors(const Vec3 boxVectors[3], Vec3 recipBoxVectors[3])
197
{
peastman's avatar
peastman committed
198
    double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
199
    assert(determinant > 0);
peastman's avatar
peastman committed
200
201
202
203
    double scale = 1.0/determinant;
    recipBoxVectors[0] = Vec3(boxVectors[1][1]*boxVectors[2][2], 0, 0)*scale;
    recipBoxVectors[1] = Vec3(-boxVectors[1][0]*boxVectors[2][2], boxVectors[0][0]*boxVectors[2][2], 0)*scale;
    recipBoxVectors[2] = Vec3(boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0], -boxVectors[0][0]*boxVectors[2][1], boxVectors[0][0]*boxVectors[1][1])*scale;
204
}
205
206
207

static void
pme_update_grid_index_and_fraction(pme_t    pme,
peastman's avatar
peastman committed
208
209
210
                                   const vector<Vec3>& atomCoordinates,
                                   const Vec3 periodicBoxVectors[3],
                                   const Vec3 recipBoxVectors[3])
211
{
Peter Eastman's avatar
Peter Eastman committed
212
213
    int    i;
    int    d;
peastman's avatar
peastman committed
214
    double t;
Peter Eastman's avatar
Peter Eastman committed
215
216
    int    ti;

217
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
218
    {
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
        /* Index calculation (Look mom, no conditionals!):
         *
         * Both for Cuda and modern CPUs it is nice to avoid conditionals, but we still need to apply periodic boundary conditions.
         * Instead of having loops to add/subtract the box dimension, we do it this way:
         *
         * 1. First add the box size, to make sure this atom coordinate isnt -0.1 or something.
         *    After this we assume all fractional box positions are *positive*.
         *    The reason for this is that we always want to round coordinates _down_ to get
         *    their grid index, and when taking the integer part of -3.4 we would get -3, not -4 as we want.
         *    Since we anyway need the grid indices to fall in the central box, it is more convenient
         *    to first manipulate the coordinates to be positive.
         * 2. Convert to integer grid index
         *    Since we have added a whole box unit in step 1, this index might actually be larger than
         *    the grid dimension. Examples, assuming 10*10*10nm box and grid dimension 100*100*100 (spacing 0.1 nm):
         *
         *    coordinate is { 0.543 , 6.235 , -0.73 }
         *
         *    x[i][d]/box[d]                      becomes   { 0.0543 , 0.6235 , -0.073 }
         *    (x[i][d]/box[d] + 1.0)              becomes   { 1.0543 , 1.6235 , 0.927 }
         *    (x[i][d]/box[d] + 1.0)*ngrid[d]     becomes   { 105.43 , 162.35 , 92.7 }
         *
         *    integer part is now { 105 , 162 , 92 }
         *
         *    The fraction is calculates as t-ti, which becomes { 0.43 , 0.35 , 0.7 }
         *
         * 3. Take the first integer index part (which can be larger than the grid) modulo the grid dimension
         *
         *    Now we get { 5 , 62 , 92 }
         *
         *    Voila, both index and fraction, entirely without conditionals. The one limitation here is that
         *    we only add one box length, so if the particle had a coordinate <=-10.0, we would be screwed.
         *    In principle we can of course add 100.0, but that just moves the problem, it doesnt solve it.
         *    In practice, MD programs will apply PBC to reset particles inside the central box to avoid
         *    numerical problems, so this shouldnt cause any problems.
         *    (And, by adding 100.0 box lengths, we would lose a bit of numerical accuracy here!)
         */
peastman's avatar
peastman committed
255
        Vec3 coord = atomCoordinates[i];
256
        for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
257
        {
peastman's avatar
peastman committed
258
259
            t = coord[0]*recipBoxVectors[0][d]+coord[1]*recipBoxVectors[1][d]+coord[2]*recipBoxVectors[2][d];
            t = (t-floor(t))*pme->ngrid[d];
Peter Eastman's avatar
Peter Eastman committed
260
261
262
263
264
265
            ti = (int) t;

            pme->particlefraction[i][d] = t - ti;
            pme->particleindex[i][d]    = ti % pme->ngrid[d];
        }
    }
266
267
268
}


269
/* Ugly bspline calculation taken from Tom Dardens reference equations.
270
271
272
273
 * This probably very sub-optimal in Cuda? Separate kernel?
 *
 * In practice, it might help to require order=4 for the cuda port.
 */
274
static void
275
276
pme_update_bsplines(pme_t    pme)
{
Peter Eastman's avatar
Peter Eastman committed
277
278
    int       i,j,k,l;
    int       order;
peastman's avatar
peastman committed
279
280
281
    double    dr,div;
    double *  data;
    double *  ddata;
282

Peter Eastman's avatar
Peter Eastman committed
283
    order = pme->order;
284

285
    for (i=0; (i<pme->natoms); i++)
Peter Eastman's avatar
Peter Eastman committed
286
    {
287
        for (j=0; j<3; j++)
Peter Eastman's avatar
Peter Eastman committed
288
289
290
291
292
293
294
295
296
297
        {
            /* dr is relative offset from lower cell limit */
            dr = pme->particlefraction[i][j];

            data  = &(pme->bsplines_theta[j][i*order]);
            ddata = &(pme->bsplines_dtheta[j][i*order]);
            data[order-1] = 0;
            data[1]       = dr;
            data[0]       = 1-dr;

298
            for (k=3; k<order; k++)
Peter Eastman's avatar
Peter Eastman committed
299
            {
peastman's avatar
peastman committed
300
                div = 1.0/(k-1.0);
Peter Eastman's avatar
Peter Eastman committed
301
                data[k-1] = div*dr*data[k-2];
302
                for (l=1; l<(k-1); l++)
Peter Eastman's avatar
Peter Eastman committed
303
304
305
306
307
308
309
310
311
                {
                    data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)*data[k-l-1]);
                }
                data[0] = div*(1-dr)*data[0];
            }

            /* differentiate */
            ddata[0] = -data[0];

312
            for (k=1; k<order; k++)
Peter Eastman's avatar
Peter Eastman committed
313
314
315
316
            {
                ddata[k] = data[k-1]-data[k];
            }

peastman's avatar
peastman committed
317
            div           = 1.0/(order-1);
Peter Eastman's avatar
Peter Eastman committed
318
319
            data[order-1] = div*dr*data[order-2];

320
            for (l=1; l<(order-1); l++)
Peter Eastman's avatar
Peter Eastman committed
321
322
323
324
325
            {
                data[order-l-1] = div*((dr+l)*data[order-l-2]+(order-l-dr)*data[order-l-1]);
            }
            data[0] = div*(1-dr)*data[0];
        }
326
327
328
329
330
    }
}


static void
peastman's avatar
peastman committed
331
pme_grid_spread_charge(pme_t pme, const vector<double>& charges)
332
{
Peter Eastman's avatar
Peter Eastman committed
333
334
335
336
337
338
    int       order;
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
    int       xindex,yindex,zindex;
    int       index;
peastman's avatar
peastman committed
339
340
341
342
    double    q;
    double *  thetax;
    double *  thetay;
    double *  thetaz;
Peter Eastman's avatar
Peter Eastman committed
343
344

    order = pme->order;
345

346
    /* Reset the grid */
347
    for (i=0;i<pme->ngrid[0]*pme->ngrid[1]*pme->ngrid[2];i++)
Peter Eastman's avatar
Peter Eastman committed
348
349
350
    {
        pme->grid[i].re = pme->grid[i].im = 0;
    }
351

352
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
353
    {
peastman's avatar
peastman committed
354
        q = charges[i];
355

356
        /* Grid index for the actual atom position */
Peter Eastman's avatar
Peter Eastman committed
357
358
359
        x0index = pme->particleindex[i][0];
        y0index = pme->particleindex[i][1];
        z0index = pme->particleindex[i][2];
360

361
        /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
Peter Eastman's avatar
Peter Eastman committed
362
363
364
        thetax  = &(pme->bsplines_theta[0][i*order]);
        thetay  = &(pme->bsplines_theta[1][i*order]);
        thetaz  = &(pme->bsplines_theta[2][i*order]);
365

366
367
368
        /* Loop over norder*norder*norder (typically 4*4*4) neighbor cells.
         *
         * As a neat optimization, we only spread in the forward direction, but apply PBC!
369
         *
370
         * Since we are going to do an FFT on the grid, it doesnt matter where the data is,
371
         * in frequency space the result will be the same.
372
373
374
375
376
         *
         * So, the influence function (bsplines) will probably be something like (0.15,0.35,0.35,0.15),
         * with largest weight 2-3 steps forward (you dont need to understand that for the implementation :-)
         * Effectively, you can look at this as translating the entire grid.
         *
377
         * Why do we do this stupid thing?
378
         *
379
         * 1) The loops get much simpler
380
381
382
         * 2) Just looking forward will hopefully get us more cache hits
         * 3) When we parallelize things, we only need to communicate in one direction instead of two!
         */
383

384
        for (ix=0;ix<order;ix++)
Peter Eastman's avatar
Peter Eastman committed
385
        {
386
            /* Calculate index, apply PBC so we spread to index 0/1/2 when a particle is close to the upper limit of the grid */
Peter Eastman's avatar
Peter Eastman committed
387
            xindex = (x0index + ix) % pme->ngrid[0];
388

389
            for (iy=0;iy<order;iy++)
Peter Eastman's avatar
Peter Eastman committed
390
391
            {
                yindex = (y0index + iy) % pme->ngrid[1];
392

393
                for (iz=0;iz<order;iz++)
Peter Eastman's avatar
Peter Eastman committed
394
395
396
                {
                    /* Can be optimized, but we keep it simple here */
                    zindex               = (z0index + iz) % pme->ngrid[2];
397
                    /* Calculate index in the charge grid */
Peter Eastman's avatar
Peter Eastman committed
398
                    index                = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
399
                    /* Add the charge times the bspline spread/interpolation factors to this grid position */
Peter Eastman's avatar
Peter Eastman committed
400
401
402
403
404
                    pme->grid[index].re += q*thetax[ix]*thetay[iy]*thetaz[iz];
                }
            }
        }
    }
405
406
407
408
409
410
}



static void
pme_reciprocal_convolution(pme_t     pme,
peastman's avatar
peastman committed
411
412
413
                           const Vec3 periodicBoxVectors[3],
                           const Vec3 recipBoxVectors[3],
                           double *  energy)
414
{
Peter Eastman's avatar
Peter Eastman committed
415
416
    int kx,ky,kz;
    int nx,ny,nz;
peastman's avatar
peastman committed
417
418
419
420
421
422
423
424
425
426
427
428
    double mx,my,mz;
    double mhx,mhy,mhz,m2;
    double one_4pi_eps;
    double virxx,virxy,virxz,viryy,viryz,virzz;
    double bx,by,bz;
    double d1,d2;
    double eterm,vfactor,struct2,ets2;
    double esum;
    double factor;
    double denom;
    double boxfactor;
    double maxkx,maxky,maxkz;
Peter Eastman's avatar
Peter Eastman committed
429
430
431
432
433
434
435

    t_complex *ptr;

    nx = pme->ngrid[0];
    ny = pme->ngrid[1];
    nz = pme->ngrid[2];

peastman's avatar
peastman committed
436
437
438
    one_4pi_eps = ONE_4PI_EPS0/pme->epsilon_r;
    factor = M_PI*M_PI/(pme->ewaldcoeff*pme->ewaldcoeff);
    boxfactor = M_PI*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2];
Peter Eastman's avatar
Peter Eastman committed
439
440
441
442
443
444
445
446
447

    esum = 0;
    virxx = 0;
    virxy = 0;
    virxz = 0;
    viryy = 0;
    viryz = 0;
    virzz = 0;

peastman's avatar
peastman committed
448
449
450
    maxkx = (nx+1)/2;
    maxky = (ny+1)/2;
    maxkz = (nz+1)/2;
451

452
    for (kx=0;kx<nx;kx++)
453
454
    {
        /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
455
        mx  = (kx<maxkx) ? kx : (kx-nx);
456
        mhx = mx*recipBoxVectors[0][0];
457
458
        bx  = boxfactor*pme->bsplines_moduli[0][kx];

459
        for (ky=0;ky<ny;ky++)
460
461
        {
            /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
462
            my  = (ky<maxky) ? ky : (ky-ny);
463
            mhy = mx*recipBoxVectors[1][0]+my*recipBoxVectors[1][1];
464
            by  = pme->bsplines_moduli[1][ky];
465

466
            for (kz=0;kz<nz;kz++)
Peter Eastman's avatar
Peter Eastman committed
467
            {
468
469
470
471
472
473
474
475
                /* If the net charge of the system is 0.0, there will not be any DC (direct current, zero frequency) component. However,
                 * we can still handle charged systems through a charge correction, in which case the DC
                 * component should be excluded from recprocal space. We will anyway run into problems below when dividing with the
                 * frequency if it is zero...
                 *
                 * In cuda you could probably work around this by setting something to 0.0 instead, but the short story is that we
                 * should skip the zero frequency case!
                 */
Peter Eastman's avatar
Peter Eastman committed
476
477
478
479
                if (kx==0 && ky==0 && kz==0)
                {
                    continue;
                }
480
481

                /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
482
                mz        = (kz<maxkz) ? kz : (kz-nz);
483
                mhz       = mx*recipBoxVectors[2][0]+my*recipBoxVectors[2][1]+mz*recipBoxVectors[2][2];
484
485

                /* Pointer to the grid cell in question */
Peter Eastman's avatar
Peter Eastman committed
486
                ptr       = pme->grid + kx*ny*nz + ky*nz + kz;
487
488

                /* Get grid data for this frequency */
Peter Eastman's avatar
Peter Eastman committed
489
490
                d1        = ptr->re;
                d2        = ptr->im;
491

492
                /* Calculate the convolution - see the Essman/Darden paper for the equation! */
Peter Eastman's avatar
Peter Eastman committed
493
494
495
                m2        = mhx*mhx+mhy*mhy+mhz*mhz;
                bz        = pme->bsplines_moduli[2][kz];
                denom     = m2*bx*by*bz;
496

Peter Eastman's avatar
Peter Eastman committed
497
                eterm     = one_4pi_eps*exp(-factor*m2)/denom;
498
499

                /* write back convolution data to grid */
Peter Eastman's avatar
Peter Eastman committed
500
501
                ptr->re   = d1*eterm;
                ptr->im   = d2*eterm;
502

Peter Eastman's avatar
Peter Eastman committed
503
                struct2   = (d1*d1+d2*d2);
504

Peter Eastman's avatar
Peter Eastman committed
505
506
507
508
509
510
                /* Long-range PME contribution to the energy for this frequency */
                ets2      = eterm*struct2;
                esum     += ets2;
            }
        }
    }
511

512
    /* The factor 0.5 is nothing special, but it is better to have it here than inside the loop :-) */
peastman's avatar
peastman committed
513
    *energy = 0.5*esum;
514
515
516
}


517
static void
peastman's avatar
peastman committed
518
519
520
521
dpme_reciprocal_convolution(pme_t pme,
                           const Vec3 periodicBoxVectors[3],
                           const Vec3 recipBoxVectors[3],
                           double* energy)
522
523
524
{
    int kx,ky,kz;
    int nx,ny,nz;
peastman's avatar
peastman committed
525
526
527
528
529
530
531
532
533
    double mx,my,mz;
    double mhx,mhy,mhz,m2;
    double bx,by,bz;
    double d1,d2;
    double eterm,struct2,ets2;
    double esum;
    double denom;
    double boxfactor;
    double maxkx,maxky,maxkz;
534
535
536
537
538
539
540

    t_complex *ptr;

    nx = pme->ngrid[0];
    ny = pme->ngrid[1];
    nz = pme->ngrid[2];

peastman's avatar
peastman committed
541
    boxfactor = -2*M_PI*sqrt(M_PI) / (6.0*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]);
542
543
544

    esum = 0;

peastman's avatar
peastman committed
545
546
547
    maxkx = (nx+1)/2;
    maxky = (ny+1)/2;
    maxkz = (nz+1)/2;
548

peastman's avatar
peastman committed
549
550
551
552
553
    double bfac = M_PI / pme->ewaldcoeff;
    double fac1 = 2.0*M_PI*M_PI*M_PI*sqrt(M_PI);
    double fac2 = pme->ewaldcoeff*pme->ewaldcoeff*pme->ewaldcoeff;
    double fac3 = -2.0*pme->ewaldcoeff*M_PI*M_PI;
    double b, m, m3, expfac, expterm, erfcterm;
554
555
556
557

    for (kx=0;kx<nx;kx++)
    {
        /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
558
        mx  = ((kx<maxkx) ? kx : (kx-nx));
559
560
561
562
563
564
        mhx = mx*recipBoxVectors[0][0];
        bx  = pme->bsplines_moduli[0][kx];

        for (ky=0;ky<ny;ky++)
        {
            /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
565
            my  = ((ky<maxky) ? ky : (ky-ny));
566
567
568
569
570
571
572
573
574
575
            mhy = mx*recipBoxVectors[1][0]+my*recipBoxVectors[1][1];
            by  = pme->bsplines_moduli[1][ky];

            for (kz=0;kz<nz;kz++)
            {
                /*
                 * Unlike the Coulombic case, there's an m=0 term so all terms are considered here.
                 */

                /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
576
                mz        = ((kz<maxkz) ? kz : (kz-nz));
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
                mhz       = mx*recipBoxVectors[2][0]+my*recipBoxVectors[2][1]+mz*recipBoxVectors[2][2];

                /* Pointer to the grid cell in question */
                ptr       = pme->grid + kx*ny*nz + ky*nz + kz;

                /* Get grid data for this frequency */
                d1        = ptr->re;
                d2        = ptr->im;

                /* Calculate the convolution - see the Essman/Darden paper for the equation! */
                m2        = mhx*mhx+mhy*mhy+mhz*mhz;
                bz        = pme->bsplines_moduli[2][kz];
                denom     = boxfactor / (bx*by*bz);

                m = sqrt(m2);
                m3 = m*m2;
                b = bfac*m;
                expfac = -b*b;
                erfcterm = erfc(b);
                expterm = exp(expfac);

                eterm     = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;

                /* write back convolution data to grid */
                ptr->re   = d1*eterm;
                ptr->im   = d2*eterm;

                struct2   = (d1*d1+d2*d2);

                /* Long-range PME contribution to the energy for this frequency */
                ets2      = eterm*struct2;
                esum     += ets2;
            }
        }
    }
    // Remember the C6 energy is attractive, hence the negative sign.
peastman's avatar
peastman committed
613
    *energy = 0.5*esum;
614
615
616
}


617
static void
618
pme_grid_interpolate_force(pme_t pme,
peastman's avatar
peastman committed
619
620
621
                           const Vec3 recipBoxVectors[3],
                           const vector<double>& charges,
                           vector<Vec3>& forces)
622
{
Peter Eastman's avatar
Peter Eastman committed
623
624
625
626
627
628
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
    int       xindex,yindex,zindex;
    int       index;
    int       order;
peastman's avatar
peastman committed
629
630
631
632
633
634
635
636
637
638
639
    double    q;
    double *  thetax;
    double *  thetay;
    double *  thetaz;
    double *  dthetax;
    double *  dthetay;
    double *  dthetaz;
    double    tx,ty,tz;
    double    dtx,dty,dtz;
    double    fx,fy,fz;
    double    gridvalue;
Peter Eastman's avatar
Peter Eastman committed
640
641
642
643
644
645
646
    int       nx,ny,nz;

    nx    = pme->ngrid[0];
    ny    = pme->ngrid[1];
    nz    = pme->ngrid[2];

    order = pme->order;
647

648
    /* This is almost identical to the charge spreading routine! */
649

650
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
651
652
    {
        fx = fy = fz = 0;
653

peastman's avatar
peastman committed
654
        q = charges[i];
655

656
        /* Grid index for the actual atom position */
Peter Eastman's avatar
Peter Eastman committed
657
658
659
        x0index = pme->particleindex[i][0];
        y0index = pme->particleindex[i][1];
        z0index = pme->particleindex[i][2];
660

661
        /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
Peter Eastman's avatar
Peter Eastman committed
662
663
664
665
666
667
        thetax  = &(pme->bsplines_theta[0][i*order]);
        thetay  = &(pme->bsplines_theta[1][i*order]);
        thetaz  = &(pme->bsplines_theta[2][i*order]);
        dthetax = &(pme->bsplines_dtheta[0][i*order]);
        dthetay = &(pme->bsplines_dtheta[1][i*order]);
        dthetaz = &(pme->bsplines_dtheta[2][i*order]);
668

Peter Eastman's avatar
Peter Eastman committed
669
        /* See pme_grid_spread_charge() for comments about the order here, and only interpolation in one direction */
670

671
672
673
        /* Since we will add order^3 (typically 4*4*4=64) terms to the force on each particle, we use temporary fx/fy/fz
         * variables, and only add it to memory forces[] at the end.
         */
674
        for (ix=0;ix<order;ix++)
Peter Eastman's avatar
Peter Eastman committed
675
676
        {
            xindex = (x0index + ix) % pme->ngrid[0];
677
            /* Get both the bspline factor and its derivative with respect to the x coordinate! */
Peter Eastman's avatar
Peter Eastman committed
678
679
            tx     = thetax[ix];
            dtx    = dthetax[ix];
680

681
            for (iy=0;iy<order;iy++)
Peter Eastman's avatar
Peter Eastman committed
682
683
            {
                yindex = (y0index + iy) % pme->ngrid[1];
684
                /* bspline + derivative wrt y */
Peter Eastman's avatar
Peter Eastman committed
685
686
                ty     = thetay[iy];
                dty    = dthetay[iy];
687

688
                for (iz=0;iz<order;iz++)
Peter Eastman's avatar
Peter Eastman committed
689
690
691
                {
                    /* Can be optimized, but we keep it simple here */
                    zindex               = (z0index + iz) % pme->ngrid[2];
692
                    /* bspline + derivative wrt z */
Peter Eastman's avatar
Peter Eastman committed
693
694
695
                    tz                   = thetaz[iz];
                    dtz                  = dthetaz[iz];
                    index                = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
696

697
698
                    /* Get the fft+convoluted+ifft:d data from the grid, which must be real by definition */
                    /* Checking that the imaginary part is indeed zero might be a good check :-) */
Peter Eastman's avatar
Peter Eastman committed
699
700
701
702
703
704
705
706
707
                    gridvalue            = pme->grid[index].re;

                    /* The d component of the force is calculated by taking the derived bspline in dimension d, normal bsplines in the other two */
                    fx                  += dtx*ty*tz*gridvalue;
                    fy                  += tx*dty*tz*gridvalue;
                    fz                  += tx*ty*dtz*gridvalue;
                }
            }
        }
708
        /* Update memory force, note that we multiply by charge and some box stuff */
709
710
711
        forces[i][0] -= q*(fx*nx*recipBoxVectors[0][0]);
        forces[i][1] -= q*(fx*nx*recipBoxVectors[1][0]+fy*ny*recipBoxVectors[1][1]);
        forces[i][2] -= q*(fx*nx*recipBoxVectors[2][0]+fy*ny*recipBoxVectors[2][1]+fz*nz*recipBoxVectors[2][2]);
Peter Eastman's avatar
Peter Eastman committed
712
    }
713
714
715
716
717
718
}



/* EXPORTED ROUTINES */

719
int
720
pme_init(pme_t *       ppme,
peastman's avatar
peastman committed
721
         double        ewaldcoeff,
Peter Eastman's avatar
Peter Eastman committed
722
         int           natoms,
peastman's avatar
peastman committed
723
         const int     ngrid[3],
Peter Eastman's avatar
Peter Eastman committed
724
         int           pme_order,
peastman's avatar
peastman committed
725
         double        epsilon_r)
726
727
728
{
    pme_t pme;
    int   d;
729

730
731
732
733
734
    pme = (pme_t) malloc(sizeof(struct pme));

    pme->order       = pme_order;
    pme->epsilon_r   = epsilon_r;
    pme->ewaldcoeff  = ewaldcoeff;
Peter Eastman's avatar
Peter Eastman committed
735
    pme->natoms      = natoms;
736

737
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
738
739
    {
        pme->ngrid[d]            = ngrid[d];
peastman's avatar
peastman committed
740
741
        pme->bsplines_theta[d]   = (double *)malloc(sizeof(double)*pme_order*natoms);
        pme->bsplines_dtheta[d]  = (double *)malloc(sizeof(double)*pme_order*natoms);
Peter Eastman's avatar
Peter Eastman committed
742
    }
743

Peter Eastman's avatar
Peter Eastman committed
744
745
    pme->particlefraction = (rvec *)malloc(sizeof(rvec)*natoms);
    pme->particleindex    = (ivec *)malloc(sizeof(ivec)*natoms);
746

Peter Eastman's avatar
Peter Eastman committed
747
748
    /* Allocate charge grid storage */
    pme->grid        = (t_complex *)malloc(sizeof(t_complex)*ngrid[0]*ngrid[1]*ngrid[2]);
749

Peter Eastman's avatar
Peter Eastman committed
750
    fftpack_init_3d(&pme->fftplan,ngrid[0],ngrid[1],ngrid[2]);
751

Peter Eastman's avatar
Peter Eastman committed
752
753
    /* Setup bspline moduli (see Essman paper) */
    pme_calculate_bsplines_moduli(pme);
754

Peter Eastman's avatar
Peter Eastman committed
755
    *ppme = pme;
756

757
758
759
760
761
762
763
764
    return 0;
}





int pme_exec(pme_t       pme,
peastman's avatar
peastman committed
765
766
767
768
769
             const vector<Vec3>& atomCoordinates,
             vector<Vec3>& forces,
             const vector<double>& charges,
             const Vec3 periodicBoxVectors[3],
             double* energy)
770
771
{
    /* Routine is called with coordinates in x, a box, and charges in q */
772

peastman's avatar
peastman committed
773
    Vec3 recipBoxVectors[3];
774
775
    invert_box_vectors(periodicBoxVectors, recipBoxVectors);
    
776
777
778
779
    /* Before we can do the actual interpolation, we need to recalculate and update
     * the indices for each particle in the charge grid (initialized in pme_init()),
     * and what its fractional offset in this grid cell is.
     */
780

Peter Eastman's avatar
Peter Eastman committed
781
    /* Update charge grid indices and fractional offsets for each atom.
782
     * The indices/fractions are stored internally in the pme datatype
783
     */
784
    pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors);
785

Peter Eastman's avatar
Peter Eastman committed
786
787
    /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
    pme_update_bsplines(pme);
788

Peter Eastman's avatar
Peter Eastman committed
789
    /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
peastman's avatar
peastman committed
790
    pme_grid_spread_charge(pme, charges);
791

Peter Eastman's avatar
Peter Eastman committed
792
793
    /* do 3d-fft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid);
794

Peter Eastman's avatar
Peter Eastman committed
795
    /* solve in k-space */
796
    pme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);
797

Peter Eastman's avatar
Peter Eastman committed
798
799
    /* do 3d-invfft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid);
800

Peter Eastman's avatar
Peter Eastman committed
801
    /* Get the particle forces from the grid and bsplines in the pme structure */
802
    pme_grid_interpolate_force(pme,recipBoxVectors,charges,forces);
803
804
805
806
807

    return 0;
}


808
int pme_exec_dpme(pme_t       pme,
peastman's avatar
peastman committed
809
810
811
812
813
             const vector<Vec3>& atomCoordinates,
             vector<Vec3>& forces,
             const vector<double>& c6s,
             const Vec3 periodicBoxVectors[3],
             double* energy)
814
815
816
{
    /* Routine is called with coordinates in x, a box, and charges in q */

peastman's avatar
peastman committed
817
    Vec3 recipBoxVectors[3];
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
    invert_box_vectors(periodicBoxVectors, recipBoxVectors);

    /* Before we can do the actual interpolation, we need to recalculate and update
     * the indices for each particle in the charge grid (initialized in pme_init()),
     * and what its fractional offset in this grid cell is.
     */

    /* Update charge grid indices and fractional offsets for each atom.
     * The indices/fractions are stored internally in the pme datatype
     */
    pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors);

    /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
    pme_update_bsplines(pme);

    /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
    pme_grid_spread_charge(pme, c6s);

    /* do 3d-fft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid);

    /* solve in k-space */
    dpme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);

    /* do 3d-invfft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid);

    /* Get the particle forces from the grid and bsplines in the pme structure */
    pme_grid_interpolate_force(pme,recipBoxVectors,c6s,forces);

    return 0;
}

851
852
853
854

int
pme_destroy(pme_t    pme)
{
Peter Eastman's avatar
Peter Eastman committed
855
    int d;
856

Peter Eastman's avatar
Peter Eastman committed
857
    free(pme->grid);
858

859
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
860
861
862
863
864
    {
        free(pme->bsplines_moduli[d]);
        free(pme->bsplines_theta[d]);
        free(pme->bsplines_dtheta[d]);
    }
865

Peter Eastman's avatar
Peter Eastman committed
866
867
    free(pme->particlefraction);
    free(pme->particleindex);
868

Peter Eastman's avatar
Peter Eastman committed
869
    fftpack_destroy(pme->fftplan);
870

Peter Eastman's avatar
Peter Eastman committed
871
872
    /* destroy structure itself */
    free(pme);
873

Peter Eastman's avatar
Peter Eastman committed
874
    return 0;
875
}
876
877

} // namespace OpenMM