"tools/vscode:/vscode.git/clone" did not exist on "bb87353a2e7b89d6e6d64b8815bac51e571ab231"
ReferencePME.cpp 30.6 KB
Newer Older
1
/*
2
 * Reference implementation of PME reciprocal space interactions.
3
 *
4
5
6
 * Copyright (c) 2009, Erik Lindahl, Rossen Apostolov, Szilard Pall
 * All rights reserved.
 * Contact: lindahl@cbr.su.se Stockholm University, Sweden.
7
8
 *
 * Redistribution and use in source and binary forms, with or without
9
10
 * modification, are permitted provided that the following conditions are met:
 *
11
12
13
14
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer. Redistributions in binary
 * form must reproduce the above copyright notice, this list of conditions and
 * the following disclaimer in the documentation and/or other materials provided
15
 * with the distribution.
16
17
 * Neither the name of the author/university nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
18
19
 * specific prior written permission.
 *
20
21
22
23
24
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
26
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
30
31
32
33
34
35
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

36
#include "ReferencePME.h"
37
38
#include "fftpack.h"

39
using std::vector;
40
41
42

typedef int    ivec[3];

43
namespace OpenMM {
44
45
46

struct pme
{
Peter Eastman's avatar
Peter Eastman committed
47
48
    int          natoms;
    RealOpenMM       ewaldcoeff;
49

Peter Eastman's avatar
Peter Eastman committed
50
    t_complex *  grid;                 /* Memory for the grid we spread charges on.
51
52
53
                                        * Element (i,j,k) is accessed as:
                                        * grid[i*ngrid[1]*ngrid[2] + j*ngrid[2] + k]
                                        */
Peter Eastman's avatar
Peter Eastman committed
54
55
    int          ngrid[3];             /* Total grid dimensions (all data is complex!) */
    fftpack_t    fftplan;              /* Handle to fourier transform setup  */
56

Peter Eastman's avatar
Peter Eastman committed
57
    int          order;                /* PME interpolation order. Almost always 4 */
58

59
    /* Data for bspline interpolation, see the Essman PME paper */
Peter Eastman's avatar
Peter Eastman committed
60
61
62
    RealOpenMM *     bsplines_moduli[3];   /* 3 pointers, to x/y/z bspline moduli, each of length ngrid[x/y/z]   */
    RealOpenMM *     bsplines_theta[3];    /* each of x/y/z has length order*natoms */
    RealOpenMM *     bsplines_dtheta[3];   /* each of x/y/z has length order*natoms */
63

Peter Eastman's avatar
Peter Eastman committed
64
    ivec *       particleindex;        /* Array of length natoms. Each element is
65
66
67
                                        * an ivec (3 ints) that specify the grid
                                        * indices for that particular atom. Updated every step!
                                        */
68
    rvec *       particlefraction;     /* Array of length natoms. Fractional offset in the grid for
69
70
                                        * each atom in all three dimensions.
                                        */
71

72
73
74
75
76
77
78
79
    /* Further explanation of index/fraction:
     *
     * Assume we have a cell of size 10*10*10nm, and a total grid dimension of 100*100*100 cells.
     * In other words, each cell is 0.1*0.1*0.1 nm.
     *
     * If particle i has coordinates { 0.543 , 6.235 , -0.73 }, we will get:
     *
     * particleindex[i]    = { 5 , 62 , 92 }         (-0.73 + 10 = 9.27, we always apply PBC for grid calculations!)
80
     * particlefraction[i] = { 0.43 , 0.35 , 0.7 }   (this is the fraction of the cell length where the atom is)
81
82
83
84
85
86
     *
     * (The reason for precaculating / storing these is that it gets a bit more complex for triclinic cells :-)
     *
     * In the current code version we might assume that a particle is not more than a whole box length away from
     * the central cell, i.e., in this case we would assume all coordinates fall in -10 nm < x,y,z < 20 nm.
     */
87

Peter Eastman's avatar
Peter Eastman committed
88
    RealOpenMM       epsilon_r;             /* Dielectric coefficient to use, typically 1.0 */
89
};
90
91
92
93
94
95
96


/* Internal setup routines */



/* Only called once from init_pme(), performance does not matter! */
97
static void
98
99
pme_calculate_bsplines_moduli(pme_t pme)
{
Peter Eastman's avatar
Peter Eastman committed
100
101
102
103
104
105
106
107
108
109
110
    int       nmax;
    int       i,j,k,l,d;
    int       order;
    int       ndata;
    RealOpenMM *  data;
    RealOpenMM *  ddata;
    RealOpenMM *  bsplines_data;
    RealOpenMM    div;
    RealOpenMM    sc,ss,arg;

    nmax = 0;
111
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
112
113
114
    {
        nmax = (pme->ngrid[d] > nmax) ? pme->ngrid[d] : nmax;
        pme->bsplines_moduli[d] = (RealOpenMM *) malloc(sizeof(RealOpenMM)*pme->ngrid[d]);
115
    }
116

Peter Eastman's avatar
Peter Eastman committed
117
118
119
120
121
122
123
124
125
126
127
    order = pme->order;

    /* temp storage in this routine */
    data          = (RealOpenMM *) malloc(sizeof(RealOpenMM)*order);
    ddata         = (RealOpenMM *) malloc(sizeof(RealOpenMM)*order);
    bsplines_data = (RealOpenMM *) malloc(sizeof(RealOpenMM)*nmax);

    data[order-1]=0;
    data[1]=0;
    data[0]=1;

128
    for (k=3;k<order;k++)
Peter Eastman's avatar
Peter Eastman committed
129
130
131
    {
        div=(RealOpenMM) (1.0/(k-1.0));
        data[k-1]=0;
132
        for (l=1;l<(k-1);l++)
Peter Eastman's avatar
Peter Eastman committed
133
134
135
136
137
138
139
140
        {
            data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
        }
        data[0]=div*data[0];
    }

    /* differentiate */
    ddata[0]=-data[0];
141
    for (k=1;k<order;k++)
Peter Eastman's avatar
Peter Eastman committed
142
143
144
145
146
147
148
    {
        ddata[k]=data[k-1]-data[k];
    }

    div=(RealOpenMM) (1.0/(order-1));
    data[order-1]=0;

149
    for (l=1;l<(order-1);l++)
Peter Eastman's avatar
Peter Eastman committed
150
151
152
153
154
    {
        data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
    }
    data[0]=div*data[0];

155
    for (i=0;i<nmax;i++)
Peter Eastman's avatar
Peter Eastman committed
156
157
158
    {
        bsplines_data[i]=0;
    }
159
    for (i=1;i<=order;i++)
Peter Eastman's avatar
Peter Eastman committed
160
161
162
163
164
    {
        bsplines_data[i]=data[i-1];
    }

    /* Evaluate the actual bspline moduli for X/Y/Z */
165
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
166
167
    {
        ndata = pme->ngrid[d];
168
        for (i=0;i<ndata;i++)
Peter Eastman's avatar
Peter Eastman committed
169
170
        {
            sc=ss=0;
171
            for (j=0;j<ndata;j++)
Peter Eastman's avatar
Peter Eastman committed
172
173
174
175
176
177
178
            {
                arg=(RealOpenMM) ((2.0*M_PI*i*j)/ndata);
                sc+=bsplines_data[j]*cos(arg);
                ss+=bsplines_data[j]*sin(arg);
            }
            pme->bsplines_moduli[d][i]=sc*sc+ss*ss;
        }
179
        for (i=0;i<ndata;i++)
Peter Eastman's avatar
Peter Eastman committed
180
        {
181
            if (pme->bsplines_moduli[d][i]<1.0e-7)
Peter Eastman's avatar
Peter Eastman committed
182
183
184
185
186
187
188
189
190
191
            {
                pme->bsplines_moduli[d][i]=(pme->bsplines_moduli[d][i-1]+pme->bsplines_moduli[d][i+1])/2;
            }
        }
    }

    /* Release temp storage */
    free(data);
    free(ddata);
    free(bsplines_data);
192
193
194
}


195
196
197
198
199
200
201
202
203
static void invert_box_vectors(const RealVec boxVectors[3], RealVec recipBoxVectors[3])
{
    RealOpenMM determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
    assert(determinant > 0);
    RealOpenMM scale = 1.0/determinant;
    recipBoxVectors[0] = RealVec(boxVectors[1][1]*boxVectors[2][2], 0, 0)*scale;
    recipBoxVectors[1] = RealVec(-boxVectors[1][0]*boxVectors[2][2], boxVectors[0][0]*boxVectors[2][2], 0)*scale;
    recipBoxVectors[2] = RealVec(boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0], -boxVectors[0][0]*boxVectors[2][1], boxVectors[0][0]*boxVectors[1][1])*scale;
}
204
205
206

static void
pme_update_grid_index_and_fraction(pme_t    pme,
207
                                   const vector<RealVec>& atomCoordinates,
208
209
                                   const RealVec periodicBoxVectors[3],
                                   const RealVec recipBoxVectors[3])
210
{
Peter Eastman's avatar
Peter Eastman committed
211
212
213
214
215
    int    i;
    int    d;
    RealOpenMM t;
    int    ti;

216
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
217
    {
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
        /* Index calculation (Look mom, no conditionals!):
         *
         * Both for Cuda and modern CPUs it is nice to avoid conditionals, but we still need to apply periodic boundary conditions.
         * Instead of having loops to add/subtract the box dimension, we do it this way:
         *
         * 1. First add the box size, to make sure this atom coordinate isnt -0.1 or something.
         *    After this we assume all fractional box positions are *positive*.
         *    The reason for this is that we always want to round coordinates _down_ to get
         *    their grid index, and when taking the integer part of -3.4 we would get -3, not -4 as we want.
         *    Since we anyway need the grid indices to fall in the central box, it is more convenient
         *    to first manipulate the coordinates to be positive.
         * 2. Convert to integer grid index
         *    Since we have added a whole box unit in step 1, this index might actually be larger than
         *    the grid dimension. Examples, assuming 10*10*10nm box and grid dimension 100*100*100 (spacing 0.1 nm):
         *
         *    coordinate is { 0.543 , 6.235 , -0.73 }
         *
         *    x[i][d]/box[d]                      becomes   { 0.0543 , 0.6235 , -0.073 }
         *    (x[i][d]/box[d] + 1.0)              becomes   { 1.0543 , 1.6235 , 0.927 }
         *    (x[i][d]/box[d] + 1.0)*ngrid[d]     becomes   { 105.43 , 162.35 , 92.7 }
         *
         *    integer part is now { 105 , 162 , 92 }
         *
         *    The fraction is calculates as t-ti, which becomes { 0.43 , 0.35 , 0.7 }
         *
         * 3. Take the first integer index part (which can be larger than the grid) modulo the grid dimension
         *
         *    Now we get { 5 , 62 , 92 }
         *
         *    Voila, both index and fraction, entirely without conditionals. The one limitation here is that
         *    we only add one box length, so if the particle had a coordinate <=-10.0, we would be screwed.
         *    In principle we can of course add 100.0, but that just moves the problem, it doesnt solve it.
         *    In practice, MD programs will apply PBC to reset particles inside the central box to avoid
         *    numerical problems, so this shouldnt cause any problems.
         *    (And, by adding 100.0 box lengths, we would lose a bit of numerical accuracy here!)
         */
        RealVec coord = atomCoordinates[i];
255
        for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
256
        {
peastman's avatar
peastman committed
257
258
            t = coord[0]*recipBoxVectors[0][d]+coord[1]*recipBoxVectors[1][d]+coord[2]*recipBoxVectors[2][d];
            t = (t-floor(t))*pme->ngrid[d];
Peter Eastman's avatar
Peter Eastman committed
259
260
261
262
263
264
            ti = (int) t;

            pme->particlefraction[i][d] = t - ti;
            pme->particleindex[i][d]    = ti % pme->ngrid[d];
        }
    }
265
266
267
}


268
/* Ugly bspline calculation taken from Tom Dardens reference equations.
269
270
271
272
 * This probably very sub-optimal in Cuda? Separate kernel?
 *
 * In practice, it might help to require order=4 for the cuda port.
 */
273
static void
274
275
pme_update_bsplines(pme_t    pme)
{
Peter Eastman's avatar
Peter Eastman committed
276
277
278
279
280
    int       i,j,k,l;
    int       order;
    RealOpenMM    dr,div;
    RealOpenMM *  data;
    RealOpenMM *  ddata;
281

Peter Eastman's avatar
Peter Eastman committed
282
    order = pme->order;
283

284
    for (i=0; (i<pme->natoms); i++)
Peter Eastman's avatar
Peter Eastman committed
285
    {
286
        for (j=0; j<3; j++)
Peter Eastman's avatar
Peter Eastman committed
287
288
289
290
291
292
293
294
295
296
        {
            /* dr is relative offset from lower cell limit */
            dr = pme->particlefraction[i][j];

            data  = &(pme->bsplines_theta[j][i*order]);
            ddata = &(pme->bsplines_dtheta[j][i*order]);
            data[order-1] = 0;
            data[1]       = dr;
            data[0]       = 1-dr;

297
            for (k=3; k<order; k++)
Peter Eastman's avatar
Peter Eastman committed
298
299
300
            {
                div = (RealOpenMM) (1.0/(k-1.0));
                data[k-1] = div*dr*data[k-2];
301
                for (l=1; l<(k-1); l++)
Peter Eastman's avatar
Peter Eastman committed
302
303
304
305
306
307
308
309
310
                {
                    data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)*data[k-l-1]);
                }
                data[0] = div*(1-dr)*data[0];
            }

            /* differentiate */
            ddata[0] = -data[0];

311
            for (k=1; k<order; k++)
Peter Eastman's avatar
Peter Eastman committed
312
313
314
315
316
317
318
            {
                ddata[k] = data[k-1]-data[k];
            }

            div           = (RealOpenMM) (1.0/(order-1));
            data[order-1] = div*dr*data[order-2];

319
            for (l=1; l<(order-1); l++)
Peter Eastman's avatar
Peter Eastman committed
320
321
322
323
324
            {
                data[order-l-1] = div*((dr+l)*data[order-l-2]+(order-l-dr)*data[order-l-1]);
            }
            data[0] = div*(1-dr)*data[0];
        }
325
326
327
328
329
    }
}


static void
330
pme_grid_spread_charge(pme_t pme, const vector<RealOpenMM>& charges)
331
{
Peter Eastman's avatar
Peter Eastman committed
332
333
334
335
336
337
338
339
340
341
342
343
    int       order;
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
    int       xindex,yindex,zindex;
    int       index;
    RealOpenMM    q;
    RealOpenMM *  thetax;
    RealOpenMM *  thetay;
    RealOpenMM *  thetaz;

    order = pme->order;
344

345
    /* Reset the grid */
346
    for (i=0;i<pme->ngrid[0]*pme->ngrid[1]*pme->ngrid[2];i++)
Peter Eastman's avatar
Peter Eastman committed
347
348
349
    {
        pme->grid[i].re = pme->grid[i].im = 0;
    }
350

351
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
352
    {
peastman's avatar
peastman committed
353
        q = charges[i];
354

355
        /* Grid index for the actual atom position */
Peter Eastman's avatar
Peter Eastman committed
356
357
358
        x0index = pme->particleindex[i][0];
        y0index = pme->particleindex[i][1];
        z0index = pme->particleindex[i][2];
359

360
        /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
Peter Eastman's avatar
Peter Eastman committed
361
362
363
        thetax  = &(pme->bsplines_theta[0][i*order]);
        thetay  = &(pme->bsplines_theta[1][i*order]);
        thetaz  = &(pme->bsplines_theta[2][i*order]);
364

365
366
367
        /* Loop over norder*norder*norder (typically 4*4*4) neighbor cells.
         *
         * As a neat optimization, we only spread in the forward direction, but apply PBC!
368
         *
369
         * Since we are going to do an FFT on the grid, it doesnt matter where the data is,
370
         * in frequency space the result will be the same.
371
372
373
374
375
         *
         * So, the influence function (bsplines) will probably be something like (0.15,0.35,0.35,0.15),
         * with largest weight 2-3 steps forward (you dont need to understand that for the implementation :-)
         * Effectively, you can look at this as translating the entire grid.
         *
376
         * Why do we do this stupid thing?
377
         *
378
         * 1) The loops get much simpler
379
380
381
         * 2) Just looking forward will hopefully get us more cache hits
         * 3) When we parallelize things, we only need to communicate in one direction instead of two!
         */
382

383
        for (ix=0;ix<order;ix++)
Peter Eastman's avatar
Peter Eastman committed
384
        {
385
            /* Calculate index, apply PBC so we spread to index 0/1/2 when a particle is close to the upper limit of the grid */
Peter Eastman's avatar
Peter Eastman committed
386
            xindex = (x0index + ix) % pme->ngrid[0];
387

388
            for (iy=0;iy<order;iy++)
Peter Eastman's avatar
Peter Eastman committed
389
390
            {
                yindex = (y0index + iy) % pme->ngrid[1];
391

392
                for (iz=0;iz<order;iz++)
Peter Eastman's avatar
Peter Eastman committed
393
394
395
                {
                    /* Can be optimized, but we keep it simple here */
                    zindex               = (z0index + iz) % pme->ngrid[2];
396
                    /* Calculate index in the charge grid */
Peter Eastman's avatar
Peter Eastman committed
397
                    index                = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
398
                    /* Add the charge times the bspline spread/interpolation factors to this grid position */
Peter Eastman's avatar
Peter Eastman committed
399
400
401
402
403
                    pme->grid[index].re += q*thetax[ix]*thetay[iy]*thetaz[iz];
                }
            }
        }
    }
404
405
406
407
408
409
}



static void
pme_reciprocal_convolution(pme_t     pme,
410
411
412
                           const RealVec periodicBoxVectors[3],
                           const RealVec recipBoxVectors[3],
                           RealOpenMM *  energy)
413
{
Peter Eastman's avatar
Peter Eastman committed
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
    int kx,ky,kz;
    int nx,ny,nz;
    RealOpenMM mx,my,mz;
    RealOpenMM mhx,mhy,mhz,m2;
    RealOpenMM one_4pi_eps;
    RealOpenMM virxx,virxy,virxz,viryy,viryz,virzz;
    RealOpenMM bx,by,bz;
    RealOpenMM d1,d2;
    RealOpenMM eterm,vfactor,struct2,ets2;
    RealOpenMM esum;
    RealOpenMM factor;
    RealOpenMM denom;
    RealOpenMM boxfactor;
    RealOpenMM maxkx,maxky,maxkz;

    t_complex *ptr;

    nx = pme->ngrid[0];
    ny = pme->ngrid[1];
    nz = pme->ngrid[2];

    one_4pi_eps = (RealOpenMM) (ONE_4PI_EPS0/pme->epsilon_r);
    factor = (RealOpenMM) (M_PI*M_PI/(pme->ewaldcoeff*pme->ewaldcoeff));
437
    boxfactor = (RealOpenMM) (M_PI*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]);
Peter Eastman's avatar
Peter Eastman committed
438
439
440
441
442
443
444
445
446
447
448
449

    esum = 0;
    virxx = 0;
    virxy = 0;
    virxz = 0;
    viryy = 0;
    viryz = 0;
    virzz = 0;

    maxkx = (RealOpenMM) ((nx+1)/2);
    maxky = (RealOpenMM) ((ny+1)/2);
    maxkz = (RealOpenMM) ((nz+1)/2);
450

451
    for (kx=0;kx<nx;kx++)
452
453
    {
        /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
Peter Eastman's avatar
Peter Eastman committed
454
        mx  = (RealOpenMM) ((kx<maxkx) ? kx : (kx-nx));
455
        mhx = mx*recipBoxVectors[0][0];
456
457
        bx  = boxfactor*pme->bsplines_moduli[0][kx];

458
        for (ky=0;ky<ny;ky++)
459
460
        {
            /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
Peter Eastman's avatar
Peter Eastman committed
461
            my  = (RealOpenMM) ((ky<maxky) ? ky : (ky-ny));
462
            mhy = mx*recipBoxVectors[1][0]+my*recipBoxVectors[1][1];
463
            by  = pme->bsplines_moduli[1][ky];
464

465
            for (kz=0;kz<nz;kz++)
Peter Eastman's avatar
Peter Eastman committed
466
            {
467
468
469
470
471
472
473
474
                /* If the net charge of the system is 0.0, there will not be any DC (direct current, zero frequency) component. However,
                 * we can still handle charged systems through a charge correction, in which case the DC
                 * component should be excluded from recprocal space. We will anyway run into problems below when dividing with the
                 * frequency if it is zero...
                 *
                 * In cuda you could probably work around this by setting something to 0.0 instead, but the short story is that we
                 * should skip the zero frequency case!
                 */
Peter Eastman's avatar
Peter Eastman committed
475
476
477
478
                if (kx==0 && ky==0 && kz==0)
                {
                    continue;
                }
479
480

                /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
Peter Eastman's avatar
Peter Eastman committed
481
                mz        = (RealOpenMM) ((kz<maxkz) ? kz : (kz-nz));
482
                mhz       = mx*recipBoxVectors[2][0]+my*recipBoxVectors[2][1]+mz*recipBoxVectors[2][2];
483
484

                /* Pointer to the grid cell in question */
Peter Eastman's avatar
Peter Eastman committed
485
                ptr       = pme->grid + kx*ny*nz + ky*nz + kz;
486
487

                /* Get grid data for this frequency */
Peter Eastman's avatar
Peter Eastman committed
488
489
                d1        = ptr->re;
                d2        = ptr->im;
490

491
                /* Calculate the convolution - see the Essman/Darden paper for the equation! */
Peter Eastman's avatar
Peter Eastman committed
492
493
494
                m2        = mhx*mhx+mhy*mhy+mhz*mhz;
                bz        = pme->bsplines_moduli[2][kz];
                denom     = m2*bx*by*bz;
495

Peter Eastman's avatar
Peter Eastman committed
496
                eterm     = one_4pi_eps*exp(-factor*m2)/denom;
497
498

                /* write back convolution data to grid */
Peter Eastman's avatar
Peter Eastman committed
499
500
                ptr->re   = d1*eterm;
                ptr->im   = d2*eterm;
501

Peter Eastman's avatar
Peter Eastman committed
502
                struct2   = (d1*d1+d2*d2);
503

Peter Eastman's avatar
Peter Eastman committed
504
505
506
507
508
509
                /* Long-range PME contribution to the energy for this frequency */
                ets2      = eterm*struct2;
                esum     += ets2;
            }
        }
    }
510

511
    /* The factor 0.5 is nothing special, but it is better to have it here than inside the loop :-) */
512
    *energy = (RealOpenMM) (0.5*esum);
513
514
515
}


516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
static void
dpme_reciprocal_convolution(pme_t     pme,
                           const RealVec periodicBoxVectors[3],
                           const RealVec recipBoxVectors[3],
                           RealOpenMM *  energy)
{
    int kx,ky,kz;
    int nx,ny,nz;
    RealOpenMM mx,my,mz;
    RealOpenMM mhx,mhy,mhz,m2;
    RealOpenMM bx,by,bz;
    RealOpenMM d1,d2;
    RealOpenMM eterm,struct2,ets2;
    RealOpenMM esum;
    RealOpenMM denom;
    RealOpenMM boxfactor;
    RealOpenMM maxkx,maxky,maxkz;

    t_complex *ptr;

    nx = pme->ngrid[0];
    ny = pme->ngrid[1];
    nz = pme->ngrid[2];

    boxfactor = (RealOpenMM) M_PI*sqrt(M_PI) / (6.0*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]);

    esum = 0;

    maxkx = (RealOpenMM) ((nx+1)/2);
    maxky = (RealOpenMM) ((ny+1)/2);
    maxkz = (RealOpenMM) ((nz+1)/2);

    RealOpenMM bfac = M_PI / pme->ewaldcoeff;
    RealOpenMM fac1 = 2.0*M_PI*M_PI*M_PI*sqrt(M_PI);
    RealOpenMM fac2 = pme->ewaldcoeff*pme->ewaldcoeff*pme->ewaldcoeff;
    RealOpenMM fac3 = -2.0*pme->ewaldcoeff*M_PI*M_PI;
    RealOpenMM b, m, m3, expfac, expterm, erfcterm;

    for (kx=0;kx<nx;kx++)
    {
        /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
        mx  = (RealOpenMM) ((kx<maxkx) ? kx : (kx-nx));
        mhx = mx*recipBoxVectors[0][0];
        bx  = pme->bsplines_moduli[0][kx];

        for (ky=0;ky<ny;ky++)
        {
            /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
            my  = (RealOpenMM) ((ky<maxky) ? ky : (ky-ny));
            mhy = mx*recipBoxVectors[1][0]+my*recipBoxVectors[1][1];
            by  = pme->bsplines_moduli[1][ky];

            for (kz=0;kz<nz;kz++)
            {
                /*
                 * Unlike the Coulombic case, there's an m=0 term so all terms are considered here.
                 */

                /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
                mz        = (RealOpenMM) ((kz<maxkz) ? kz : (kz-nz));
                mhz       = mx*recipBoxVectors[2][0]+my*recipBoxVectors[2][1]+mz*recipBoxVectors[2][2];

                /* Pointer to the grid cell in question */
                ptr       = pme->grid + kx*ny*nz + ky*nz + kz;

                /* Get grid data for this frequency */
                d1        = ptr->re;
                d2        = ptr->im;

                /* Calculate the convolution - see the Essman/Darden paper for the equation! */
                m2        = mhx*mhx+mhy*mhy+mhz*mhz;
                bz        = pme->bsplines_moduli[2][kz];
                denom     = boxfactor / (bx*by*bz);

                m = sqrt(m2);
                m3 = m*m2;
                b = bfac*m;
                expfac = -b*b;
                erfcterm = erfc(b);
                expterm = exp(expfac);

                eterm     = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;

                /* write back convolution data to grid */
                ptr->re   = d1*eterm;
                ptr->im   = d2*eterm;

                struct2   = (d1*d1+d2*d2);

                /* Long-range PME contribution to the energy for this frequency */
                ets2      = eterm*struct2;
                esum     += ets2;
            }
        }
    }
    // Remember the C6 energy is attractive, hence the negative sign.
    *energy = (RealOpenMM) (-esum);
}


616
static void
617
pme_grid_interpolate_force(pme_t pme,
618
                           const RealVec recipBoxVectors[3],
619
620
                           const vector<RealOpenMM>& charges,
                           vector<RealVec>& forces)
621
{
Peter Eastman's avatar
Peter Eastman committed
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
    int       xindex,yindex,zindex;
    int       index;
    int       order;
    RealOpenMM    q;
    RealOpenMM *  thetax;
    RealOpenMM *  thetay;
    RealOpenMM *  thetaz;
    RealOpenMM *  dthetax;
    RealOpenMM *  dthetay;
    RealOpenMM *  dthetaz;
    RealOpenMM    tx,ty,tz;
    RealOpenMM    dtx,dty,dtz;
    RealOpenMM    fx,fy,fz;
    RealOpenMM    gridvalue;
    int       nx,ny,nz;

    nx    = pme->ngrid[0];
    ny    = pme->ngrid[1];
    nz    = pme->ngrid[2];

    order = pme->order;
646

647
    /* This is almost identical to the charge spreading routine! */
648

649
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
650
651
    {
        fx = fy = fz = 0;
652

peastman's avatar
peastman committed
653
        q = charges[i];
654

655
        /* Grid index for the actual atom position */
Peter Eastman's avatar
Peter Eastman committed
656
657
658
        x0index = pme->particleindex[i][0];
        y0index = pme->particleindex[i][1];
        z0index = pme->particleindex[i][2];
659

660
        /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
Peter Eastman's avatar
Peter Eastman committed
661
662
663
664
665
666
        thetax  = &(pme->bsplines_theta[0][i*order]);
        thetay  = &(pme->bsplines_theta[1][i*order]);
        thetaz  = &(pme->bsplines_theta[2][i*order]);
        dthetax = &(pme->bsplines_dtheta[0][i*order]);
        dthetay = &(pme->bsplines_dtheta[1][i*order]);
        dthetaz = &(pme->bsplines_dtheta[2][i*order]);
667

Peter Eastman's avatar
Peter Eastman committed
668
        /* See pme_grid_spread_charge() for comments about the order here, and only interpolation in one direction */
669

670
671
672
        /* Since we will add order^3 (typically 4*4*4=64) terms to the force on each particle, we use temporary fx/fy/fz
         * variables, and only add it to memory forces[] at the end.
         */
673
        for (ix=0;ix<order;ix++)
Peter Eastman's avatar
Peter Eastman committed
674
675
        {
            xindex = (x0index + ix) % pme->ngrid[0];
676
            /* Get both the bspline factor and its derivative with respect to the x coordinate! */
Peter Eastman's avatar
Peter Eastman committed
677
678
            tx     = thetax[ix];
            dtx    = dthetax[ix];
679

680
            for (iy=0;iy<order;iy++)
Peter Eastman's avatar
Peter Eastman committed
681
682
            {
                yindex = (y0index + iy) % pme->ngrid[1];
683
                /* bspline + derivative wrt y */
Peter Eastman's avatar
Peter Eastman committed
684
685
                ty     = thetay[iy];
                dty    = dthetay[iy];
686

687
                for (iz=0;iz<order;iz++)
Peter Eastman's avatar
Peter Eastman committed
688
689
690
                {
                    /* Can be optimized, but we keep it simple here */
                    zindex               = (z0index + iz) % pme->ngrid[2];
691
                    /* bspline + derivative wrt z */
Peter Eastman's avatar
Peter Eastman committed
692
693
694
                    tz                   = thetaz[iz];
                    dtz                  = dthetaz[iz];
                    index                = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
695

696
697
                    /* Get the fft+convoluted+ifft:d data from the grid, which must be real by definition */
                    /* Checking that the imaginary part is indeed zero might be a good check :-) */
Peter Eastman's avatar
Peter Eastman committed
698
699
700
701
702
703
704
705
706
                    gridvalue            = pme->grid[index].re;

                    /* The d component of the force is calculated by taking the derived bspline in dimension d, normal bsplines in the other two */
                    fx                  += dtx*ty*tz*gridvalue;
                    fy                  += tx*dty*tz*gridvalue;
                    fz                  += tx*ty*dtz*gridvalue;
                }
            }
        }
707
        /* Update memory force, note that we multiply by charge and some box stuff */
708
709
710
        forces[i][0] -= q*(fx*nx*recipBoxVectors[0][0]);
        forces[i][1] -= q*(fx*nx*recipBoxVectors[1][0]+fy*ny*recipBoxVectors[1][1]);
        forces[i][2] -= q*(fx*nx*recipBoxVectors[2][0]+fy*ny*recipBoxVectors[2][1]+fz*nz*recipBoxVectors[2][2]);
Peter Eastman's avatar
Peter Eastman committed
711
    }
712
713
714
715
716
717
}



/* EXPORTED ROUTINES */

718
int
719
pme_init(pme_t *       ppme,
Peter Eastman's avatar
Peter Eastman committed
720
721
722
723
         RealOpenMM        ewaldcoeff,
         int           natoms,
         const int           ngrid[3],
         int           pme_order,
724
725
726
727
         RealOpenMM        epsilon_r)
{
    pme_t pme;
    int   d;
728

729
730
731
732
733
    pme = (pme_t) malloc(sizeof(struct pme));

    pme->order       = pme_order;
    pme->epsilon_r   = epsilon_r;
    pme->ewaldcoeff  = ewaldcoeff;
Peter Eastman's avatar
Peter Eastman committed
734
    pme->natoms      = natoms;
735

736
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
737
738
739
740
741
    {
        pme->ngrid[d]            = ngrid[d];
        pme->bsplines_theta[d]   = (RealOpenMM *)malloc(sizeof(RealOpenMM)*pme_order*natoms);
        pme->bsplines_dtheta[d]  = (RealOpenMM *)malloc(sizeof(RealOpenMM)*pme_order*natoms);
    }
742

Peter Eastman's avatar
Peter Eastman committed
743
744
    pme->particlefraction = (rvec *)malloc(sizeof(rvec)*natoms);
    pme->particleindex    = (ivec *)malloc(sizeof(ivec)*natoms);
745

Peter Eastman's avatar
Peter Eastman committed
746
747
    /* Allocate charge grid storage */
    pme->grid        = (t_complex *)malloc(sizeof(t_complex)*ngrid[0]*ngrid[1]*ngrid[2]);
748

Peter Eastman's avatar
Peter Eastman committed
749
    fftpack_init_3d(&pme->fftplan,ngrid[0],ngrid[1],ngrid[2]);
750

Peter Eastman's avatar
Peter Eastman committed
751
752
    /* Setup bspline moduli (see Essman paper) */
    pme_calculate_bsplines_moduli(pme);
753

Peter Eastman's avatar
Peter Eastman committed
754
    *ppme = pme;
755

756
757
758
759
760
761
762
763
    return 0;
}





int pme_exec(pme_t       pme,
764
765
766
             const vector<RealVec>& atomCoordinates,
             vector<RealVec>& forces,
             const vector<RealOpenMM>& charges,
767
768
             const RealVec periodicBoxVectors[3],
             RealOpenMM* energy)
769
770
{
    /* Routine is called with coordinates in x, a box, and charges in q */
771

772
773
774
    RealVec recipBoxVectors[3];
    invert_box_vectors(periodicBoxVectors, recipBoxVectors);
    
775
776
777
778
    /* Before we can do the actual interpolation, we need to recalculate and update
     * the indices for each particle in the charge grid (initialized in pme_init()),
     * and what its fractional offset in this grid cell is.
     */
779

Peter Eastman's avatar
Peter Eastman committed
780
    /* Update charge grid indices and fractional offsets for each atom.
781
     * The indices/fractions are stored internally in the pme datatype
782
     */
783
    pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors);
784

Peter Eastman's avatar
Peter Eastman committed
785
786
    /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
    pme_update_bsplines(pme);
787

Peter Eastman's avatar
Peter Eastman committed
788
    /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
peastman's avatar
peastman committed
789
    pme_grid_spread_charge(pme, charges);
790

Peter Eastman's avatar
Peter Eastman committed
791
792
    /* do 3d-fft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid);
793

Peter Eastman's avatar
Peter Eastman committed
794
    /* solve in k-space */
795
    pme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);
796

Peter Eastman's avatar
Peter Eastman committed
797
798
    /* do 3d-invfft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid);
799

Peter Eastman's avatar
Peter Eastman committed
800
    /* Get the particle forces from the grid and bsplines in the pme structure */
801
    pme_grid_interpolate_force(pme,recipBoxVectors,charges,forces);
802
803
804
805
806

    return 0;
}


807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
int pme_exec_dpme(pme_t       pme,
             const vector<RealVec>& atomCoordinates,
             vector<RealVec>& forces,
             const vector<RealOpenMM>& c6s,
             const RealVec periodicBoxVectors[3],
             RealOpenMM* energy)
{
    /* Routine is called with coordinates in x, a box, and charges in q */

    RealVec recipBoxVectors[3];
    invert_box_vectors(periodicBoxVectors, recipBoxVectors);

    /* Before we can do the actual interpolation, we need to recalculate and update
     * the indices for each particle in the charge grid (initialized in pme_init()),
     * and what its fractional offset in this grid cell is.
     */

    /* Update charge grid indices and fractional offsets for each atom.
     * The indices/fractions are stored internally in the pme datatype
     */
    pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors);

    /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
    pme_update_bsplines(pme);

    /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
    pme_grid_spread_charge(pme, c6s);

    /* do 3d-fft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid);

    /* solve in k-space */
    dpme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);

    /* do 3d-invfft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid);

    /* Get the particle forces from the grid and bsplines in the pme structure */
    pme_grid_interpolate_force(pme,recipBoxVectors,c6s,forces);

    return 0;
}

850
851
852
853

int
pme_destroy(pme_t    pme)
{
Peter Eastman's avatar
Peter Eastman committed
854
    int d;
855

Peter Eastman's avatar
Peter Eastman committed
856
    free(pme->grid);
857

858
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
859
860
861
862
863
    {
        free(pme->bsplines_moduli[d]);
        free(pme->bsplines_theta[d]);
        free(pme->bsplines_dtheta[d]);
    }
864

Peter Eastman's avatar
Peter Eastman committed
865
866
    free(pme->particlefraction);
    free(pme->particleindex);
867

Peter Eastman's avatar
Peter Eastman committed
868
    fftpack_destroy(pme->fftplan);
869

Peter Eastman's avatar
Peter Eastman committed
870
871
    /* destroy structure itself */
    free(pme);
872

Peter Eastman's avatar
Peter Eastman committed
873
    return 0;
874
}
875
876

} // namespace OpenMM