ReferencePME.cpp 30.2 KB
Newer Older
1
/*
2
 * Reference implementation of PME reciprocal space interactions.
3
 *
4
 * Copyright (c) 2009-2023, Erik Lindahl, Rossen Apostolov, Szilard Pall, Peter Eastman
5
6
 * All rights reserved.
 * Contact: lindahl@cbr.su.se Stockholm University, Sweden.
7
8
 *
 * Redistribution and use in source and binary forms, with or without
9
10
 * modification, are permitted provided that the following conditions are met:
 *
11
12
13
14
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer. Redistributions in binary
 * form must reproduce the above copyright notice, this list of conditions and
 * the following disclaimer in the documentation and/or other materials provided
15
 * with the distribution.
16
17
 * Neither the name of the author/university nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
18
19
 * specific prior written permission.
 *
20
21
22
23
24
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
26
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
30
31
32
33
34
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
Peter Eastman's avatar
Peter Eastman committed
35
#include <complex>
36

37
#include "ReferencePME.h"
38
#include "ReferenceForce.h"
peastman's avatar
peastman committed
39
#include "SimTKOpenMMRealType.h"
40

Peter Eastman's avatar
Peter Eastman committed
41
42
43
44
45
46
#ifdef _MSC_VER
  #define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"

using namespace std;
47
48
49

typedef int    ivec[3];

50
namespace OpenMM {
51
52
53

struct pme
{
Peter Eastman's avatar
Peter Eastman committed
54
    int          natoms;
peastman's avatar
peastman committed
55
    double       ewaldcoeff;
56

Peter Eastman's avatar
Peter Eastman committed
57
    complex<double>* grid;             /* Memory for the grid we spread charges on.
58
59
60
                                        * Element (i,j,k) is accessed as:
                                        * grid[i*ngrid[1]*ngrid[2] + j*ngrid[2] + k]
                                        */
Peter Eastman's avatar
Peter Eastman committed
61
    int          ngrid[3];             /* Total grid dimensions (all data is complex!) */
62

Peter Eastman's avatar
Peter Eastman committed
63
    int          order;                /* PME interpolation order. Almost always 4 */
64

65
    /* Data for bspline interpolation, see the Essman PME paper */
peastman's avatar
peastman committed
66
67
68
    double *     bsplines_moduli[3];   /* 3 pointers, to x/y/z bspline moduli, each of length ngrid[x/y/z]   */
    double *     bsplines_theta[3];    /* each of x/y/z has length order*natoms */
    double *     bsplines_dtheta[3];   /* each of x/y/z has length order*natoms */
69

Peter Eastman's avatar
Peter Eastman committed
70
    ivec *       particleindex;        /* Array of length natoms. Each element is
71
72
73
                                        * an ivec (3 ints) that specify the grid
                                        * indices for that particular atom. Updated every step!
                                        */
74
    rvec *       particlefraction;     /* Array of length natoms. Fractional offset in the grid for
75
76
                                        * each atom in all three dimensions.
                                        */
77

78
79
80
81
82
83
84
85
    /* Further explanation of index/fraction:
     *
     * Assume we have a cell of size 10*10*10nm, and a total grid dimension of 100*100*100 cells.
     * In other words, each cell is 0.1*0.1*0.1 nm.
     *
     * If particle i has coordinates { 0.543 , 6.235 , -0.73 }, we will get:
     *
     * particleindex[i]    = { 5 , 62 , 92 }         (-0.73 + 10 = 9.27, we always apply PBC for grid calculations!)
86
     * particlefraction[i] = { 0.43 , 0.35 , 0.7 }   (this is the fraction of the cell length where the atom is)
87
88
89
90
91
92
     *
     * (The reason for precaculating / storing these is that it gets a bit more complex for triclinic cells :-)
     *
     * In the current code version we might assume that a particle is not more than a whole box length away from
     * the central cell, i.e., in this case we would assume all coordinates fall in -10 nm < x,y,z < 20 nm.
     */
93

peastman's avatar
peastman committed
94
    double       epsilon_r;             /* Dielectric coefficient to use, typically 1.0 */
95
};
96
97
98
99
100
101
102


/* Internal setup routines */



/* Only called once from init_pme(), performance does not matter! */
103
static void
104
105
pme_calculate_bsplines_moduli(pme_t pme)
{
Peter Eastman's avatar
Peter Eastman committed
106
107
108
109
    int       nmax;
    int       i,j,k,l,d;
    int       order;
    int       ndata;
peastman's avatar
peastman committed
110
111
112
113
114
    double *  data;
    double *  ddata;
    double *  bsplines_data;
    double    div;
    double    sc,ss,arg;
Peter Eastman's avatar
Peter Eastman committed
115
116

    nmax = 0;
117
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
118
119
    {
        nmax = (pme->ngrid[d] > nmax) ? pme->ngrid[d] : nmax;
peastman's avatar
peastman committed
120
        pme->bsplines_moduli[d] = (double *) malloc(sizeof(double)*pme->ngrid[d]);
121
    }
122

Peter Eastman's avatar
Peter Eastman committed
123
124
125
    order = pme->order;

    /* temp storage in this routine */
peastman's avatar
peastman committed
126
127
128
    data          = (double *) malloc(sizeof(double)*order);
    ddata         = (double *) malloc(sizeof(double)*order);
    bsplines_data = (double *) malloc(sizeof(double)*nmax);
Peter Eastman's avatar
Peter Eastman committed
129
130
131
132
133

    data[order-1]=0;
    data[1]=0;
    data[0]=1;

134
    for (k=3;k<order;k++)
Peter Eastman's avatar
Peter Eastman committed
135
    {
peastman's avatar
peastman committed
136
        div=1.0/(k-1.0);
Peter Eastman's avatar
Peter Eastman committed
137
        data[k-1]=0;
138
        for (l=1;l<(k-1);l++)
Peter Eastman's avatar
Peter Eastman committed
139
140
141
142
143
144
145
146
        {
            data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
        }
        data[0]=div*data[0];
    }

    /* differentiate */
    ddata[0]=-data[0];
147
    for (k=1;k<order;k++)
Peter Eastman's avatar
Peter Eastman committed
148
149
150
151
    {
        ddata[k]=data[k-1]-data[k];
    }

peastman's avatar
peastman committed
152
    div=1.0/(order-1);
Peter Eastman's avatar
Peter Eastman committed
153
154
    data[order-1]=0;

155
    for (l=1;l<(order-1);l++)
Peter Eastman's avatar
Peter Eastman committed
156
157
158
159
160
    {
        data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
    }
    data[0]=div*data[0];

161
    for (i=0;i<nmax;i++)
Peter Eastman's avatar
Peter Eastman committed
162
163
164
    {
        bsplines_data[i]=0;
    }
165
    for (i=1;i<=order;i++)
Peter Eastman's avatar
Peter Eastman committed
166
167
168
169
170
    {
        bsplines_data[i]=data[i-1];
    }

    /* Evaluate the actual bspline moduli for X/Y/Z */
171
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
172
173
    {
        ndata = pme->ngrid[d];
174
        for (i=0;i<ndata;i++)
Peter Eastman's avatar
Peter Eastman committed
175
176
        {
            sc=ss=0;
177
            for (j=0;j<ndata;j++)
Peter Eastman's avatar
Peter Eastman committed
178
            {
peastman's avatar
peastman committed
179
                arg=(2.0*M_PI*i*j)/ndata;
Peter Eastman's avatar
Peter Eastman committed
180
181
182
183
184
                sc+=bsplines_data[j]*cos(arg);
                ss+=bsplines_data[j]*sin(arg);
            }
            pme->bsplines_moduli[d][i]=sc*sc+ss*ss;
        }
185
        for (i=0;i<ndata;i++)
Peter Eastman's avatar
Peter Eastman committed
186
        {
187
            if (pme->bsplines_moduli[d][i]<1.0e-7)
Peter Eastman's avatar
Peter Eastman committed
188
            {
189
                pme->bsplines_moduli[d][i]=(pme->bsplines_moduli[d][(i-1+ndata)%ndata]+pme->bsplines_moduli[d][(i+1)%ndata])/2;
Peter Eastman's avatar
Peter Eastman committed
190
191
192
193
194
195
196
197
            }
        }
    }

    /* Release temp storage */
    free(data);
    free(ddata);
    free(bsplines_data);
198
199
200
201
}

static void
pme_update_grid_index_and_fraction(pme_t    pme,
peastman's avatar
peastman committed
202
203
204
                                   const vector<Vec3>& atomCoordinates,
                                   const Vec3 periodicBoxVectors[3],
                                   const Vec3 recipBoxVectors[3])
205
{
Peter Eastman's avatar
Peter Eastman committed
206
207
    int    i;
    int    d;
peastman's avatar
peastman committed
208
    double t;
Peter Eastman's avatar
Peter Eastman committed
209
210
    int    ti;

211
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
212
    {
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
        /* Index calculation (Look mom, no conditionals!):
         *
         * Both for Cuda and modern CPUs it is nice to avoid conditionals, but we still need to apply periodic boundary conditions.
         * Instead of having loops to add/subtract the box dimension, we do it this way:
         *
         * 1. First add the box size, to make sure this atom coordinate isnt -0.1 or something.
         *    After this we assume all fractional box positions are *positive*.
         *    The reason for this is that we always want to round coordinates _down_ to get
         *    their grid index, and when taking the integer part of -3.4 we would get -3, not -4 as we want.
         *    Since we anyway need the grid indices to fall in the central box, it is more convenient
         *    to first manipulate the coordinates to be positive.
         * 2. Convert to integer grid index
         *    Since we have added a whole box unit in step 1, this index might actually be larger than
         *    the grid dimension. Examples, assuming 10*10*10nm box and grid dimension 100*100*100 (spacing 0.1 nm):
         *
         *    coordinate is { 0.543 , 6.235 , -0.73 }
         *
         *    x[i][d]/box[d]                      becomes   { 0.0543 , 0.6235 , -0.073 }
         *    (x[i][d]/box[d] + 1.0)              becomes   { 1.0543 , 1.6235 , 0.927 }
         *    (x[i][d]/box[d] + 1.0)*ngrid[d]     becomes   { 105.43 , 162.35 , 92.7 }
         *
         *    integer part is now { 105 , 162 , 92 }
         *
         *    The fraction is calculates as t-ti, which becomes { 0.43 , 0.35 , 0.7 }
         *
         * 3. Take the first integer index part (which can be larger than the grid) modulo the grid dimension
         *
         *    Now we get { 5 , 62 , 92 }
         *
         *    Voila, both index and fraction, entirely without conditionals. The one limitation here is that
         *    we only add one box length, so if the particle had a coordinate <=-10.0, we would be screwed.
         *    In principle we can of course add 100.0, but that just moves the problem, it doesnt solve it.
         *    In practice, MD programs will apply PBC to reset particles inside the central box to avoid
         *    numerical problems, so this shouldnt cause any problems.
         *    (And, by adding 100.0 box lengths, we would lose a bit of numerical accuracy here!)
         */
peastman's avatar
peastman committed
249
        Vec3 coord = atomCoordinates[i];
250
        for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
251
        {
peastman's avatar
peastman committed
252
253
            t = coord[0]*recipBoxVectors[0][d]+coord[1]*recipBoxVectors[1][d]+coord[2]*recipBoxVectors[2][d];
            t = (t-floor(t))*pme->ngrid[d];
Peter Eastman's avatar
Peter Eastman committed
254
255
256
257
258
259
            ti = (int) t;

            pme->particlefraction[i][d] = t - ti;
            pme->particleindex[i][d]    = ti % pme->ngrid[d];
        }
    }
260
261
262
}


263
/* Ugly bspline calculation taken from Tom Dardens reference equations.
264
265
266
267
 * This probably very sub-optimal in Cuda? Separate kernel?
 *
 * In practice, it might help to require order=4 for the cuda port.
 */
268
static void
269
270
pme_update_bsplines(pme_t    pme)
{
Peter Eastman's avatar
Peter Eastman committed
271
272
    int       i,j,k,l;
    int       order;
peastman's avatar
peastman committed
273
274
275
    double    dr,div;
    double *  data;
    double *  ddata;
276

Peter Eastman's avatar
Peter Eastman committed
277
    order = pme->order;
278

279
    for (i=0; (i<pme->natoms); i++)
Peter Eastman's avatar
Peter Eastman committed
280
    {
281
        for (j=0; j<3; j++)
Peter Eastman's avatar
Peter Eastman committed
282
283
284
285
286
287
288
289
290
291
        {
            /* dr is relative offset from lower cell limit */
            dr = pme->particlefraction[i][j];

            data  = &(pme->bsplines_theta[j][i*order]);
            ddata = &(pme->bsplines_dtheta[j][i*order]);
            data[order-1] = 0;
            data[1]       = dr;
            data[0]       = 1-dr;

292
            for (k=3; k<order; k++)
Peter Eastman's avatar
Peter Eastman committed
293
            {
peastman's avatar
peastman committed
294
                div = 1.0/(k-1.0);
Peter Eastman's avatar
Peter Eastman committed
295
                data[k-1] = div*dr*data[k-2];
296
                for (l=1; l<(k-1); l++)
Peter Eastman's avatar
Peter Eastman committed
297
298
299
300
301
302
303
304
305
                {
                    data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)*data[k-l-1]);
                }
                data[0] = div*(1-dr)*data[0];
            }

            /* differentiate */
            ddata[0] = -data[0];

306
            for (k=1; k<order; k++)
Peter Eastman's avatar
Peter Eastman committed
307
308
309
310
            {
                ddata[k] = data[k-1]-data[k];
            }

peastman's avatar
peastman committed
311
            div           = 1.0/(order-1);
Peter Eastman's avatar
Peter Eastman committed
312
313
            data[order-1] = div*dr*data[order-2];

314
            for (l=1; l<(order-1); l++)
Peter Eastman's avatar
Peter Eastman committed
315
316
317
318
319
            {
                data[order-l-1] = div*((dr+l)*data[order-l-2]+(order-l-dr)*data[order-l-1]);
            }
            data[0] = div*(1-dr)*data[0];
        }
320
321
322
323
324
    }
}


static void
peastman's avatar
peastman committed
325
pme_grid_spread_charge(pme_t pme, const vector<double>& charges)
326
{
Peter Eastman's avatar
Peter Eastman committed
327
328
329
330
331
332
    int       order;
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
    int       xindex,yindex,zindex;
    int       index;
peastman's avatar
peastman committed
333
334
335
336
    double    q;
    double *  thetax;
    double *  thetay;
    double *  thetaz;
Peter Eastman's avatar
Peter Eastman committed
337
338

    order = pme->order;
339

340
    /* Reset the grid */
341
    for (i=0;i<pme->ngrid[0]*pme->ngrid[1]*pme->ngrid[2];i++)
Peter Eastman's avatar
Peter Eastman committed
342
    {
Peter Eastman's avatar
Peter Eastman committed
343
        pme->grid[i] = complex<double>(0, 0);
Peter Eastman's avatar
Peter Eastman committed
344
    }
345

346
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
347
    {
peastman's avatar
peastman committed
348
        q = charges[i];
349

350
        /* Grid index for the actual atom position */
Peter Eastman's avatar
Peter Eastman committed
351
352
353
        x0index = pme->particleindex[i][0];
        y0index = pme->particleindex[i][1];
        z0index = pme->particleindex[i][2];
354

355
        /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
Peter Eastman's avatar
Peter Eastman committed
356
357
358
        thetax  = &(pme->bsplines_theta[0][i*order]);
        thetay  = &(pme->bsplines_theta[1][i*order]);
        thetaz  = &(pme->bsplines_theta[2][i*order]);
359

360
        /* Loop over norder*norder*norder (typically 5*5*5) neighbor cells.
361
362
         *
         * As a neat optimization, we only spread in the forward direction, but apply PBC!
363
         *
364
         * Since we are going to do an FFT on the grid, it doesn't matter where the data is,
365
         * in frequency space the result will be the same.
366
367
         *
         * So, the influence function (bsplines) will probably be something like (0.15,0.35,0.35,0.15),
368
         * with largest weight 2-3 steps forward (you don't need to understand that for the implementation :-)
369
370
         * Effectively, you can look at this as translating the entire grid.
         *
371
         * Why do we do this stupid thing?
372
         *
373
         * 1) The loops get much simpler
374
375
376
         * 2) Just looking forward will hopefully get us more cache hits
         * 3) When we parallelize things, we only need to communicate in one direction instead of two!
         */
377

378
        for (ix=0;ix<order;ix++)
Peter Eastman's avatar
Peter Eastman committed
379
        {
380
            /* Calculate index, apply PBC so we spread to index 0/1/2 when a particle is close to the upper limit of the grid */
Peter Eastman's avatar
Peter Eastman committed
381
            xindex = (x0index + ix) % pme->ngrid[0];
382

383
            for (iy=0;iy<order;iy++)
Peter Eastman's avatar
Peter Eastman committed
384
385
            {
                yindex = (y0index + iy) % pme->ngrid[1];
386

387
                for (iz=0;iz<order;iz++)
Peter Eastman's avatar
Peter Eastman committed
388
389
                {
                    /* Can be optimized, but we keep it simple here */
Peter Eastman's avatar
Peter Eastman committed
390
                    zindex            = (z0index + iz) % pme->ngrid[2];
391
                    /* Calculate index in the charge grid */
Peter Eastman's avatar
Peter Eastman committed
392
                    index             = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
393
                    /* Add the charge times the bspline spread/interpolation factors to this grid position */
Peter Eastman's avatar
Peter Eastman committed
394
                    pme->grid[index] += q*thetax[ix]*thetay[iy]*thetaz[iz];
Peter Eastman's avatar
Peter Eastman committed
395
396
397
398
                }
            }
        }
    }
399
400
401
402
403
404
}



static void
pme_reciprocal_convolution(pme_t     pme,
peastman's avatar
peastman committed
405
406
407
                           const Vec3 periodicBoxVectors[3],
                           const Vec3 recipBoxVectors[3],
                           double *  energy)
408
{
Peter Eastman's avatar
Peter Eastman committed
409
410
    int kx,ky,kz;
    int nx,ny,nz;
peastman's avatar
peastman committed
411
412
413
414
415
416
417
418
419
420
421
422
    double mx,my,mz;
    double mhx,mhy,mhz,m2;
    double one_4pi_eps;
    double virxx,virxy,virxz,viryy,viryz,virzz;
    double bx,by,bz;
    double d1,d2;
    double eterm,vfactor,struct2,ets2;
    double esum;
    double factor;
    double denom;
    double boxfactor;
    double maxkx,maxky,maxkz;
Peter Eastman's avatar
Peter Eastman committed
423

Peter Eastman's avatar
Peter Eastman committed
424
    complex<double> *ptr;
Peter Eastman's avatar
Peter Eastman committed
425
426
427
428
429

    nx = pme->ngrid[0];
    ny = pme->ngrid[1];
    nz = pme->ngrid[2];

peastman's avatar
peastman committed
430
431
432
    one_4pi_eps = ONE_4PI_EPS0/pme->epsilon_r;
    factor = M_PI*M_PI/(pme->ewaldcoeff*pme->ewaldcoeff);
    boxfactor = M_PI*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2];
Peter Eastman's avatar
Peter Eastman committed
433
434
435
436
437
438
439
440
441

    esum = 0;
    virxx = 0;
    virxy = 0;
    virxz = 0;
    viryy = 0;
    viryz = 0;
    virzz = 0;

peastman's avatar
peastman committed
442
443
444
    maxkx = (nx+1)/2;
    maxky = (ny+1)/2;
    maxkz = (nz+1)/2;
445

446
    for (kx=0;kx<nx;kx++)
447
448
    {
        /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
449
        mx  = (kx<maxkx) ? kx : (kx-nx);
450
        mhx = mx*recipBoxVectors[0][0];
451
452
        bx  = boxfactor*pme->bsplines_moduli[0][kx];

453
        for (ky=0;ky<ny;ky++)
454
455
        {
            /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
456
            my  = (ky<maxky) ? ky : (ky-ny);
457
            mhy = mx*recipBoxVectors[1][0]+my*recipBoxVectors[1][1];
458
            by  = pme->bsplines_moduli[1][ky];
459

460
            for (kz=0;kz<nz;kz++)
Peter Eastman's avatar
Peter Eastman committed
461
            {
462
463
464
465
466
467
468
469
                /* If the net charge of the system is 0.0, there will not be any DC (direct current, zero frequency) component. However,
                 * we can still handle charged systems through a charge correction, in which case the DC
                 * component should be excluded from recprocal space. We will anyway run into problems below when dividing with the
                 * frequency if it is zero...
                 *
                 * In cuda you could probably work around this by setting something to 0.0 instead, but the short story is that we
                 * should skip the zero frequency case!
                 */
Peter Eastman's avatar
Peter Eastman committed
470
471
472
473
                if (kx==0 && ky==0 && kz==0)
                {
                    continue;
                }
474
475

                /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
476
                mz        = (kz<maxkz) ? kz : (kz-nz);
477
                mhz       = mx*recipBoxVectors[2][0]+my*recipBoxVectors[2][1]+mz*recipBoxVectors[2][2];
478
479

                /* Pointer to the grid cell in question */
Peter Eastman's avatar
Peter Eastman committed
480
                ptr       = pme->grid + kx*ny*nz + ky*nz + kz;
481
482

                /* Get grid data for this frequency */
Peter Eastman's avatar
Peter Eastman committed
483
484
                d1        = ptr->real();
                d2        = ptr->imag();
485

486
                /* Calculate the convolution - see the Essman/Darden paper for the equation! */
Peter Eastman's avatar
Peter Eastman committed
487
488
489
                m2        = mhx*mhx+mhy*mhy+mhz*mhz;
                bz        = pme->bsplines_moduli[2][kz];
                denom     = m2*bx*by*bz;
490

Peter Eastman's avatar
Peter Eastman committed
491
                eterm     = one_4pi_eps*exp(-factor*m2)/denom;
492
493

                /* write back convolution data to grid */
Peter Eastman's avatar
Peter Eastman committed
494
495
                ptr->real(d1*eterm);
                ptr->imag(d2*eterm);
496

Peter Eastman's avatar
Peter Eastman committed
497
                struct2   = (d1*d1+d2*d2);
498

Peter Eastman's avatar
Peter Eastman committed
499
500
501
502
503
504
                /* Long-range PME contribution to the energy for this frequency */
                ets2      = eterm*struct2;
                esum     += ets2;
            }
        }
    }
505

506
    /* The factor 0.5 is nothing special, but it is better to have it here than inside the loop :-) */
peastman's avatar
peastman committed
507
    *energy = 0.5*esum;
508
509
510
}


511
static void
peastman's avatar
peastman committed
512
513
514
515
dpme_reciprocal_convolution(pme_t pme,
                           const Vec3 periodicBoxVectors[3],
                           const Vec3 recipBoxVectors[3],
                           double* energy)
516
517
518
{
    int kx,ky,kz;
    int nx,ny,nz;
peastman's avatar
peastman committed
519
520
521
522
523
524
525
526
527
    double mx,my,mz;
    double mhx,mhy,mhz,m2;
    double bx,by,bz;
    double d1,d2;
    double eterm,struct2,ets2;
    double esum;
    double denom;
    double boxfactor;
    double maxkx,maxky,maxkz;
528

Peter Eastman's avatar
Peter Eastman committed
529
    complex<double> *ptr;
530
531
532
533
534

    nx = pme->ngrid[0];
    ny = pme->ngrid[1];
    nz = pme->ngrid[2];

peastman's avatar
peastman committed
535
    boxfactor = -2*M_PI*sqrt(M_PI) / (6.0*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]);
536
537
538

    esum = 0;

peastman's avatar
peastman committed
539
540
541
    maxkx = (nx+1)/2;
    maxky = (ny+1)/2;
    maxkz = (nz+1)/2;
542

peastman's avatar
peastman committed
543
544
545
546
547
    double bfac = M_PI / pme->ewaldcoeff;
    double fac1 = 2.0*M_PI*M_PI*M_PI*sqrt(M_PI);
    double fac2 = pme->ewaldcoeff*pme->ewaldcoeff*pme->ewaldcoeff;
    double fac3 = -2.0*pme->ewaldcoeff*M_PI*M_PI;
    double b, m, m3, expfac, expterm, erfcterm;
548
549
550
551

    for (kx=0;kx<nx;kx++)
    {
        /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
552
        mx  = ((kx<maxkx) ? kx : (kx-nx));
553
554
555
556
557
558
        mhx = mx*recipBoxVectors[0][0];
        bx  = pme->bsplines_moduli[0][kx];

        for (ky=0;ky<ny;ky++)
        {
            /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
559
            my  = ((ky<maxky) ? ky : (ky-ny));
560
561
562
563
564
565
566
567
568
569
            mhy = mx*recipBoxVectors[1][0]+my*recipBoxVectors[1][1];
            by  = pme->bsplines_moduli[1][ky];

            for (kz=0;kz<nz;kz++)
            {
                /*
                 * Unlike the Coulombic case, there's an m=0 term so all terms are considered here.
                 */

                /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
570
                mz        = ((kz<maxkz) ? kz : (kz-nz));
571
572
573
574
575
576
                mhz       = mx*recipBoxVectors[2][0]+my*recipBoxVectors[2][1]+mz*recipBoxVectors[2][2];

                /* Pointer to the grid cell in question */
                ptr       = pme->grid + kx*ny*nz + ky*nz + kz;

                /* Get grid data for this frequency */
Peter Eastman's avatar
Peter Eastman committed
577
578
                d1        = ptr->real();
                d2        = ptr->imag();
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594

                /* Calculate the convolution - see the Essman/Darden paper for the equation! */
                m2        = mhx*mhx+mhy*mhy+mhz*mhz;
                bz        = pme->bsplines_moduli[2][kz];
                denom     = boxfactor / (bx*by*bz);

                m = sqrt(m2);
                m3 = m*m2;
                b = bfac*m;
                expfac = -b*b;
                erfcterm = erfc(b);
                expterm = exp(expfac);

                eterm     = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;

                /* write back convolution data to grid */
Peter Eastman's avatar
Peter Eastman committed
595
596
                ptr->real(d1*eterm);
                ptr->imag(d2*eterm);
597
598
599
600
601
602
603
604
605
606

                struct2   = (d1*d1+d2*d2);

                /* Long-range PME contribution to the energy for this frequency */
                ets2      = eterm*struct2;
                esum     += ets2;
            }
        }
    }
    // Remember the C6 energy is attractive, hence the negative sign.
peastman's avatar
peastman committed
607
    *energy = 0.5*esum;
608
609
610
}


611
static void
612
pme_grid_interpolate_force(pme_t pme,
peastman's avatar
peastman committed
613
614
615
                           const Vec3 recipBoxVectors[3],
                           const vector<double>& charges,
                           vector<Vec3>& forces)
616
{
Peter Eastman's avatar
Peter Eastman committed
617
618
619
620
621
622
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
    int       xindex,yindex,zindex;
    int       index;
    int       order;
peastman's avatar
peastman committed
623
624
625
626
627
628
629
630
631
632
633
    double    q;
    double *  thetax;
    double *  thetay;
    double *  thetaz;
    double *  dthetax;
    double *  dthetay;
    double *  dthetaz;
    double    tx,ty,tz;
    double    dtx,dty,dtz;
    double    fx,fy,fz;
    double    gridvalue;
Peter Eastman's avatar
Peter Eastman committed
634
635
636
637
638
639
640
    int       nx,ny,nz;

    nx    = pme->ngrid[0];
    ny    = pme->ngrid[1];
    nz    = pme->ngrid[2];

    order = pme->order;
641

642
    /* This is almost identical to the charge spreading routine! */
643

644
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
645
646
    {
        fx = fy = fz = 0;
647

peastman's avatar
peastman committed
648
        q = charges[i];
649

650
        /* Grid index for the actual atom position */
Peter Eastman's avatar
Peter Eastman committed
651
652
653
        x0index = pme->particleindex[i][0];
        y0index = pme->particleindex[i][1];
        z0index = pme->particleindex[i][2];
654

655
        /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
Peter Eastman's avatar
Peter Eastman committed
656
657
658
659
660
661
        thetax  = &(pme->bsplines_theta[0][i*order]);
        thetay  = &(pme->bsplines_theta[1][i*order]);
        thetaz  = &(pme->bsplines_theta[2][i*order]);
        dthetax = &(pme->bsplines_dtheta[0][i*order]);
        dthetay = &(pme->bsplines_dtheta[1][i*order]);
        dthetaz = &(pme->bsplines_dtheta[2][i*order]);
662

Peter Eastman's avatar
Peter Eastman committed
663
        /* See pme_grid_spread_charge() for comments about the order here, and only interpolation in one direction */
664

665
        /* Since we will add order^3 (typically 5*5*5=125) terms to the force on each particle, we use temporary fx/fy/fz
666
667
         * variables, and only add it to memory forces[] at the end.
         */
668
        for (ix=0;ix<order;ix++)
Peter Eastman's avatar
Peter Eastman committed
669
670
        {
            xindex = (x0index + ix) % pme->ngrid[0];
671
            /* Get both the bspline factor and its derivative with respect to the x coordinate! */
Peter Eastman's avatar
Peter Eastman committed
672
673
            tx     = thetax[ix];
            dtx    = dthetax[ix];
674

675
            for (iy=0;iy<order;iy++)
Peter Eastman's avatar
Peter Eastman committed
676
677
            {
                yindex = (y0index + iy) % pme->ngrid[1];
678
                /* bspline + derivative wrt y */
Peter Eastman's avatar
Peter Eastman committed
679
680
                ty     = thetay[iy];
                dty    = dthetay[iy];
681

682
                for (iz=0;iz<order;iz++)
Peter Eastman's avatar
Peter Eastman committed
683
684
685
                {
                    /* Can be optimized, but we keep it simple here */
                    zindex               = (z0index + iz) % pme->ngrid[2];
686
                    /* bspline + derivative wrt z */
Peter Eastman's avatar
Peter Eastman committed
687
688
689
                    tz                   = thetaz[iz];
                    dtz                  = dthetaz[iz];
                    index                = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
690

691
692
                    /* Get the fft+convoluted+ifft:d data from the grid, which must be real by definition */
                    /* Checking that the imaginary part is indeed zero might be a good check :-) */
Peter Eastman's avatar
Peter Eastman committed
693
                    gridvalue            = pme->grid[index].real();
Peter Eastman's avatar
Peter Eastman committed
694
695
696
697
698
699
700
701

                    /* The d component of the force is calculated by taking the derived bspline in dimension d, normal bsplines in the other two */
                    fx                  += dtx*ty*tz*gridvalue;
                    fy                  += tx*dty*tz*gridvalue;
                    fz                  += tx*ty*dtz*gridvalue;
                }
            }
        }
702
        /* Update memory force, note that we multiply by charge and some box stuff */
703
704
705
        forces[i][0] -= q*(fx*nx*recipBoxVectors[0][0]);
        forces[i][1] -= q*(fx*nx*recipBoxVectors[1][0]+fy*ny*recipBoxVectors[1][1]);
        forces[i][2] -= q*(fx*nx*recipBoxVectors[2][0]+fy*ny*recipBoxVectors[2][1]+fz*nz*recipBoxVectors[2][2]);
Peter Eastman's avatar
Peter Eastman committed
706
    }
707
708
709
710
711
712
}



/* EXPORTED ROUTINES */

713
int
714
pme_init(pme_t *       ppme,
peastman's avatar
peastman committed
715
         double        ewaldcoeff,
Peter Eastman's avatar
Peter Eastman committed
716
         int           natoms,
peastman's avatar
peastman committed
717
         const int     ngrid[3],
Peter Eastman's avatar
Peter Eastman committed
718
         int           pme_order,
peastman's avatar
peastman committed
719
         double        epsilon_r)
720
721
722
{
    pme_t pme;
    int   d;
723

724
725
726
727
728
    pme = (pme_t) malloc(sizeof(struct pme));

    pme->order       = pme_order;
    pme->epsilon_r   = epsilon_r;
    pme->ewaldcoeff  = ewaldcoeff;
Peter Eastman's avatar
Peter Eastman committed
729
    pme->natoms      = natoms;
730

731
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
732
733
    {
        pme->ngrid[d]            = ngrid[d];
peastman's avatar
peastman committed
734
735
        pme->bsplines_theta[d]   = (double *)malloc(sizeof(double)*pme_order*natoms);
        pme->bsplines_dtheta[d]  = (double *)malloc(sizeof(double)*pme_order*natoms);
Peter Eastman's avatar
Peter Eastman committed
736
    }
737

Peter Eastman's avatar
Peter Eastman committed
738
739
    pme->particlefraction = (rvec *)malloc(sizeof(rvec)*natoms);
    pme->particleindex    = (ivec *)malloc(sizeof(ivec)*natoms);
740

Peter Eastman's avatar
Peter Eastman committed
741
    /* Allocate charge grid storage */
Peter Eastman's avatar
Peter Eastman committed
742
    pme->grid        = (complex<double> *)malloc(sizeof(complex<double>)*ngrid[0]*ngrid[1]*ngrid[2]);
743

Peter Eastman's avatar
Peter Eastman committed
744
745
    /* Setup bspline moduli (see Essman paper) */
    pme_calculate_bsplines_moduli(pme);
746

Peter Eastman's avatar
Peter Eastman committed
747
    *ppme = pme;
748

749
750
751
752
753
754
755
756
    return 0;
}





int pme_exec(pme_t       pme,
peastman's avatar
peastman committed
757
758
759
760
761
             const vector<Vec3>& atomCoordinates,
             vector<Vec3>& forces,
             const vector<double>& charges,
             const Vec3 periodicBoxVectors[3],
             double* energy)
762
763
{
    /* Routine is called with coordinates in x, a box, and charges in q */
764

peastman's avatar
peastman committed
765
    Vec3 recipBoxVectors[3];
766
    ReferenceForce::invertBoxVectors(periodicBoxVectors, recipBoxVectors);
767
    
768
769
770
771
    /* Before we can do the actual interpolation, we need to recalculate and update
     * the indices for each particle in the charge grid (initialized in pme_init()),
     * and what its fractional offset in this grid cell is.
     */
772

Peter Eastman's avatar
Peter Eastman committed
773
    /* Update charge grid indices and fractional offsets for each atom.
774
     * The indices/fractions are stored internally in the pme datatype
775
     */
776
    pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors);
777

Peter Eastman's avatar
Peter Eastman committed
778
779
    /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
    pme_update_bsplines(pme);
780

Peter Eastman's avatar
Peter Eastman committed
781
    /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
peastman's avatar
peastman committed
782
    pme_grid_spread_charge(pme, charges);
783

Peter Eastman's avatar
Peter Eastman committed
784
    /* do 3d-fft */
Peter Eastman's avatar
Peter Eastman committed
785
786
787
788
789
790
    vector<size_t> shape = {(size_t) pme->ngrid[0], (size_t) pme->ngrid[1], (size_t) pme->ngrid[2]};
    vector<size_t> axes = {0, 1, 2};
    vector<ptrdiff_t> stride = {(ptrdiff_t) (pme->ngrid[1]*pme->ngrid[2]*sizeof(complex<double>)),
                                (ptrdiff_t) (pme->ngrid[2]*sizeof(complex<double>)),
                                (ptrdiff_t) sizeof(complex<double>)};
    pocketfft::c2c(shape, stride, stride, axes, true, pme->grid, pme->grid, 1.0, 0);
791

Peter Eastman's avatar
Peter Eastman committed
792
    /* solve in k-space */
793
    pme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);
794

Peter Eastman's avatar
Peter Eastman committed
795
    /* do 3d-invfft */
Peter Eastman's avatar
Peter Eastman committed
796
    pocketfft::c2c(shape, stride, stride, axes, false, pme->grid, pme->grid, 1.0, 0);
797

Peter Eastman's avatar
Peter Eastman committed
798
    /* Get the particle forces from the grid and bsplines in the pme structure */
799
    pme_grid_interpolate_force(pme,recipBoxVectors,charges,forces);
800
801
802
803
804

    return 0;
}


805
int pme_exec_dpme(pme_t       pme,
peastman's avatar
peastman committed
806
807
808
809
810
             const vector<Vec3>& atomCoordinates,
             vector<Vec3>& forces,
             const vector<double>& c6s,
             const Vec3 periodicBoxVectors[3],
             double* energy)
811
812
813
{
    /* Routine is called with coordinates in x, a box, and charges in q */

peastman's avatar
peastman committed
814
    Vec3 recipBoxVectors[3];
815
    ReferenceForce::invertBoxVectors(periodicBoxVectors, recipBoxVectors);
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833

    /* Before we can do the actual interpolation, we need to recalculate and update
     * the indices for each particle in the charge grid (initialized in pme_init()),
     * and what its fractional offset in this grid cell is.
     */

    /* Update charge grid indices and fractional offsets for each atom.
     * The indices/fractions are stored internally in the pme datatype
     */
    pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors);

    /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
    pme_update_bsplines(pme);

    /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
    pme_grid_spread_charge(pme, c6s);

    /* do 3d-fft */
Peter Eastman's avatar
Peter Eastman committed
834
835
836
837
838
839
    vector<size_t> shape = {(size_t) pme->ngrid[0], (size_t) pme->ngrid[1], (size_t) pme->ngrid[2]};
    vector<size_t> axes = {0, 1, 2};
    vector<ptrdiff_t> stride = {(ptrdiff_t) (pme->ngrid[1]*pme->ngrid[2]*sizeof(complex<double>)),
                                (ptrdiff_t) (pme->ngrid[2]*sizeof(complex<double>)),
                                (ptrdiff_t) sizeof(complex<double>)};
    pocketfft::c2c(shape, stride, stride, axes, true, pme->grid, pme->grid, 1.0, 0);
840
841
842
843
844

    /* solve in k-space */
    dpme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);

    /* do 3d-invfft */
Peter Eastman's avatar
Peter Eastman committed
845
    pocketfft::c2c(shape, stride, stride, axes, false, pme->grid, pme->grid, 1.0, 0);
846
847
848
849
850
851
852

    /* Get the particle forces from the grid and bsplines in the pme structure */
    pme_grid_interpolate_force(pme,recipBoxVectors,c6s,forces);

    return 0;
}

853
854
855
856

int
pme_destroy(pme_t    pme)
{
Peter Eastman's avatar
Peter Eastman committed
857
    int d;
858

Peter Eastman's avatar
Peter Eastman committed
859
    free(pme->grid);
860

861
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
862
863
864
865
866
    {
        free(pme->bsplines_moduli[d]);
        free(pme->bsplines_theta[d]);
        free(pme->bsplines_dtheta[d]);
    }
867

Peter Eastman's avatar
Peter Eastman committed
868
869
    free(pme->particlefraction);
    free(pme->particleindex);
870

Peter Eastman's avatar
Peter Eastman committed
871
872
    /* destroy structure itself */
    free(pme);
873

Peter Eastman's avatar
Peter Eastman committed
874
    return 0;
875
}
876
877

} // namespace OpenMM