"platforms/cuda/tests/TestCudaPythonForce.cpp" did not exist on "83d57922865f4fc7714611e172970377a65a7c00"
ReferencePME.cpp 30.8 KB
Newer Older
1
/*
2
 * Reference implementation of PME reciprocal space interactions.
3
 *
Peter Eastman's avatar
Peter Eastman committed
4
 * Copyright (c) 2009-2022, Erik Lindahl, Rossen Apostolov, Szilard Pall, Peter Eastman
5
6
 * All rights reserved.
 * Contact: lindahl@cbr.su.se Stockholm University, Sweden.
7
8
 *
 * Redistribution and use in source and binary forms, with or without
9
10
 * modification, are permitted provided that the following conditions are met:
 *
11
12
13
14
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer. Redistributions in binary
 * form must reproduce the above copyright notice, this list of conditions and
 * the following disclaimer in the documentation and/or other materials provided
15
 * with the distribution.
16
17
 * Neither the name of the author/university nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
18
19
 * specific prior written permission.
 *
20
21
22
23
24
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
26
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
30
31
32
33
34
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
Peter Eastman's avatar
Peter Eastman committed
35
#include <complex>
36

37
#include "ReferencePME.h"
peastman's avatar
peastman committed
38
#include "SimTKOpenMMRealType.h"
39

Peter Eastman's avatar
Peter Eastman committed
40
41
42
43
44
45
#ifdef _MSC_VER
  #define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"

using namespace std;
46
47
48

typedef int    ivec[3];

49
namespace OpenMM {
50
51
52

struct pme
{
Peter Eastman's avatar
Peter Eastman committed
53
    int          natoms;
peastman's avatar
peastman committed
54
    double       ewaldcoeff;
55

Peter Eastman's avatar
Peter Eastman committed
56
    complex<double>* grid;             /* Memory for the grid we spread charges on.
57
58
59
                                        * Element (i,j,k) is accessed as:
                                        * grid[i*ngrid[1]*ngrid[2] + j*ngrid[2] + k]
                                        */
Peter Eastman's avatar
Peter Eastman committed
60
    int          ngrid[3];             /* Total grid dimensions (all data is complex!) */
61

Peter Eastman's avatar
Peter Eastman committed
62
    int          order;                /* PME interpolation order. Almost always 4 */
63

64
    /* Data for bspline interpolation, see the Essman PME paper */
peastman's avatar
peastman committed
65
66
67
    double *     bsplines_moduli[3];   /* 3 pointers, to x/y/z bspline moduli, each of length ngrid[x/y/z]   */
    double *     bsplines_theta[3];    /* each of x/y/z has length order*natoms */
    double *     bsplines_dtheta[3];   /* each of x/y/z has length order*natoms */
68

Peter Eastman's avatar
Peter Eastman committed
69
    ivec *       particleindex;        /* Array of length natoms. Each element is
70
71
72
                                        * an ivec (3 ints) that specify the grid
                                        * indices for that particular atom. Updated every step!
                                        */
73
    rvec *       particlefraction;     /* Array of length natoms. Fractional offset in the grid for
74
75
                                        * each atom in all three dimensions.
                                        */
76

77
78
79
80
81
82
83
84
    /* Further explanation of index/fraction:
     *
     * Assume we have a cell of size 10*10*10nm, and a total grid dimension of 100*100*100 cells.
     * In other words, each cell is 0.1*0.1*0.1 nm.
     *
     * If particle i has coordinates { 0.543 , 6.235 , -0.73 }, we will get:
     *
     * particleindex[i]    = { 5 , 62 , 92 }         (-0.73 + 10 = 9.27, we always apply PBC for grid calculations!)
85
     * particlefraction[i] = { 0.43 , 0.35 , 0.7 }   (this is the fraction of the cell length where the atom is)
86
87
88
89
90
91
     *
     * (The reason for precaculating / storing these is that it gets a bit more complex for triclinic cells :-)
     *
     * In the current code version we might assume that a particle is not more than a whole box length away from
     * the central cell, i.e., in this case we would assume all coordinates fall in -10 nm < x,y,z < 20 nm.
     */
92

peastman's avatar
peastman committed
93
    double       epsilon_r;             /* Dielectric coefficient to use, typically 1.0 */
94
};
95
96
97
98
99
100
101


/* Internal setup routines */



/* Only called once from init_pme(), performance does not matter! */
102
static void
103
104
pme_calculate_bsplines_moduli(pme_t pme)
{
Peter Eastman's avatar
Peter Eastman committed
105
106
107
108
    int       nmax;
    int       i,j,k,l,d;
    int       order;
    int       ndata;
peastman's avatar
peastman committed
109
110
111
112
113
    double *  data;
    double *  ddata;
    double *  bsplines_data;
    double    div;
    double    sc,ss,arg;
Peter Eastman's avatar
Peter Eastman committed
114
115

    nmax = 0;
116
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
117
118
    {
        nmax = (pme->ngrid[d] > nmax) ? pme->ngrid[d] : nmax;
peastman's avatar
peastman committed
119
        pme->bsplines_moduli[d] = (double *) malloc(sizeof(double)*pme->ngrid[d]);
120
    }
121

Peter Eastman's avatar
Peter Eastman committed
122
123
124
    order = pme->order;

    /* temp storage in this routine */
peastman's avatar
peastman committed
125
126
127
    data          = (double *) malloc(sizeof(double)*order);
    ddata         = (double *) malloc(sizeof(double)*order);
    bsplines_data = (double *) malloc(sizeof(double)*nmax);
Peter Eastman's avatar
Peter Eastman committed
128
129
130
131
132

    data[order-1]=0;
    data[1]=0;
    data[0]=1;

133
    for (k=3;k<order;k++)
Peter Eastman's avatar
Peter Eastman committed
134
    {
peastman's avatar
peastman committed
135
        div=1.0/(k-1.0);
Peter Eastman's avatar
Peter Eastman committed
136
        data[k-1]=0;
137
        for (l=1;l<(k-1);l++)
Peter Eastman's avatar
Peter Eastman committed
138
139
140
141
142
143
144
145
        {
            data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
        }
        data[0]=div*data[0];
    }

    /* differentiate */
    ddata[0]=-data[0];
146
    for (k=1;k<order;k++)
Peter Eastman's avatar
Peter Eastman committed
147
148
149
150
    {
        ddata[k]=data[k-1]-data[k];
    }

peastman's avatar
peastman committed
151
    div=1.0/(order-1);
Peter Eastman's avatar
Peter Eastman committed
152
153
    data[order-1]=0;

154
    for (l=1;l<(order-1);l++)
Peter Eastman's avatar
Peter Eastman committed
155
156
157
158
159
    {
        data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
    }
    data[0]=div*data[0];

160
    for (i=0;i<nmax;i++)
Peter Eastman's avatar
Peter Eastman committed
161
162
163
    {
        bsplines_data[i]=0;
    }
164
    for (i=1;i<=order;i++)
Peter Eastman's avatar
Peter Eastman committed
165
166
167
168
169
    {
        bsplines_data[i]=data[i-1];
    }

    /* Evaluate the actual bspline moduli for X/Y/Z */
170
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
171
172
    {
        ndata = pme->ngrid[d];
173
        for (i=0;i<ndata;i++)
Peter Eastman's avatar
Peter Eastman committed
174
175
        {
            sc=ss=0;
176
            for (j=0;j<ndata;j++)
Peter Eastman's avatar
Peter Eastman committed
177
            {
peastman's avatar
peastman committed
178
                arg=(2.0*M_PI*i*j)/ndata;
Peter Eastman's avatar
Peter Eastman committed
179
180
181
182
183
                sc+=bsplines_data[j]*cos(arg);
                ss+=bsplines_data[j]*sin(arg);
            }
            pme->bsplines_moduli[d][i]=sc*sc+ss*ss;
        }
184
        for (i=0;i<ndata;i++)
Peter Eastman's avatar
Peter Eastman committed
185
        {
186
            if (pme->bsplines_moduli[d][i]<1.0e-7)
Peter Eastman's avatar
Peter Eastman committed
187
            {
188
                pme->bsplines_moduli[d][i]=(pme->bsplines_moduli[d][(i-1+ndata)%ndata]+pme->bsplines_moduli[d][(i+1)%ndata])/2;
Peter Eastman's avatar
Peter Eastman committed
189
190
191
192
193
194
195
196
            }
        }
    }

    /* Release temp storage */
    free(data);
    free(ddata);
    free(bsplines_data);
197
198
199
}


peastman's avatar
peastman committed
200
static void invert_box_vectors(const Vec3 boxVectors[3], Vec3 recipBoxVectors[3])
201
{
peastman's avatar
peastman committed
202
    double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
203
    assert(determinant > 0);
peastman's avatar
peastman committed
204
205
206
207
    double scale = 1.0/determinant;
    recipBoxVectors[0] = Vec3(boxVectors[1][1]*boxVectors[2][2], 0, 0)*scale;
    recipBoxVectors[1] = Vec3(-boxVectors[1][0]*boxVectors[2][2], boxVectors[0][0]*boxVectors[2][2], 0)*scale;
    recipBoxVectors[2] = Vec3(boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0], -boxVectors[0][0]*boxVectors[2][1], boxVectors[0][0]*boxVectors[1][1])*scale;
208
}
209
210
211

static void
pme_update_grid_index_and_fraction(pme_t    pme,
peastman's avatar
peastman committed
212
213
214
                                   const vector<Vec3>& atomCoordinates,
                                   const Vec3 periodicBoxVectors[3],
                                   const Vec3 recipBoxVectors[3])
215
{
Peter Eastman's avatar
Peter Eastman committed
216
217
    int    i;
    int    d;
peastman's avatar
peastman committed
218
    double t;
Peter Eastman's avatar
Peter Eastman committed
219
220
    int    ti;

221
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
222
    {
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
        /* Index calculation (Look mom, no conditionals!):
         *
         * Both for Cuda and modern CPUs it is nice to avoid conditionals, but we still need to apply periodic boundary conditions.
         * Instead of having loops to add/subtract the box dimension, we do it this way:
         *
         * 1. First add the box size, to make sure this atom coordinate isnt -0.1 or something.
         *    After this we assume all fractional box positions are *positive*.
         *    The reason for this is that we always want to round coordinates _down_ to get
         *    their grid index, and when taking the integer part of -3.4 we would get -3, not -4 as we want.
         *    Since we anyway need the grid indices to fall in the central box, it is more convenient
         *    to first manipulate the coordinates to be positive.
         * 2. Convert to integer grid index
         *    Since we have added a whole box unit in step 1, this index might actually be larger than
         *    the grid dimension. Examples, assuming 10*10*10nm box and grid dimension 100*100*100 (spacing 0.1 nm):
         *
         *    coordinate is { 0.543 , 6.235 , -0.73 }
         *
         *    x[i][d]/box[d]                      becomes   { 0.0543 , 0.6235 , -0.073 }
         *    (x[i][d]/box[d] + 1.0)              becomes   { 1.0543 , 1.6235 , 0.927 }
         *    (x[i][d]/box[d] + 1.0)*ngrid[d]     becomes   { 105.43 , 162.35 , 92.7 }
         *
         *    integer part is now { 105 , 162 , 92 }
         *
         *    The fraction is calculates as t-ti, which becomes { 0.43 , 0.35 , 0.7 }
         *
         * 3. Take the first integer index part (which can be larger than the grid) modulo the grid dimension
         *
         *    Now we get { 5 , 62 , 92 }
         *
         *    Voila, both index and fraction, entirely without conditionals. The one limitation here is that
         *    we only add one box length, so if the particle had a coordinate <=-10.0, we would be screwed.
         *    In principle we can of course add 100.0, but that just moves the problem, it doesnt solve it.
         *    In practice, MD programs will apply PBC to reset particles inside the central box to avoid
         *    numerical problems, so this shouldnt cause any problems.
         *    (And, by adding 100.0 box lengths, we would lose a bit of numerical accuracy here!)
         */
peastman's avatar
peastman committed
259
        Vec3 coord = atomCoordinates[i];
260
        for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
261
        {
peastman's avatar
peastman committed
262
263
            t = coord[0]*recipBoxVectors[0][d]+coord[1]*recipBoxVectors[1][d]+coord[2]*recipBoxVectors[2][d];
            t = (t-floor(t))*pme->ngrid[d];
Peter Eastman's avatar
Peter Eastman committed
264
265
266
267
268
269
            ti = (int) t;

            pme->particlefraction[i][d] = t - ti;
            pme->particleindex[i][d]    = ti % pme->ngrid[d];
        }
    }
270
271
272
}


273
/* Ugly bspline calculation taken from Tom Dardens reference equations.
274
275
276
277
 * This probably very sub-optimal in Cuda? Separate kernel?
 *
 * In practice, it might help to require order=4 for the cuda port.
 */
278
static void
279
280
pme_update_bsplines(pme_t    pme)
{
Peter Eastman's avatar
Peter Eastman committed
281
282
    int       i,j,k,l;
    int       order;
peastman's avatar
peastman committed
283
284
285
    double    dr,div;
    double *  data;
    double *  ddata;
286

Peter Eastman's avatar
Peter Eastman committed
287
    order = pme->order;
288

289
    for (i=0; (i<pme->natoms); i++)
Peter Eastman's avatar
Peter Eastman committed
290
    {
291
        for (j=0; j<3; j++)
Peter Eastman's avatar
Peter Eastman committed
292
293
294
295
296
297
298
299
300
301
        {
            /* dr is relative offset from lower cell limit */
            dr = pme->particlefraction[i][j];

            data  = &(pme->bsplines_theta[j][i*order]);
            ddata = &(pme->bsplines_dtheta[j][i*order]);
            data[order-1] = 0;
            data[1]       = dr;
            data[0]       = 1-dr;

302
            for (k=3; k<order; k++)
Peter Eastman's avatar
Peter Eastman committed
303
            {
peastman's avatar
peastman committed
304
                div = 1.0/(k-1.0);
Peter Eastman's avatar
Peter Eastman committed
305
                data[k-1] = div*dr*data[k-2];
306
                for (l=1; l<(k-1); l++)
Peter Eastman's avatar
Peter Eastman committed
307
308
309
310
311
312
313
314
315
                {
                    data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)*data[k-l-1]);
                }
                data[0] = div*(1-dr)*data[0];
            }

            /* differentiate */
            ddata[0] = -data[0];

316
            for (k=1; k<order; k++)
Peter Eastman's avatar
Peter Eastman committed
317
318
319
320
            {
                ddata[k] = data[k-1]-data[k];
            }

peastman's avatar
peastman committed
321
            div           = 1.0/(order-1);
Peter Eastman's avatar
Peter Eastman committed
322
323
            data[order-1] = div*dr*data[order-2];

324
            for (l=1; l<(order-1); l++)
Peter Eastman's avatar
Peter Eastman committed
325
326
327
328
329
            {
                data[order-l-1] = div*((dr+l)*data[order-l-2]+(order-l-dr)*data[order-l-1]);
            }
            data[0] = div*(1-dr)*data[0];
        }
330
331
332
333
334
    }
}


static void
peastman's avatar
peastman committed
335
pme_grid_spread_charge(pme_t pme, const vector<double>& charges)
336
{
Peter Eastman's avatar
Peter Eastman committed
337
338
339
340
341
342
    int       order;
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
    int       xindex,yindex,zindex;
    int       index;
peastman's avatar
peastman committed
343
344
345
346
    double    q;
    double *  thetax;
    double *  thetay;
    double *  thetaz;
Peter Eastman's avatar
Peter Eastman committed
347
348

    order = pme->order;
349

350
    /* Reset the grid */
351
    for (i=0;i<pme->ngrid[0]*pme->ngrid[1]*pme->ngrid[2];i++)
Peter Eastman's avatar
Peter Eastman committed
352
    {
Peter Eastman's avatar
Peter Eastman committed
353
        pme->grid[i] = complex<double>(0, 0);
Peter Eastman's avatar
Peter Eastman committed
354
    }
355

356
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
357
    {
peastman's avatar
peastman committed
358
        q = charges[i];
359

360
        /* Grid index for the actual atom position */
Peter Eastman's avatar
Peter Eastman committed
361
362
363
        x0index = pme->particleindex[i][0];
        y0index = pme->particleindex[i][1];
        z0index = pme->particleindex[i][2];
364

365
        /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
Peter Eastman's avatar
Peter Eastman committed
366
367
368
        thetax  = &(pme->bsplines_theta[0][i*order]);
        thetay  = &(pme->bsplines_theta[1][i*order]);
        thetaz  = &(pme->bsplines_theta[2][i*order]);
369

370
        /* Loop over norder*norder*norder (typically 5*5*5) neighbor cells.
371
372
         *
         * As a neat optimization, we only spread in the forward direction, but apply PBC!
373
         *
374
         * Since we are going to do an FFT on the grid, it doesn't matter where the data is,
375
         * in frequency space the result will be the same.
376
377
         *
         * So, the influence function (bsplines) will probably be something like (0.15,0.35,0.35,0.15),
378
         * with largest weight 2-3 steps forward (you don't need to understand that for the implementation :-)
379
380
         * Effectively, you can look at this as translating the entire grid.
         *
381
         * Why do we do this stupid thing?
382
         *
383
         * 1) The loops get much simpler
384
385
386
         * 2) Just looking forward will hopefully get us more cache hits
         * 3) When we parallelize things, we only need to communicate in one direction instead of two!
         */
387

388
        for (ix=0;ix<order;ix++)
Peter Eastman's avatar
Peter Eastman committed
389
        {
390
            /* Calculate index, apply PBC so we spread to index 0/1/2 when a particle is close to the upper limit of the grid */
Peter Eastman's avatar
Peter Eastman committed
391
            xindex = (x0index + ix) % pme->ngrid[0];
392

393
            for (iy=0;iy<order;iy++)
Peter Eastman's avatar
Peter Eastman committed
394
395
            {
                yindex = (y0index + iy) % pme->ngrid[1];
396

397
                for (iz=0;iz<order;iz++)
Peter Eastman's avatar
Peter Eastman committed
398
399
                {
                    /* Can be optimized, but we keep it simple here */
Peter Eastman's avatar
Peter Eastman committed
400
                    zindex            = (z0index + iz) % pme->ngrid[2];
401
                    /* Calculate index in the charge grid */
Peter Eastman's avatar
Peter Eastman committed
402
                    index             = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
403
                    /* Add the charge times the bspline spread/interpolation factors to this grid position */
Peter Eastman's avatar
Peter Eastman committed
404
                    pme->grid[index] += q*thetax[ix]*thetay[iy]*thetaz[iz];
Peter Eastman's avatar
Peter Eastman committed
405
406
407
408
                }
            }
        }
    }
409
410
411
412
413
414
}



static void
pme_reciprocal_convolution(pme_t     pme,
peastman's avatar
peastman committed
415
416
417
                           const Vec3 periodicBoxVectors[3],
                           const Vec3 recipBoxVectors[3],
                           double *  energy)
418
{
Peter Eastman's avatar
Peter Eastman committed
419
420
    int kx,ky,kz;
    int nx,ny,nz;
peastman's avatar
peastman committed
421
422
423
424
425
426
427
428
429
430
431
432
    double mx,my,mz;
    double mhx,mhy,mhz,m2;
    double one_4pi_eps;
    double virxx,virxy,virxz,viryy,viryz,virzz;
    double bx,by,bz;
    double d1,d2;
    double eterm,vfactor,struct2,ets2;
    double esum;
    double factor;
    double denom;
    double boxfactor;
    double maxkx,maxky,maxkz;
Peter Eastman's avatar
Peter Eastman committed
433

Peter Eastman's avatar
Peter Eastman committed
434
    complex<double> *ptr;
Peter Eastman's avatar
Peter Eastman committed
435
436
437
438
439

    nx = pme->ngrid[0];
    ny = pme->ngrid[1];
    nz = pme->ngrid[2];

peastman's avatar
peastman committed
440
441
442
    one_4pi_eps = ONE_4PI_EPS0/pme->epsilon_r;
    factor = M_PI*M_PI/(pme->ewaldcoeff*pme->ewaldcoeff);
    boxfactor = M_PI*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2];
Peter Eastman's avatar
Peter Eastman committed
443
444
445
446
447
448
449
450
451

    esum = 0;
    virxx = 0;
    virxy = 0;
    virxz = 0;
    viryy = 0;
    viryz = 0;
    virzz = 0;

peastman's avatar
peastman committed
452
453
454
    maxkx = (nx+1)/2;
    maxky = (ny+1)/2;
    maxkz = (nz+1)/2;
455

456
    for (kx=0;kx<nx;kx++)
457
458
    {
        /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
459
        mx  = (kx<maxkx) ? kx : (kx-nx);
460
        mhx = mx*recipBoxVectors[0][0];
461
462
        bx  = boxfactor*pme->bsplines_moduli[0][kx];

463
        for (ky=0;ky<ny;ky++)
464
465
        {
            /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
466
            my  = (ky<maxky) ? ky : (ky-ny);
467
            mhy = mx*recipBoxVectors[1][0]+my*recipBoxVectors[1][1];
468
            by  = pme->bsplines_moduli[1][ky];
469

470
            for (kz=0;kz<nz;kz++)
Peter Eastman's avatar
Peter Eastman committed
471
            {
472
473
474
475
476
477
478
479
                /* If the net charge of the system is 0.0, there will not be any DC (direct current, zero frequency) component. However,
                 * we can still handle charged systems through a charge correction, in which case the DC
                 * component should be excluded from recprocal space. We will anyway run into problems below when dividing with the
                 * frequency if it is zero...
                 *
                 * In cuda you could probably work around this by setting something to 0.0 instead, but the short story is that we
                 * should skip the zero frequency case!
                 */
Peter Eastman's avatar
Peter Eastman committed
480
481
482
483
                if (kx==0 && ky==0 && kz==0)
                {
                    continue;
                }
484
485

                /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
486
                mz        = (kz<maxkz) ? kz : (kz-nz);
487
                mhz       = mx*recipBoxVectors[2][0]+my*recipBoxVectors[2][1]+mz*recipBoxVectors[2][2];
488
489

                /* Pointer to the grid cell in question */
Peter Eastman's avatar
Peter Eastman committed
490
                ptr       = pme->grid + kx*ny*nz + ky*nz + kz;
491
492

                /* Get grid data for this frequency */
Peter Eastman's avatar
Peter Eastman committed
493
494
                d1        = ptr->real();
                d2        = ptr->imag();
495

496
                /* Calculate the convolution - see the Essman/Darden paper for the equation! */
Peter Eastman's avatar
Peter Eastman committed
497
498
499
                m2        = mhx*mhx+mhy*mhy+mhz*mhz;
                bz        = pme->bsplines_moduli[2][kz];
                denom     = m2*bx*by*bz;
500

Peter Eastman's avatar
Peter Eastman committed
501
                eterm     = one_4pi_eps*exp(-factor*m2)/denom;
502
503

                /* write back convolution data to grid */
Peter Eastman's avatar
Peter Eastman committed
504
505
                ptr->real(d1*eterm);
                ptr->imag(d2*eterm);
506

Peter Eastman's avatar
Peter Eastman committed
507
                struct2   = (d1*d1+d2*d2);
508

Peter Eastman's avatar
Peter Eastman committed
509
510
511
512
513
514
                /* Long-range PME contribution to the energy for this frequency */
                ets2      = eterm*struct2;
                esum     += ets2;
            }
        }
    }
515

516
    /* The factor 0.5 is nothing special, but it is better to have it here than inside the loop :-) */
peastman's avatar
peastman committed
517
    *energy = 0.5*esum;
518
519
520
}


521
static void
peastman's avatar
peastman committed
522
523
524
525
dpme_reciprocal_convolution(pme_t pme,
                           const Vec3 periodicBoxVectors[3],
                           const Vec3 recipBoxVectors[3],
                           double* energy)
526
527
528
{
    int kx,ky,kz;
    int nx,ny,nz;
peastman's avatar
peastman committed
529
530
531
532
533
534
535
536
537
    double mx,my,mz;
    double mhx,mhy,mhz,m2;
    double bx,by,bz;
    double d1,d2;
    double eterm,struct2,ets2;
    double esum;
    double denom;
    double boxfactor;
    double maxkx,maxky,maxkz;
538

Peter Eastman's avatar
Peter Eastman committed
539
    complex<double> *ptr;
540
541
542
543
544

    nx = pme->ngrid[0];
    ny = pme->ngrid[1];
    nz = pme->ngrid[2];

peastman's avatar
peastman committed
545
    boxfactor = -2*M_PI*sqrt(M_PI) / (6.0*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]);
546
547
548

    esum = 0;

peastman's avatar
peastman committed
549
550
551
    maxkx = (nx+1)/2;
    maxky = (ny+1)/2;
    maxkz = (nz+1)/2;
552

peastman's avatar
peastman committed
553
554
555
556
557
    double bfac = M_PI / pme->ewaldcoeff;
    double fac1 = 2.0*M_PI*M_PI*M_PI*sqrt(M_PI);
    double fac2 = pme->ewaldcoeff*pme->ewaldcoeff*pme->ewaldcoeff;
    double fac3 = -2.0*pme->ewaldcoeff*M_PI*M_PI;
    double b, m, m3, expfac, expterm, erfcterm;
558
559
560
561

    for (kx=0;kx<nx;kx++)
    {
        /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
562
        mx  = ((kx<maxkx) ? kx : (kx-nx));
563
564
565
566
567
568
        mhx = mx*recipBoxVectors[0][0];
        bx  = pme->bsplines_moduli[0][kx];

        for (ky=0;ky<ny;ky++)
        {
            /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
569
            my  = ((ky<maxky) ? ky : (ky-ny));
570
571
572
573
574
575
576
577
578
579
            mhy = mx*recipBoxVectors[1][0]+my*recipBoxVectors[1][1];
            by  = pme->bsplines_moduli[1][ky];

            for (kz=0;kz<nz;kz++)
            {
                /*
                 * Unlike the Coulombic case, there's an m=0 term so all terms are considered here.
                 */

                /* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
peastman's avatar
peastman committed
580
                mz        = ((kz<maxkz) ? kz : (kz-nz));
581
582
583
584
585
586
                mhz       = mx*recipBoxVectors[2][0]+my*recipBoxVectors[2][1]+mz*recipBoxVectors[2][2];

                /* Pointer to the grid cell in question */
                ptr       = pme->grid + kx*ny*nz + ky*nz + kz;

                /* Get grid data for this frequency */
Peter Eastman's avatar
Peter Eastman committed
587
588
                d1        = ptr->real();
                d2        = ptr->imag();
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604

                /* Calculate the convolution - see the Essman/Darden paper for the equation! */
                m2        = mhx*mhx+mhy*mhy+mhz*mhz;
                bz        = pme->bsplines_moduli[2][kz];
                denom     = boxfactor / (bx*by*bz);

                m = sqrt(m2);
                m3 = m*m2;
                b = bfac*m;
                expfac = -b*b;
                erfcterm = erfc(b);
                expterm = exp(expfac);

                eterm     = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;

                /* write back convolution data to grid */
Peter Eastman's avatar
Peter Eastman committed
605
606
                ptr->real(d1*eterm);
                ptr->imag(d2*eterm);
607
608
609
610
611
612
613
614
615
616

                struct2   = (d1*d1+d2*d2);

                /* Long-range PME contribution to the energy for this frequency */
                ets2      = eterm*struct2;
                esum     += ets2;
            }
        }
    }
    // Remember the C6 energy is attractive, hence the negative sign.
peastman's avatar
peastman committed
617
    *energy = 0.5*esum;
618
619
620
}


621
static void
622
pme_grid_interpolate_force(pme_t pme,
peastman's avatar
peastman committed
623
624
625
                           const Vec3 recipBoxVectors[3],
                           const vector<double>& charges,
                           vector<Vec3>& forces)
626
{
Peter Eastman's avatar
Peter Eastman committed
627
628
629
630
631
632
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
    int       xindex,yindex,zindex;
    int       index;
    int       order;
peastman's avatar
peastman committed
633
634
635
636
637
638
639
640
641
642
643
    double    q;
    double *  thetax;
    double *  thetay;
    double *  thetaz;
    double *  dthetax;
    double *  dthetay;
    double *  dthetaz;
    double    tx,ty,tz;
    double    dtx,dty,dtz;
    double    fx,fy,fz;
    double    gridvalue;
Peter Eastman's avatar
Peter Eastman committed
644
645
646
647
648
649
650
    int       nx,ny,nz;

    nx    = pme->ngrid[0];
    ny    = pme->ngrid[1];
    nz    = pme->ngrid[2];

    order = pme->order;
651

652
    /* This is almost identical to the charge spreading routine! */
653

654
    for (i=0;i<pme->natoms;i++)
Peter Eastman's avatar
Peter Eastman committed
655
656
    {
        fx = fy = fz = 0;
657

peastman's avatar
peastman committed
658
        q = charges[i];
659

660
        /* Grid index for the actual atom position */
Peter Eastman's avatar
Peter Eastman committed
661
662
663
        x0index = pme->particleindex[i][0];
        y0index = pme->particleindex[i][1];
        z0index = pme->particleindex[i][2];
664

665
        /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
Peter Eastman's avatar
Peter Eastman committed
666
667
668
669
670
671
        thetax  = &(pme->bsplines_theta[0][i*order]);
        thetay  = &(pme->bsplines_theta[1][i*order]);
        thetaz  = &(pme->bsplines_theta[2][i*order]);
        dthetax = &(pme->bsplines_dtheta[0][i*order]);
        dthetay = &(pme->bsplines_dtheta[1][i*order]);
        dthetaz = &(pme->bsplines_dtheta[2][i*order]);
672

Peter Eastman's avatar
Peter Eastman committed
673
        /* See pme_grid_spread_charge() for comments about the order here, and only interpolation in one direction */
674

675
        /* Since we will add order^3 (typically 5*5*5=125) terms to the force on each particle, we use temporary fx/fy/fz
676
677
         * variables, and only add it to memory forces[] at the end.
         */
678
        for (ix=0;ix<order;ix++)
Peter Eastman's avatar
Peter Eastman committed
679
680
        {
            xindex = (x0index + ix) % pme->ngrid[0];
681
            /* Get both the bspline factor and its derivative with respect to the x coordinate! */
Peter Eastman's avatar
Peter Eastman committed
682
683
            tx     = thetax[ix];
            dtx    = dthetax[ix];
684

685
            for (iy=0;iy<order;iy++)
Peter Eastman's avatar
Peter Eastman committed
686
687
            {
                yindex = (y0index + iy) % pme->ngrid[1];
688
                /* bspline + derivative wrt y */
Peter Eastman's avatar
Peter Eastman committed
689
690
                ty     = thetay[iy];
                dty    = dthetay[iy];
691

692
                for (iz=0;iz<order;iz++)
Peter Eastman's avatar
Peter Eastman committed
693
694
695
                {
                    /* Can be optimized, but we keep it simple here */
                    zindex               = (z0index + iz) % pme->ngrid[2];
696
                    /* bspline + derivative wrt z */
Peter Eastman's avatar
Peter Eastman committed
697
698
699
                    tz                   = thetaz[iz];
                    dtz                  = dthetaz[iz];
                    index                = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
700

701
702
                    /* Get the fft+convoluted+ifft:d data from the grid, which must be real by definition */
                    /* Checking that the imaginary part is indeed zero might be a good check :-) */
Peter Eastman's avatar
Peter Eastman committed
703
                    gridvalue            = pme->grid[index].real();
Peter Eastman's avatar
Peter Eastman committed
704
705
706
707
708
709
710
711

                    /* The d component of the force is calculated by taking the derived bspline in dimension d, normal bsplines in the other two */
                    fx                  += dtx*ty*tz*gridvalue;
                    fy                  += tx*dty*tz*gridvalue;
                    fz                  += tx*ty*dtz*gridvalue;
                }
            }
        }
712
        /* Update memory force, note that we multiply by charge and some box stuff */
713
714
715
        forces[i][0] -= q*(fx*nx*recipBoxVectors[0][0]);
        forces[i][1] -= q*(fx*nx*recipBoxVectors[1][0]+fy*ny*recipBoxVectors[1][1]);
        forces[i][2] -= q*(fx*nx*recipBoxVectors[2][0]+fy*ny*recipBoxVectors[2][1]+fz*nz*recipBoxVectors[2][2]);
Peter Eastman's avatar
Peter Eastman committed
716
    }
717
718
719
720
721
722
}



/* EXPORTED ROUTINES */

723
int
724
pme_init(pme_t *       ppme,
peastman's avatar
peastman committed
725
         double        ewaldcoeff,
Peter Eastman's avatar
Peter Eastman committed
726
         int           natoms,
peastman's avatar
peastman committed
727
         const int     ngrid[3],
Peter Eastman's avatar
Peter Eastman committed
728
         int           pme_order,
peastman's avatar
peastman committed
729
         double        epsilon_r)
730
731
732
{
    pme_t pme;
    int   d;
733

734
735
736
737
738
    pme = (pme_t) malloc(sizeof(struct pme));

    pme->order       = pme_order;
    pme->epsilon_r   = epsilon_r;
    pme->ewaldcoeff  = ewaldcoeff;
Peter Eastman's avatar
Peter Eastman committed
739
    pme->natoms      = natoms;
740

741
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
742
743
    {
        pme->ngrid[d]            = ngrid[d];
peastman's avatar
peastman committed
744
745
        pme->bsplines_theta[d]   = (double *)malloc(sizeof(double)*pme_order*natoms);
        pme->bsplines_dtheta[d]  = (double *)malloc(sizeof(double)*pme_order*natoms);
Peter Eastman's avatar
Peter Eastman committed
746
    }
747

Peter Eastman's avatar
Peter Eastman committed
748
749
    pme->particlefraction = (rvec *)malloc(sizeof(rvec)*natoms);
    pme->particleindex    = (ivec *)malloc(sizeof(ivec)*natoms);
750

Peter Eastman's avatar
Peter Eastman committed
751
    /* Allocate charge grid storage */
Peter Eastman's avatar
Peter Eastman committed
752
    pme->grid        = (complex<double> *)malloc(sizeof(complex<double>)*ngrid[0]*ngrid[1]*ngrid[2]);
753

Peter Eastman's avatar
Peter Eastman committed
754
755
    /* Setup bspline moduli (see Essman paper) */
    pme_calculate_bsplines_moduli(pme);
756

Peter Eastman's avatar
Peter Eastman committed
757
    *ppme = pme;
758

759
760
761
762
763
764
765
766
    return 0;
}





int pme_exec(pme_t       pme,
peastman's avatar
peastman committed
767
768
769
770
771
             const vector<Vec3>& atomCoordinates,
             vector<Vec3>& forces,
             const vector<double>& charges,
             const Vec3 periodicBoxVectors[3],
             double* energy)
772
773
{
    /* Routine is called with coordinates in x, a box, and charges in q */
774

peastman's avatar
peastman committed
775
    Vec3 recipBoxVectors[3];
776
777
    invert_box_vectors(periodicBoxVectors, recipBoxVectors);
    
778
779
780
781
    /* Before we can do the actual interpolation, we need to recalculate and update
     * the indices for each particle in the charge grid (initialized in pme_init()),
     * and what its fractional offset in this grid cell is.
     */
782

Peter Eastman's avatar
Peter Eastman committed
783
    /* Update charge grid indices and fractional offsets for each atom.
784
     * The indices/fractions are stored internally in the pme datatype
785
     */
786
    pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors);
787

Peter Eastman's avatar
Peter Eastman committed
788
789
    /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
    pme_update_bsplines(pme);
790

Peter Eastman's avatar
Peter Eastman committed
791
    /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
peastman's avatar
peastman committed
792
    pme_grid_spread_charge(pme, charges);
793

Peter Eastman's avatar
Peter Eastman committed
794
    /* do 3d-fft */
Peter Eastman's avatar
Peter Eastman committed
795
796
797
798
799
800
    vector<size_t> shape = {(size_t) pme->ngrid[0], (size_t) pme->ngrid[1], (size_t) pme->ngrid[2]};
    vector<size_t> axes = {0, 1, 2};
    vector<ptrdiff_t> stride = {(ptrdiff_t) (pme->ngrid[1]*pme->ngrid[2]*sizeof(complex<double>)),
                                (ptrdiff_t) (pme->ngrid[2]*sizeof(complex<double>)),
                                (ptrdiff_t) sizeof(complex<double>)};
    pocketfft::c2c(shape, stride, stride, axes, true, pme->grid, pme->grid, 1.0, 0);
801

Peter Eastman's avatar
Peter Eastman committed
802
    /* solve in k-space */
803
    pme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);
804

Peter Eastman's avatar
Peter Eastman committed
805
    /* do 3d-invfft */
Peter Eastman's avatar
Peter Eastman committed
806
    pocketfft::c2c(shape, stride, stride, axes, false, pme->grid, pme->grid, 1.0, 0);
807

Peter Eastman's avatar
Peter Eastman committed
808
    /* Get the particle forces from the grid and bsplines in the pme structure */
809
    pme_grid_interpolate_force(pme,recipBoxVectors,charges,forces);
810
811
812
813
814

    return 0;
}


815
int pme_exec_dpme(pme_t       pme,
peastman's avatar
peastman committed
816
817
818
819
820
             const vector<Vec3>& atomCoordinates,
             vector<Vec3>& forces,
             const vector<double>& c6s,
             const Vec3 periodicBoxVectors[3],
             double* energy)
821
822
823
{
    /* Routine is called with coordinates in x, a box, and charges in q */

peastman's avatar
peastman committed
824
    Vec3 recipBoxVectors[3];
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
    invert_box_vectors(periodicBoxVectors, recipBoxVectors);

    /* Before we can do the actual interpolation, we need to recalculate and update
     * the indices for each particle in the charge grid (initialized in pme_init()),
     * and what its fractional offset in this grid cell is.
     */

    /* Update charge grid indices and fractional offsets for each atom.
     * The indices/fractions are stored internally in the pme datatype
     */
    pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors);

    /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
    pme_update_bsplines(pme);

    /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
    pme_grid_spread_charge(pme, c6s);

    /* do 3d-fft */
Peter Eastman's avatar
Peter Eastman committed
844
845
846
847
848
849
    vector<size_t> shape = {(size_t) pme->ngrid[0], (size_t) pme->ngrid[1], (size_t) pme->ngrid[2]};
    vector<size_t> axes = {0, 1, 2};
    vector<ptrdiff_t> stride = {(ptrdiff_t) (pme->ngrid[1]*pme->ngrid[2]*sizeof(complex<double>)),
                                (ptrdiff_t) (pme->ngrid[2]*sizeof(complex<double>)),
                                (ptrdiff_t) sizeof(complex<double>)};
    pocketfft::c2c(shape, stride, stride, axes, true, pme->grid, pme->grid, 1.0, 0);
850
851
852
853
854

    /* solve in k-space */
    dpme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);

    /* do 3d-invfft */
Peter Eastman's avatar
Peter Eastman committed
855
    pocketfft::c2c(shape, stride, stride, axes, false, pme->grid, pme->grid, 1.0, 0);
856
857
858
859
860
861
862

    /* Get the particle forces from the grid and bsplines in the pme structure */
    pme_grid_interpolate_force(pme,recipBoxVectors,c6s,forces);

    return 0;
}

863
864
865
866

int
pme_destroy(pme_t    pme)
{
Peter Eastman's avatar
Peter Eastman committed
867
    int d;
868

Peter Eastman's avatar
Peter Eastman committed
869
    free(pme->grid);
870

871
    for (d=0;d<3;d++)
Peter Eastman's avatar
Peter Eastman committed
872
873
874
875
876
    {
        free(pme->bsplines_moduli[d]);
        free(pme->bsplines_theta[d]);
        free(pme->bsplines_dtheta[d]);
    }
877

Peter Eastman's avatar
Peter Eastman committed
878
879
    free(pme->particlefraction);
    free(pme->particleindex);
880

Peter Eastman's avatar
Peter Eastman committed
881
882
    /* destroy structure itself */
    free(pme);
883

Peter Eastman's avatar
Peter Eastman committed
884
    return 0;
885
}
886
887

} // namespace OpenMM