/* * Reference implementation of PME reciprocal space interactions. * * Copyright (c) 2009, Erik Lindahl, Rossen Apostolov, Szilard Pall * All rights reserved. * Contact: lindahl@cbr.su.se Stockholm University, Sweden. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. Redistributions in binary * form must reproduce the above copyright notice, this list of conditions and * the following disclaimer in the documentation and/or other materials provided * with the distribution. * Neither the name of the author/university nor the names of its contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include "ReferencePME.h" #include "fftpack.h" #include "SimTKOpenMMRealType.h" using std::vector; typedef int ivec[3]; namespace OpenMM { struct pme { int natoms; double ewaldcoeff; t_complex * grid; /* Memory for the grid we spread charges on. * Element (i,j,k) is accessed as: * grid[i*ngrid[1]*ngrid[2] + j*ngrid[2] + k] */ int ngrid[3]; /* Total grid dimensions (all data is complex!) */ fftpack_t fftplan; /* Handle to fourier transform setup */ int order; /* PME interpolation order. Almost always 4 */ /* Data for bspline interpolation, see the Essman PME paper */ double * bsplines_moduli[3]; /* 3 pointers, to x/y/z bspline moduli, each of length ngrid[x/y/z] */ double * bsplines_theta[3]; /* each of x/y/z has length order*natoms */ double * bsplines_dtheta[3]; /* each of x/y/z has length order*natoms */ ivec * particleindex; /* Array of length natoms. Each element is * an ivec (3 ints) that specify the grid * indices for that particular atom. Updated every step! */ rvec * particlefraction; /* Array of length natoms. Fractional offset in the grid for * each atom in all three dimensions. */ /* Further explanation of index/fraction: * * Assume we have a cell of size 10*10*10nm, and a total grid dimension of 100*100*100 cells. * In other words, each cell is 0.1*0.1*0.1 nm. * * If particle i has coordinates { 0.543 , 6.235 , -0.73 }, we will get: * * particleindex[i] = { 5 , 62 , 92 } (-0.73 + 10 = 9.27, we always apply PBC for grid calculations!) * particlefraction[i] = { 0.43 , 0.35 , 0.7 } (this is the fraction of the cell length where the atom is) * * (The reason for precaculating / storing these is that it gets a bit more complex for triclinic cells :-) * * In the current code version we might assume that a particle is not more than a whole box length away from * the central cell, i.e., in this case we would assume all coordinates fall in -10 nm < x,y,z < 20 nm. */ double epsilon_r; /* Dielectric coefficient to use, typically 1.0 */ }; /* Internal setup routines */ /* Only called once from init_pme(), performance does not matter! */ static void pme_calculate_bsplines_moduli(pme_t pme) { int nmax; int i,j,k,l,d; int order; int ndata; double * data; double * ddata; double * bsplines_data; double div; double sc,ss,arg; nmax = 0; for (d=0;d<3;d++) { nmax = (pme->ngrid[d] > nmax) ? pme->ngrid[d] : nmax; pme->bsplines_moduli[d] = (double *) malloc(sizeof(double)*pme->ngrid[d]); } order = pme->order; /* temp storage in this routine */ data = (double *) malloc(sizeof(double)*order); ddata = (double *) malloc(sizeof(double)*order); bsplines_data = (double *) malloc(sizeof(double)*nmax); data[order-1]=0; data[1]=0; data[0]=1; for (k=3;kngrid[d]; for (i=0;ibsplines_moduli[d][i]=sc*sc+ss*ss; } for (i=0;ibsplines_moduli[d][i]<1.0e-7) { pme->bsplines_moduli[d][i]=(pme->bsplines_moduli[d][(i-1+ndata)%ndata]+pme->bsplines_moduli[d][(i+1)%ndata])/2; } } } /* Release temp storage */ free(data); free(ddata); free(bsplines_data); } static void invert_box_vectors(const Vec3 boxVectors[3], Vec3 recipBoxVectors[3]) { double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]; assert(determinant > 0); double scale = 1.0/determinant; recipBoxVectors[0] = Vec3(boxVectors[1][1]*boxVectors[2][2], 0, 0)*scale; recipBoxVectors[1] = Vec3(-boxVectors[1][0]*boxVectors[2][2], boxVectors[0][0]*boxVectors[2][2], 0)*scale; recipBoxVectors[2] = Vec3(boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0], -boxVectors[0][0]*boxVectors[2][1], boxVectors[0][0]*boxVectors[1][1])*scale; } static void pme_update_grid_index_and_fraction(pme_t pme, const vector& atomCoordinates, const Vec3 periodicBoxVectors[3], const Vec3 recipBoxVectors[3]) { int i; int d; double t; int ti; for (i=0;inatoms;i++) { /* Index calculation (Look mom, no conditionals!): * * Both for Cuda and modern CPUs it is nice to avoid conditionals, but we still need to apply periodic boundary conditions. * Instead of having loops to add/subtract the box dimension, we do it this way: * * 1. First add the box size, to make sure this atom coordinate isnt -0.1 or something. * After this we assume all fractional box positions are *positive*. * The reason for this is that we always want to round coordinates _down_ to get * their grid index, and when taking the integer part of -3.4 we would get -3, not -4 as we want. * Since we anyway need the grid indices to fall in the central box, it is more convenient * to first manipulate the coordinates to be positive. * 2. Convert to integer grid index * Since we have added a whole box unit in step 1, this index might actually be larger than * the grid dimension. Examples, assuming 10*10*10nm box and grid dimension 100*100*100 (spacing 0.1 nm): * * coordinate is { 0.543 , 6.235 , -0.73 } * * x[i][d]/box[d] becomes { 0.0543 , 0.6235 , -0.073 } * (x[i][d]/box[d] + 1.0) becomes { 1.0543 , 1.6235 , 0.927 } * (x[i][d]/box[d] + 1.0)*ngrid[d] becomes { 105.43 , 162.35 , 92.7 } * * integer part is now { 105 , 162 , 92 } * * The fraction is calculates as t-ti, which becomes { 0.43 , 0.35 , 0.7 } * * 3. Take the first integer index part (which can be larger than the grid) modulo the grid dimension * * Now we get { 5 , 62 , 92 } * * Voila, both index and fraction, entirely without conditionals. The one limitation here is that * we only add one box length, so if the particle had a coordinate <=-10.0, we would be screwed. * In principle we can of course add 100.0, but that just moves the problem, it doesnt solve it. * In practice, MD programs will apply PBC to reset particles inside the central box to avoid * numerical problems, so this shouldnt cause any problems. * (And, by adding 100.0 box lengths, we would lose a bit of numerical accuracy here!) */ Vec3 coord = atomCoordinates[i]; for (d=0;d<3;d++) { t = coord[0]*recipBoxVectors[0][d]+coord[1]*recipBoxVectors[1][d]+coord[2]*recipBoxVectors[2][d]; t = (t-floor(t))*pme->ngrid[d]; ti = (int) t; pme->particlefraction[i][d] = t - ti; pme->particleindex[i][d] = ti % pme->ngrid[d]; } } } /* Ugly bspline calculation taken from Tom Dardens reference equations. * This probably very sub-optimal in Cuda? Separate kernel? * * In practice, it might help to require order=4 for the cuda port. */ static void pme_update_bsplines(pme_t pme) { int i,j,k,l; int order; double dr,div; double * data; double * ddata; order = pme->order; for (i=0; (inatoms); i++) { for (j=0; j<3; j++) { /* dr is relative offset from lower cell limit */ dr = pme->particlefraction[i][j]; data = &(pme->bsplines_theta[j][i*order]); ddata = &(pme->bsplines_dtheta[j][i*order]); data[order-1] = 0; data[1] = dr; data[0] = 1-dr; for (k=3; k& charges) { int order; int i; int ix,iy,iz; int x0index,y0index,z0index; int xindex,yindex,zindex; int index; double q; double * thetax; double * thetay; double * thetaz; order = pme->order; /* Reset the grid */ for (i=0;ingrid[0]*pme->ngrid[1]*pme->ngrid[2];i++) { pme->grid[i].re = pme->grid[i].im = 0; } for (i=0;inatoms;i++) { q = charges[i]; /* Grid index for the actual atom position */ x0index = pme->particleindex[i][0]; y0index = pme->particleindex[i][1]; z0index = pme->particleindex[i][2]; /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */ thetax = &(pme->bsplines_theta[0][i*order]); thetay = &(pme->bsplines_theta[1][i*order]); thetaz = &(pme->bsplines_theta[2][i*order]); /* Loop over norder*norder*norder (typically 4*4*4) neighbor cells. * * As a neat optimization, we only spread in the forward direction, but apply PBC! * * Since we are going to do an FFT on the grid, it doesnt matter where the data is, * in frequency space the result will be the same. * * So, the influence function (bsplines) will probably be something like (0.15,0.35,0.35,0.15), * with largest weight 2-3 steps forward (you dont need to understand that for the implementation :-) * Effectively, you can look at this as translating the entire grid. * * Why do we do this stupid thing? * * 1) The loops get much simpler * 2) Just looking forward will hopefully get us more cache hits * 3) When we parallelize things, we only need to communicate in one direction instead of two! */ for (ix=0;ixngrid[0]; for (iy=0;iyngrid[1]; for (iz=0;izngrid[2]; /* Calculate index in the charge grid */ index = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex; /* Add the charge times the bspline spread/interpolation factors to this grid position */ pme->grid[index].re += q*thetax[ix]*thetay[iy]*thetaz[iz]; } } } } } static void pme_reciprocal_convolution(pme_t pme, const Vec3 periodicBoxVectors[3], const Vec3 recipBoxVectors[3], double * energy) { int kx,ky,kz; int nx,ny,nz; double mx,my,mz; double mhx,mhy,mhz,m2; double one_4pi_eps; double virxx,virxy,virxz,viryy,viryz,virzz; double bx,by,bz; double d1,d2; double eterm,vfactor,struct2,ets2; double esum; double factor; double denom; double boxfactor; double maxkx,maxky,maxkz; t_complex *ptr; nx = pme->ngrid[0]; ny = pme->ngrid[1]; nz = pme->ngrid[2]; one_4pi_eps = ONE_4PI_EPS0/pme->epsilon_r; factor = M_PI*M_PI/(pme->ewaldcoeff*pme->ewaldcoeff); boxfactor = M_PI*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]; esum = 0; virxx = 0; virxy = 0; virxz = 0; viryy = 0; viryz = 0; virzz = 0; maxkx = (nx+1)/2; maxky = (ny+1)/2; maxkz = (nz+1)/2; for (kx=0;kxbsplines_moduli[0][kx]; for (ky=0;kybsplines_moduli[1][ky]; for (kz=0;kzgrid + kx*ny*nz + ky*nz + kz; /* Get grid data for this frequency */ d1 = ptr->re; d2 = ptr->im; /* Calculate the convolution - see the Essman/Darden paper for the equation! */ m2 = mhx*mhx+mhy*mhy+mhz*mhz; bz = pme->bsplines_moduli[2][kz]; denom = m2*bx*by*bz; eterm = one_4pi_eps*exp(-factor*m2)/denom; /* write back convolution data to grid */ ptr->re = d1*eterm; ptr->im = d2*eterm; struct2 = (d1*d1+d2*d2); /* Long-range PME contribution to the energy for this frequency */ ets2 = eterm*struct2; esum += ets2; } } } /* The factor 0.5 is nothing special, but it is better to have it here than inside the loop :-) */ *energy = 0.5*esum; } static void dpme_reciprocal_convolution(pme_t pme, const Vec3 periodicBoxVectors[3], const Vec3 recipBoxVectors[3], double* energy) { int kx,ky,kz; int nx,ny,nz; double mx,my,mz; double mhx,mhy,mhz,m2; double bx,by,bz; double d1,d2; double eterm,struct2,ets2; double esum; double denom; double boxfactor; double maxkx,maxky,maxkz; t_complex *ptr; nx = pme->ngrid[0]; ny = pme->ngrid[1]; nz = pme->ngrid[2]; boxfactor = -2*M_PI*sqrt(M_PI) / (6.0*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]); esum = 0; maxkx = (nx+1)/2; maxky = (ny+1)/2; maxkz = (nz+1)/2; double bfac = M_PI / pme->ewaldcoeff; double fac1 = 2.0*M_PI*M_PI*M_PI*sqrt(M_PI); double fac2 = pme->ewaldcoeff*pme->ewaldcoeff*pme->ewaldcoeff; double fac3 = -2.0*pme->ewaldcoeff*M_PI*M_PI; double b, m, m3, expfac, expterm, erfcterm; for (kx=0;kxbsplines_moduli[0][kx]; for (ky=0;kybsplines_moduli[1][ky]; for (kz=0;kzgrid + kx*ny*nz + ky*nz + kz; /* Get grid data for this frequency */ d1 = ptr->re; d2 = ptr->im; /* Calculate the convolution - see the Essman/Darden paper for the equation! */ m2 = mhx*mhx+mhy*mhy+mhz*mhz; bz = pme->bsplines_moduli[2][kz]; denom = boxfactor / (bx*by*bz); m = sqrt(m2); m3 = m*m2; b = bfac*m; expfac = -b*b; erfcterm = erfc(b); expterm = exp(expfac); eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom; /* write back convolution data to grid */ ptr->re = d1*eterm; ptr->im = d2*eterm; struct2 = (d1*d1+d2*d2); /* Long-range PME contribution to the energy for this frequency */ ets2 = eterm*struct2; esum += ets2; } } } // Remember the C6 energy is attractive, hence the negative sign. *energy = 0.5*esum; } static void pme_grid_interpolate_force(pme_t pme, const Vec3 recipBoxVectors[3], const vector& charges, vector& forces) { int i; int ix,iy,iz; int x0index,y0index,z0index; int xindex,yindex,zindex; int index; int order; double q; double * thetax; double * thetay; double * thetaz; double * dthetax; double * dthetay; double * dthetaz; double tx,ty,tz; double dtx,dty,dtz; double fx,fy,fz; double gridvalue; int nx,ny,nz; nx = pme->ngrid[0]; ny = pme->ngrid[1]; nz = pme->ngrid[2]; order = pme->order; /* This is almost identical to the charge spreading routine! */ for (i=0;inatoms;i++) { fx = fy = fz = 0; q = charges[i]; /* Grid index for the actual atom position */ x0index = pme->particleindex[i][0]; y0index = pme->particleindex[i][1]; z0index = pme->particleindex[i][2]; /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */ thetax = &(pme->bsplines_theta[0][i*order]); thetay = &(pme->bsplines_theta[1][i*order]); thetaz = &(pme->bsplines_theta[2][i*order]); dthetax = &(pme->bsplines_dtheta[0][i*order]); dthetay = &(pme->bsplines_dtheta[1][i*order]); dthetaz = &(pme->bsplines_dtheta[2][i*order]); /* See pme_grid_spread_charge() for comments about the order here, and only interpolation in one direction */ /* Since we will add order^3 (typically 4*4*4=64) terms to the force on each particle, we use temporary fx/fy/fz * variables, and only add it to memory forces[] at the end. */ for (ix=0;ixngrid[0]; /* Get both the bspline factor and its derivative with respect to the x coordinate! */ tx = thetax[ix]; dtx = dthetax[ix]; for (iy=0;iyngrid[1]; /* bspline + derivative wrt y */ ty = thetay[iy]; dty = dthetay[iy]; for (iz=0;izngrid[2]; /* bspline + derivative wrt z */ tz = thetaz[iz]; dtz = dthetaz[iz]; index = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex; /* Get the fft+convoluted+ifft:d data from the grid, which must be real by definition */ /* Checking that the imaginary part is indeed zero might be a good check :-) */ gridvalue = pme->grid[index].re; /* The d component of the force is calculated by taking the derived bspline in dimension d, normal bsplines in the other two */ fx += dtx*ty*tz*gridvalue; fy += tx*dty*tz*gridvalue; fz += tx*ty*dtz*gridvalue; } } } /* Update memory force, note that we multiply by charge and some box stuff */ forces[i][0] -= q*(fx*nx*recipBoxVectors[0][0]); forces[i][1] -= q*(fx*nx*recipBoxVectors[1][0]+fy*ny*recipBoxVectors[1][1]); forces[i][2] -= q*(fx*nx*recipBoxVectors[2][0]+fy*ny*recipBoxVectors[2][1]+fz*nz*recipBoxVectors[2][2]); } } /* EXPORTED ROUTINES */ int pme_init(pme_t * ppme, double ewaldcoeff, int natoms, const int ngrid[3], int pme_order, double epsilon_r) { pme_t pme; int d; pme = (pme_t) malloc(sizeof(struct pme)); pme->order = pme_order; pme->epsilon_r = epsilon_r; pme->ewaldcoeff = ewaldcoeff; pme->natoms = natoms; for (d=0;d<3;d++) { pme->ngrid[d] = ngrid[d]; pme->bsplines_theta[d] = (double *)malloc(sizeof(double)*pme_order*natoms); pme->bsplines_dtheta[d] = (double *)malloc(sizeof(double)*pme_order*natoms); } pme->particlefraction = (rvec *)malloc(sizeof(rvec)*natoms); pme->particleindex = (ivec *)malloc(sizeof(ivec)*natoms); /* Allocate charge grid storage */ pme->grid = (t_complex *)malloc(sizeof(t_complex)*ngrid[0]*ngrid[1]*ngrid[2]); fftpack_init_3d(&pme->fftplan,ngrid[0],ngrid[1],ngrid[2]); /* Setup bspline moduli (see Essman paper) */ pme_calculate_bsplines_moduli(pme); *ppme = pme; return 0; } int pme_exec(pme_t pme, const vector& atomCoordinates, vector& forces, const vector& charges, const Vec3 periodicBoxVectors[3], double* energy) { /* Routine is called with coordinates in x, a box, and charges in q */ Vec3 recipBoxVectors[3]; invert_box_vectors(periodicBoxVectors, recipBoxVectors); /* Before we can do the actual interpolation, we need to recalculate and update * the indices for each particle in the charge grid (initialized in pme_init()), * and what its fractional offset in this grid cell is. */ /* Update charge grid indices and fractional offsets for each atom. * The indices/fractions are stored internally in the pme datatype */ pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors); /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */ pme_update_bsplines(pme); /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */ pme_grid_spread_charge(pme, charges); /* do 3d-fft */ fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid); /* solve in k-space */ pme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy); /* do 3d-invfft */ fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid); /* Get the particle forces from the grid and bsplines in the pme structure */ pme_grid_interpolate_force(pme,recipBoxVectors,charges,forces); return 0; } int pme_exec_dpme(pme_t pme, const vector& atomCoordinates, vector& forces, const vector& c6s, const Vec3 periodicBoxVectors[3], double* energy) { /* Routine is called with coordinates in x, a box, and charges in q */ Vec3 recipBoxVectors[3]; invert_box_vectors(periodicBoxVectors, recipBoxVectors); /* Before we can do the actual interpolation, we need to recalculate and update * the indices for each particle in the charge grid (initialized in pme_init()), * and what its fractional offset in this grid cell is. */ /* Update charge grid indices and fractional offsets for each atom. * The indices/fractions are stored internally in the pme datatype */ pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors); /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */ pme_update_bsplines(pme); /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */ pme_grid_spread_charge(pme, c6s); /* do 3d-fft */ fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid); /* solve in k-space */ dpme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy); /* do 3d-invfft */ fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid); /* Get the particle forces from the grid and bsplines in the pme structure */ pme_grid_interpolate_force(pme,recipBoxVectors,c6s,forces); return 0; } int pme_destroy(pme_t pme) { int d; free(pme->grid); for (d=0;d<3;d++) { free(pme->bsplines_moduli[d]); free(pme->bsplines_theta[d]); free(pme->bsplines_dtheta[d]); } free(pme->particlefraction); free(pme->particleindex); fftpack_destroy(pme->fftplan); /* destroy structure itself */ free(pme); return 0; } } // namespace OpenMM