/* * Reference implementation of PME reciprocal space interactions. * * Copyright (c) 2009, Erik Lindahl, Rossen Apostolov, Szilard Pall * All rights reserved. * Contact: lindahl@cbr.su.se Stockholm University, Sweden. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. Redistributions in binary * form must reproduce the above copyright notice, this list of conditions and * the following disclaimer in the documentation and/or other materials provided * with the distribution. * Neither the name of the author/university nor the names of its contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include "ReferencePME.h" #include "fftpack.h" using std::vector; using OpenMM::RealVec; typedef int ivec[3]; struct pme { int natoms; RealOpenMM ewaldcoeff; t_complex * grid; /* Memory for the grid we spread charges on. * Element (i,j,k) is accessed as: * grid[i*ngrid[1]*ngrid[2] + j*ngrid[2] + k] */ int ngrid[3]; /* Total grid dimensions (all data is complex!) */ fftpack_t fftplan; /* Handle to fourier transform setup */ int order; /* PME interpolation order. Almost always 4 */ /* Data for bspline interpolation, see the Essman PME paper */ RealOpenMM * bsplines_moduli[3]; /* 3 pointers, to x/y/z bspline moduli, each of length ngrid[x/y/z] */ RealOpenMM * bsplines_theta[3]; /* each of x/y/z has length order*natoms */ RealOpenMM * bsplines_dtheta[3]; /* each of x/y/z has length order*natoms */ ivec * particleindex; /* Array of length natoms. Each element is * an ivec (3 ints) that specify the grid * indices for that particular atom. Updated every step! */ rvec * particlefraction; /* Array of length natoms. Fractional offset in the grid for * each atom in all three dimensions. */ /* Further explanation of index/fraction: * * Assume we have a cell of size 10*10*10nm, and a total grid dimension of 100*100*100 cells. * In other words, each cell is 0.1*0.1*0.1 nm. * * If particle i has coordinates { 0.543 , 6.235 , -0.73 }, we will get: * * particleindex[i] = { 5 , 62 , 92 } (-0.73 + 10 = 9.27, we always apply PBC for grid calculations!) * particlefraction[i] = { 0.43 , 0.35 , 0.7 } ( this is the fraction of the cell length where the atom is) * * (The reason for precaculating / storing these is that it gets a bit more complex for triclinic cells :-) * * In the current code version we might assume that a particle is not more than a whole box length away from * the central cell, i.e., in this case we would assume all coordinates fall in -10 nm < x,y,z < 20 nm. */ RealOpenMM epsilon_r; /* Dielectric coefficient to use, typically 1.0 */ }; /* Internal setup routines */ /* Only called once from init_pme(), performance does not matter! */ static void pme_calculate_bsplines_moduli(pme_t pme) { int nmax; int i,j,k,l,d; int order; int ndata; RealOpenMM * data; RealOpenMM * ddata; RealOpenMM * bsplines_data; RealOpenMM div; RealOpenMM sc,ss,arg; nmax = 0; for(d=0;d<3;d++) { nmax = (pme->ngrid[d] > nmax) ? pme->ngrid[d] : nmax; pme->bsplines_moduli[d] = (RealOpenMM *) malloc(sizeof(RealOpenMM)*pme->ngrid[d]); } order = pme->order; /* temp storage in this routine */ data = (RealOpenMM *) malloc(sizeof(RealOpenMM)*order); ddata = (RealOpenMM *) malloc(sizeof(RealOpenMM)*order); bsplines_data = (RealOpenMM *) malloc(sizeof(RealOpenMM)*nmax); data[order-1]=0; data[1]=0; data[0]=1; for(k=3;kngrid[d]; for(i=0;ibsplines_moduli[d][i]=sc*sc+ss*ss; } for(i=0;ibsplines_moduli[d][i]<1.0e-7) { pme->bsplines_moduli[d][i]=(pme->bsplines_moduli[d][i-1]+pme->bsplines_moduli[d][i+1])/2; } } } /* Release temp storage */ free(data); free(ddata); free(bsplines_data); } static void pme_update_grid_index_and_fraction(pme_t pme, vector& atomCoordinates, const RealOpenMM periodicBoxSize[3]) { int i; int d; RealOpenMM t; int ti; for(i=0;inatoms;i++) { for(d=0;d<3;d++) { /* Index calculation (Look mom, no conditionals!): * * Both for Cuda and modern CPUs it is nice to avoid conditionals, but we still need to apply periodic boundary conditions. * Instead of having loops to add/subtract the box dimension, we do it this way: * * 1. First add the box size, to make sure this atom coordinate isnt -0.1 or something. * After this we assume all fractional box positions are *positive*. * The reason for this is that we always want to round coordinates _down_ to get * their grid index, and when taking the integer part of -3.4 we would get -3, not -4 as we want. * Since we anyway need the grid indices to fall in the central box, it is more convenient * to first manipulate the coordinates to be positive. * 2. Convert to integer grid index * Since we have added a whole box unit in step 1, this index might actually be larger than * the grid dimension. Examples, assuming 10*10*10nm box and grid dimension 100*100*100 (spacing 0.1 nm): * * coordinate is { 0.543 , 6.235 , -0.73 } * * x[i][d]/box[d] becomes { 0.0543 , 0.6235 , -0.073 } * (x[i][d]/box[d] + 1.0) becomes { 1.0543 , 1.6235 , 0.927 } * (x[i][d]/box[d] + 1.0)*ngrid[d] becomes { 105.43 , 162.35 , 92.7 } * * integer part is now { 105 , 162 , 92 } * * The fraction is calculates as t-ti, which becomes { 0.43 , 0.35 , 0.7 } * * 3. Take the first integer index part (which can be larger than the grid) modulo the grid dimension * * Now we get { 5 , 62 , 92 } * * Voila, both index and fraction, entirely without conditionals. The one limitation here is that * we only add one box length, so if the particle had a coordinate <=-10.0, we would be screwed. * In principle we can of course add 100.0, but that just moves the problem, it doesnt solve it. * In practice, MD programs will apply PBC to reset particles inside the central box to avoid * numerical problems, so this shouldnt cause any problems. * (And, by adding 100.0 box lengths, we would lose a bit of numerical accuracy here!) */ RealOpenMM coord = atomCoordinates[i][d]; coord -= floor(coord/periodicBoxSize[d])*periodicBoxSize[d]; t = (coord / periodicBoxSize[d])*pme->ngrid[d]; ti = (int) t; pme->particlefraction[i][d] = t - ti; pme->particleindex[i][d] = ti % pme->ngrid[d]; } } } /* Ugly bspline calculation taken from Tom Dardens reference equations. * This probably very sub-optimal in Cuda? Separate kernel? * * In practice, it might help to require order=4 for the cuda port. */ static void pme_update_bsplines(pme_t pme) { int i,j,k,l; int order; RealOpenMM dr,div; RealOpenMM * data; RealOpenMM * ddata; order = pme->order; for(i=0; (inatoms); i++) { for(j=0; j<3; j++) { /* dr is relative offset from lower cell limit */ dr = pme->particlefraction[i][j]; data = &(pme->bsplines_theta[j][i*order]); ddata = &(pme->bsplines_dtheta[j][i*order]); data[order-1] = 0; data[1] = dr; data[0] = 1-dr; for(k=3; k& charges) { int order; int i; int ix,iy,iz; int x0index,y0index,z0index; int xindex,yindex,zindex; int index; RealOpenMM q; RealOpenMM * thetax; RealOpenMM * thetay; RealOpenMM * thetaz; order = pme->order; /* Reset the grid */ for(i=0;ingrid[0]*pme->ngrid[1]*pme->ngrid[2];i++) { pme->grid[i].re = pme->grid[i].im = 0; } for(i=0;inatoms;i++) { q = charges[i]; /* Grid index for the actual atom position */ x0index = pme->particleindex[i][0]; y0index = pme->particleindex[i][1]; z0index = pme->particleindex[i][2]; /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */ thetax = &(pme->bsplines_theta[0][i*order]); thetay = &(pme->bsplines_theta[1][i*order]); thetaz = &(pme->bsplines_theta[2][i*order]); /* Loop over norder*norder*norder (typically 4*4*4) neighbor cells. * * As a neat optimization, we only spread in the forward direction, but apply PBC! * * Since we are going to do an FFT on the grid, it doesnt matter where the data is, * in frequency space the result will be the same. * * So, the influence function (bsplines) will probably be something like (0.15,0.35,0.35,0.15), * with largest weight 2-3 steps forward (you dont need to understand that for the implementation :-) * Effectively, you can look at this as translating the entire grid. * * Why do we do this stupid thing? * * 1) The loops get much simpler * 2) Just looking forward will hopefully get us more cache hits * 3) When we parallelize things, we only need to communicate in one direction instead of two! */ for(ix=0;ixngrid[0]; for(iy=0;iyngrid[1]; for(iz=0;izngrid[2]; /* Calculate index in the charge grid */ index = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex; /* Add the charge times the bspline spread/interpolation factors to this grid position */ pme->grid[index].re += q*thetax[ix]*thetay[iy]*thetaz[iz]; } } } } } static void pme_reciprocal_convolution(pme_t pme, const RealOpenMM periodicBoxSize[3], RealOpenMM * energy, RealOpenMM pme_virial[3][3]) { int kx,ky,kz; int nx,ny,nz; RealOpenMM mx,my,mz; RealOpenMM mhx,mhy,mhz,m2; RealOpenMM one_4pi_eps; RealOpenMM virxx,virxy,virxz,viryy,viryz,virzz; RealOpenMM bx,by,bz; RealOpenMM d1,d2; RealOpenMM eterm,vfactor,struct2,ets2; RealOpenMM esum; RealOpenMM factor; RealOpenMM denom; RealOpenMM boxfactor; RealOpenMM maxkx,maxky,maxkz; t_complex *ptr; nx = pme->ngrid[0]; ny = pme->ngrid[1]; nz = pme->ngrid[2]; one_4pi_eps = (RealOpenMM) (ONE_4PI_EPS0/pme->epsilon_r); factor = (RealOpenMM) (M_PI*M_PI/(pme->ewaldcoeff*pme->ewaldcoeff)); boxfactor = (RealOpenMM) (M_PI*periodicBoxSize[0]*periodicBoxSize[1]*periodicBoxSize[2]); esum = 0; virxx = 0; virxy = 0; virxz = 0; viryy = 0; viryz = 0; virzz = 0; maxkx = (RealOpenMM) ((nx+1)/2); maxky = (RealOpenMM) ((ny+1)/2); maxkz = (RealOpenMM) ((nz+1)/2); for(kx=0;kxbsplines_moduli[0][kx]; for(ky=0;kybsplines_moduli[1][ky]; for(kz=0;kzgrid + kx*ny*nz + ky*nz + kz; /* Get grid data for this frequency */ d1 = ptr->re; d2 = ptr->im; /* Calculate the convolution - see the Essman/Darden paper for the equation! */ m2 = mhx*mhx+mhy*mhy+mhz*mhz; bz = pme->bsplines_moduli[2][kz]; denom = m2*bx*by*bz; eterm = one_4pi_eps*exp(-factor*m2)/denom; /* write back convolution data to grid */ ptr->re = d1*eterm; ptr->im = d2*eterm; struct2 = (d1*d1+d2*d2); /* Long-range PME contribution to the energy for this frequency */ ets2 = eterm*struct2; esum += ets2; /* PME long-range contribution to atomic virial. Since it is symmetric, we only calculate half the matrix inside this loop. */ vfactor = (factor*m2+1)*2/m2; virxx += ets2*(vfactor*mhx*mhx-1); virxy += ets2*vfactor*mhx*mhy; virxz += ets2*vfactor*mhx*mhz; viryy += ets2*(vfactor*mhy*mhy-1); viryz += ets2*vfactor*mhy*mhz; virzz += ets2*(vfactor*mhz*mhz-1); } } } pme_virial[0][0] = (RealOpenMM) (0.25*virxx); pme_virial[1][1] = (RealOpenMM) (0.25*viryy); pme_virial[2][2] = (RealOpenMM) (0.25*virzz); pme_virial[0][1] = pme_virial[1][0] = (RealOpenMM) (0.25*virxy); pme_virial[0][2] = pme_virial[2][0] = (RealOpenMM) (0.25*virxz); pme_virial[1][2] = pme_virial[2][1] = (RealOpenMM) (0.25*viryz); /* The factor 0.5 is nothing special, but it is better to have it here than inside the loop :-) */ *energy = (RealOpenMM) (0.5*esum); } static void pme_grid_interpolate_force(pme_t pme, const RealOpenMM periodicBoxSize[3], vector& charges, vector& forces) { int i; int ix,iy,iz; int x0index,y0index,z0index; int xindex,yindex,zindex; int index; int order; RealOpenMM q; RealOpenMM * thetax; RealOpenMM * thetay; RealOpenMM * thetaz; RealOpenMM * dthetax; RealOpenMM * dthetay; RealOpenMM * dthetaz; RealOpenMM tx,ty,tz; RealOpenMM dtx,dty,dtz; RealOpenMM fx,fy,fz; RealOpenMM gridvalue; int nx,ny,nz; nx = pme->ngrid[0]; ny = pme->ngrid[1]; nz = pme->ngrid[2]; order = pme->order; /* This is almost identical to the charge spreading routine! */ for(i=0;inatoms;i++) { fx = fy = fz = 0; q = charges[i]; /* Grid index for the actual atom position */ x0index = pme->particleindex[i][0]; y0index = pme->particleindex[i][1]; z0index = pme->particleindex[i][2]; /* Bspline factors for this atom in each dimension , calculated from fractional coordinates */ thetax = &(pme->bsplines_theta[0][i*order]); thetay = &(pme->bsplines_theta[1][i*order]); thetaz = &(pme->bsplines_theta[2][i*order]); dthetax = &(pme->bsplines_dtheta[0][i*order]); dthetay = &(pme->bsplines_dtheta[1][i*order]); dthetaz = &(pme->bsplines_dtheta[2][i*order]); /* See pme_grid_spread_charge() for comments about the order here, and only interpolation in one direction */ /* Since we will add order^3 (typically 4*4*4=64) terms to the force on each particle, we use temporary fx/fy/fz * variables, and only add it to memory forces[] at the end. */ for(ix=0;ixngrid[0]; /* Get both the bspline factor and its derivative with respect to the x coordinate! */ tx = thetax[ix]; dtx = dthetax[ix]; for(iy=0;iyngrid[1]; /* bspline + derivative wrt y */ ty = thetay[iy]; dty = dthetay[iy]; for(iz=0;izngrid[2]; /* bspline + derivative wrt z */ tz = thetaz[iz]; dtz = dthetaz[iz]; index = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex; /* Get the fft+convoluted+ifft:d data from the grid, which must be real by definition */ /* Checking that the imaginary part is indeed zero might be a good check :-) */ gridvalue = pme->grid[index].re; /* The d component of the force is calculated by taking the derived bspline in dimension d, normal bsplines in the other two */ fx += dtx*ty*tz*gridvalue; fy += tx*dty*tz*gridvalue; fz += tx*ty*dtz*gridvalue; } } } /* Update memory force, note that we multiply by charge and some box stuff */ forces[i][0] -= q*fx*nx/periodicBoxSize[0]; forces[i][1] -= q*fy*ny/periodicBoxSize[1]; forces[i][2] -= q*fz*nz/periodicBoxSize[2]; } } /* EXPORTED ROUTINES */ int pme_init(pme_t * ppme, RealOpenMM ewaldcoeff, int natoms, const int ngrid[3], int pme_order, RealOpenMM epsilon_r) { pme_t pme; int d; pme = (pme_t) malloc(sizeof(struct pme)); pme->order = pme_order; pme->epsilon_r = epsilon_r; pme->ewaldcoeff = ewaldcoeff; pme->natoms = natoms; for(d=0;d<3;d++) { pme->ngrid[d] = ngrid[d]; pme->bsplines_theta[d] = (RealOpenMM *)malloc(sizeof(RealOpenMM)*pme_order*natoms); pme->bsplines_dtheta[d] = (RealOpenMM *)malloc(sizeof(RealOpenMM)*pme_order*natoms); } pme->particlefraction = (rvec *)malloc(sizeof(rvec)*natoms); pme->particleindex = (ivec *)malloc(sizeof(ivec)*natoms); /* Allocate charge grid storage */ pme->grid = (t_complex *)malloc(sizeof(t_complex)*ngrid[0]*ngrid[1]*ngrid[2]); fftpack_init_3d(&pme->fftplan,ngrid[0],ngrid[1],ngrid[2]); /* Setup bspline moduli (see Essman paper) */ pme_calculate_bsplines_moduli(pme); *ppme = pme; return 0; } int pme_exec(pme_t pme, vector& atomCoordinates, vector& forces, vector& charges, const RealOpenMM periodicBoxSize[3], RealOpenMM * energy, RealOpenMM pme_virial[3][3]) { /* Routine is called with coordinates in x, a box, and charges in q */ /* Before we can do the actual interpolation, we need to recalculate and update * the indices for each particle in the charge grid (initialized in pme_init()), * and what its fractional offset in this grid cell is. */ /* Update charge grid indices and fractional offsets for each atom. * The indices/fractions are stored internally in the pme datatype */ pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxSize); /* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */ pme_update_bsplines(pme); /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */ pme_grid_spread_charge(pme, charges); /* do 3d-fft */ fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid); /* solve in k-space */ pme_reciprocal_convolution(pme,periodicBoxSize,energy,pme_virial); /* do 3d-invfft */ fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid); /* Get the particle forces from the grid and bsplines in the pme structure */ pme_grid_interpolate_force(pme,periodicBoxSize,charges,forces); return 0; } int pme_destroy(pme_t pme) { int d; free(pme->grid); for(d=0;d<3;d++) { free(pme->bsplines_moduli[d]); free(pme->bsplines_theta[d]); free(pme->bsplines_dtheta[d]); } free(pme->particlefraction); free(pme->particleindex); fftpack_destroy(pme->fftplan); /* destroy structure itself */ free(pme); return 0; }