Commit c66dd5a3 authored by Rossen Apostolov's avatar Rossen Apostolov
Browse files

Implemented the PME method in the reference platform.

parent a5f8fb35
......@@ -246,7 +246,8 @@ public:
NoCutoff = 0,
CutoffNonPeriodic = 1,
CutoffPeriodic = 2,
Ewald = 3
Ewald = 3,
PME = 4
};
static std::string Name() {
return "CalcNonbondedForce";
......
......@@ -91,7 +91,12 @@ public:
* Periodic boundary conditions are used, and Ewald summation is used to compute the interaction of each particle
* with all periodic copies of every other particle.
*/
Ewald = 3
Ewald = 3,
/**
* Periodic boundary conditions are used, and Particle-Mesh Ewald (PME) summation is used to compute the interaction of each particle
* with all periodic copies of every other particle.
*/
PME = 4
};
/**
* Create a NonbondedForce.
......
......@@ -392,7 +392,7 @@ void ReferenceCalcNonbondedForceKernel::initialize(const System& system, const N
neighborList = NULL;
else
neighborList = new NeighborList();
if (nonbondedMethod == Ewald) {
if (nonbondedMethod == Ewald || nonbondedMethod == PME) {
RealOpenMM ewaldErrorTol = (RealOpenMM) force.getEwaldErrorTolerance();
ewaldAlpha = (RealOpenMM) (std::sqrt(-std::log(ewaldErrorTol))/nonbondedCutoff);
RealOpenMM mx = periodicBoxSize[0]/nonbondedCutoff;
......@@ -417,14 +417,17 @@ void ReferenceCalcNonbondedForceKernel::executeForces(OpenMMContextImpl& context
ReferenceLJCoulombIxn clj;
bool periodic = (nonbondedMethod == CutoffPeriodic);
bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME);
if (nonbondedMethod != NoCutoff) {
computeNeighborListVoxelHash(*neighborList, numParticles, posData, exclusions, (periodic || ewald) ? periodicBoxSize : NULL, nonbondedCutoff, 0.0);
computeNeighborListVoxelHash(*neighborList, numParticles, posData, exclusions, (periodic || ewald || pme) ? periodicBoxSize : NULL, nonbondedCutoff, 0.0);
clj.setUseCutoff(nonbondedCutoff, *neighborList, 78.3f);
}
if (periodic||ewald)
if (periodic||ewald||pme)
clj.setPeriodic(periodicBoxSize);
if (ewald)
clj.setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]);
if (pme)
clj.setUsePME(ewaldAlpha);
clj.calculatePairIxn(numParticles, posData, particleParamArray, exclusionArray, 0, forceData, 0, 0);
ReferenceBondForce refBondForce;
ReferenceLJCoulomb14 nonbonded14;
......@@ -440,14 +443,17 @@ double ReferenceCalcNonbondedForceKernel::executeEnergy(OpenMMContextImpl& conte
ReferenceLJCoulombIxn clj;
bool periodic = (nonbondedMethod == CutoffPeriodic);
bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME);
if (nonbondedMethod != NoCutoff) {
computeNeighborListVoxelHash(*neighborList, numParticles, posData, exclusions, (periodic || ewald) ? periodicBoxSize : NULL, nonbondedCutoff, 0.0);
computeNeighborListVoxelHash(*neighborList, numParticles, posData, exclusions, (periodic || ewald || pme) ? periodicBoxSize : NULL, nonbondedCutoff, 0.0);
clj.setUseCutoff(nonbondedCutoff, *neighborList, 78.3f);
}
if (periodic || ewald)
if (periodic || ewald || pme)
clj.setPeriodic(periodicBoxSize);
if (ewald)
clj.setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]);
if (pme)
clj.setUsePME(ewaldAlpha);
clj.calculatePairIxn(numParticles, posData, particleParamArray, exclusionArray, 0, forceData, 0, &energy);
ReferenceBondForce refBondForce;
ReferenceLJCoulomb14 nonbonded14;
......
......@@ -44,7 +44,7 @@ using std::vector;
--------------------------------------------------------------------------------------- */
ReferenceLJCoulombIxn::ReferenceLJCoulombIxn( ) : cutoff(false), periodic(false), ewald(false) {
ReferenceLJCoulombIxn::ReferenceLJCoulombIxn( ) : cutoff(false), periodic(false), ewald(false), pme(false) {
// ---------------------------------------------------------------------------------------
......@@ -138,6 +138,19 @@ ReferenceLJCoulombIxn::~ReferenceLJCoulombIxn( ){
ewald = true;
}
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter
--------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setUsePME(RealOpenMM alpha) {
alphaEwald = alpha;
pme = true;
}
/**---------------------------------------------------------------------------------------
Calculate parameters for LJ Coulomb ixn
......@@ -222,7 +235,6 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
RealOpenMM factorEwald = -1 / (4*alphaEwald*alphaEwald);
RealOpenMM SQRT_PI = sqrt(PI);
RealOpenMM TWO_PI = 2.0 * PI;
static const RealOpenMM epsilon = 1.0;
......@@ -415,6 +427,143 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
}
/**---------------------------------------------------------------------------------------
Calculate PME ixn
@param numberOfAtoms number of atoms
@param atomCoordinates atom coordinates
@param atomParameters atom parameters atomParameters[atomIndex][paramterIndex]
@param exclusions atom exclusion indices exclusions[atomIndex][atomToExcludeIndex]
exclusions[atomIndex][0] = number of exclusions
exclusions[atomIndex][1-no.] = atom indices of atoms to excluded from
interacting w/ atom atomIndex
@param fixedParameters non atom parameters (not currently used)
@param forces force array (forces added)
@param energyByAtom atom energy
@param totalEnergy total energy
@return ReferenceForce::DefaultReturn
--------------------------------------------------------------------------------------- */
int ReferenceLJCoulombIxn::calculatePMEIxn( int numberOfAtoms, RealOpenMM** atomCoordinates,
RealOpenMM** atomParameters, int** exclusions,
RealOpenMM* fixedParameters, RealOpenMM** forces,
RealOpenMM* energyByAtom, RealOpenMM* totalEnergy) const {
#include "../SimTKUtilities/RealTypeSimTk.h"
#include "pme.h"
RealOpenMM SQRT_PI = sqrt(PI);
static const RealOpenMM one = 1.0;
RealOpenMM selfEwaldEnergy = 0.0;
RealOpenMM realSpaceEwaldEnergy = 0.0;
RealOpenMM recipEnergy = 0.0;
// **************************************************************************************
// SELF ENERGY
// **************************************************************************************
for( int atomID = 0; atomID < numberOfAtoms; atomID++ ){
selfEwaldEnergy = selfEwaldEnergy + atomParameters[atomID][QIndex]*atomParameters[atomID][QIndex];
}
selfEwaldEnergy = selfEwaldEnergy * alphaEwald/SQRT_PI ;
// **************************************************************************************
// RECIPROCAL SPACE EWALD ENERGY AND FORCES
// **************************************************************************************
pme_t pmedata; /* abstract handle for PME data */
int ngrid[3];
RealOpenMM virial[3][3];
/* PME grid dimensions.
* We typically want to set this as the spacing rather than absolute dimensions, but
* to be able to reproduce results from other programs (e.g. Gromacs) we need to be
* able to set exact grid dimenisions occasionally.
*/
ngrid[0] = 16;
ngrid[1] = 16;
ngrid[2] = 16;
pme_init(&pmedata,alphaEwald,numberOfAtoms,ngrid,4,1);
pme_exec(pmedata,atomCoordinates,forces,atomParameters,periodicBoxSize,&recipEnergy,virial);
// **************************************************************************************
// SHORT-RANGE ENERGY AND FORCES
// **************************************************************************************
RealOpenMM deltaR[2][ReferenceForce::LastDeltaRIndex];
for( int atomID1 = 0; atomID1 < numberOfAtoms; atomID1++ ){
for( int atomID2 = atomID1 + 1; atomID2 < numberOfAtoms; atomID2++ ){
ReferenceForce::getDeltaRPeriodic( atomCoordinates[atomID2], atomCoordinates[atomID1], periodicBoxSize, deltaR[0] );
RealOpenMM r = deltaR[0][ReferenceForce::RIndex];
RealOpenMM r2 = deltaR[0][ReferenceForce::R2Index];
RealOpenMM inverseR = one/(deltaR[0][ReferenceForce::RIndex]);
realSpaceEwaldEnergy =
(RealOpenMM)(realSpaceEwaldEnergy + atomParameters[atomID1][QIndex]*atomParameters[atomID2][QIndex]*inverseR*erfc(alphaEwald*r));
}
}
// allocate and initialize exclusion array
vector<int> exclusionIndices(numberOfAtoms);
for( int ii = 0; ii < numberOfAtoms; ii++ ){
exclusionIndices[ii] = -1;
}
for( int ii = 0; ii < numberOfAtoms; ii++ ){
// set exclusions
for( int jj = 1; jj <= exclusions[ii][0]; jj++ ){
exclusionIndices[exclusions[ii][jj]] = ii;
}
// loop over atom pairs
for( int jj = ii+1; jj < numberOfAtoms; jj++ ){
if( exclusionIndices[jj] != ii ){
ReferenceForce::getDeltaRPeriodic( atomCoordinates[jj], atomCoordinates[ii], periodicBoxSize, deltaR[0] );
RealOpenMM r = deltaR[0][ReferenceForce::RIndex];
RealOpenMM r2 = deltaR[0][ReferenceForce::R2Index];
RealOpenMM inverseR = one/(deltaR[0][ReferenceForce::RIndex]);
RealOpenMM alphaR = alphaEwald * r;
realSpaceEwaldEnergy =
(RealOpenMM)(realSpaceEwaldEnergy + atomParameters[ii][QIndex]*atomParameters[jj][QIndex]*inverseR*erfc(alphaR));
RealOpenMM dEdR = atomParameters[ii][QIndex] * atomParameters[jj][QIndex] * inverseR * inverseR * inverseR;
dEdR = (RealOpenMM)(dEdR * (erfc(alphaR) + 2 * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ));
for( int kk = 0; kk < 3; kk++ ){
RealOpenMM force = dEdR*deltaR[0][kk];
forces[ii][kk] += force;
forces[jj][kk] -= force;
}
}
}
}
// ***********************************************************************
if( totalEnergy ) {
*totalEnergy += recipEnergy + realSpaceEwaldEnergy - selfEwaldEnergy;
}
return ReferenceForce::DefaultReturn;
}
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn
......@@ -442,6 +591,8 @@ int ReferenceLJCoulombIxn::calculatePairIxn( int numberOfAtoms, RealOpenMM** ato
if (ewald)
return calculateEwaldIxn(numberOfAtoms, atomCoordinates, atomParameters, exclusions, fixedParameters, forces, energyByAtom, totalEnergy);
if (pme)
return calculatePMEIxn(numberOfAtoms, atomCoordinates, atomParameters, exclusions, fixedParameters, forces, energyByAtom, totalEnergy);
if (cutoff) {
for (int i = 0; i < (int) neighborList->size(); i++) {
OpenMM::AtomPair pair = (*neighborList)[i];
......
......@@ -37,6 +37,7 @@ class ReferenceLJCoulombIxn : public ReferencePairIxn {
bool cutoff;
bool periodic;
bool ewald;
bool pme;
const OpenMM::NeighborList* neighborList;
RealOpenMM periodicBoxSize[3];
RealOpenMM cutoffDistance;
......@@ -131,6 +132,17 @@ class ReferenceLJCoulombIxn : public ReferencePairIxn {
void setUseEwald(RealOpenMM alpha, int kmaxx, int kmaxy, int kmaxz);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter
--------------------------------------------------------------------------------------- */
void setUsePME(RealOpenMM alpha);
/**---------------------------------------------------------------------------------------
Calculate parameters for LJ 1-4 ixn
......@@ -203,6 +215,31 @@ private:
RealOpenMM* fixedParameters, RealOpenMM** forces,
RealOpenMM* energyByAtom, RealOpenMM* totalEnergy ) const;
/**---------------------------------------------------------------------------------------
Calculate PME ixn
@param numberOfAtoms number of atoms
@param atomCoordinates atom coordinates
@param atomParameters atom parameters (charges, c6, c12, ...) atomParameters[atomIndex][paramterIndex]
@param exclusions atom exclusion indices exclusions[atomIndex][atomToExcludeIndex]
exclusions[atomIndex][0] = number of exclusions
exclusions[atomIndex][1-no.] = atom indices of atoms to excluded from
interacting w/ atom atomIndex
@param fixedParameters non atom parameters (not currently used)
@param forces force array (forces added)
@param energyByAtom atom energy
@param totalEnergy total energy
@return ReferenceForce::DefaultReturn
--------------------------------------------------------------------------------------- */
int calculatePMEIxn( int numberOfAtoms, RealOpenMM** atomCoordinates,
RealOpenMM** atomParameters, int** exclusions,
RealOpenMM* fixedParameters, RealOpenMM** forces,
RealOpenMM* energyByAtom, RealOpenMM* totalEnergy ) const;
};
// ---------------------------------------------------------------------------------------
......
/*
*
* 1D transformations based on F77 to C translation of FFTPACK.
* FFTPACK was originally written by paul n swarztrauber
* at the national center for atmospheric research
*
* FFTPACK is in the public domain.
*
* Higher-dimension transforms copyright Erik Lindahl, 2008-2009.
* Just as FFTPACK, this file may be redistributed freely, and can be
* considered to be in the public domain.
*
* Any errors in this (threadsafe, but not threaded) C version
* are due to the f2c translator, or hacks by Erik Lindahl...
*
* Erik Lindahl, lindahl@cbr.su.se
* Center for Biomembrane Research
* Stockholm University, Sweden
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include "fftpack.h"
/** Contents of the FFTPACK fft datatype.
*
* FFTPACK only does 1d transforms, so we use a pointers to another fft for
* the transform in the next dimension.
* Thus, a 3d-structure contains a pointer to a 2d one, which in turns contains
* a pointer to a 1d. The 1d structure has next==NULL.
*/
struct fftpack
{
int ndim; /**< Dimensions, including our subdimensions. */
int n; /**< Number of points in this dimension. */
int ifac[15]; /**< 15 bytes needed for cfft and rfft */
struct fftpack * next; /**< Pointer to next dimension, or NULL. */
RealOpenMM * work; /**< 1st 4n reserved for cfft, 1st 2n for rfft */
};
static void
fftpack_passf2(int ido,
int l1,
RealOpenMM cc[],
RealOpenMM ch[],
RealOpenMM wa1[],
int isign)
{
int i, k, ah, ac;
RealOpenMM ti2, tr2;
if (ido <= 2)
{
for (k=0; k<l1; k++)
{
ah = k*ido;
ac = 2*k*ido;
ch[ah] = cc[ac] + cc[ac + ido];
ch[ah + ido*l1] = cc[ac] - cc[ac + ido];
ch[ah+1] = cc[ac+1] + cc[ac + ido + 1];
ch[ah + ido*l1 + 1] = cc[ac+1] - cc[ac + ido + 1];
}
}
else
{
for (k=0; k<l1; k++)
{
for (i=0; i<ido-1; i+=2)
{
ah = i + k*ido;
ac = i + 2*k*ido;
ch[ah] = cc[ac] + cc[ac + ido];
tr2 = cc[ac] - cc[ac + ido];
ch[ah+1] = cc[ac+1] + cc[ac + 1 + ido];
ti2 = cc[ac+1] - cc[ac + 1 + ido];
ch[ah+l1*ido+1] = wa1[i]*ti2 + isign*wa1[i+1]*tr2;
ch[ah+l1*ido] = wa1[i]*tr2 - isign*wa1[i+1]*ti2;
}
}
}
}
static void
fftpack_passf3(int ido,
int l1,
RealOpenMM cc[],
RealOpenMM ch[],
RealOpenMM wa1[],
RealOpenMM wa2[],
int isign)
{
const RealOpenMM taur = -0.5;
const RealOpenMM taui = 0.866025403784439;
int i, k, ac, ah;
RealOpenMM ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
if (ido == 2)
{
for (k=1; k<=l1; k++)
{
ac = (3*k - 2)*ido;
tr2 = cc[ac] + cc[ac + ido];
cr2 = cc[ac - ido] + taur*tr2;
ah = (k - 1)*ido;
ch[ah] = cc[ac - ido] + tr2;
ti2 = cc[ac + 1] + cc[ac + ido + 1];
ci2 = cc[ac - ido + 1] + taur*ti2;
ch[ah + 1] = cc[ac - ido + 1] + ti2;
cr3 = isign*taui*(cc[ac] - cc[ac + ido]);
ci3 = isign*taui*(cc[ac + 1] - cc[ac + ido + 1]);
ch[ah + l1*ido] = cr2 - ci3;
ch[ah + 2*l1*ido] = cr2 + ci3;
ch[ah + l1*ido + 1] = ci2 + cr3;
ch[ah + 2*l1*ido + 1] = ci2 - cr3;
}
}
else
{
for (k=1; k<=l1; k++)
{
for (i=0; i<ido-1; i+=2)
{
ac = i + (3*k - 2)*ido;
tr2 = cc[ac] + cc[ac + ido];
cr2 = cc[ac - ido] + taur*tr2;
ah = i + (k-1)*ido;
ch[ah] = cc[ac - ido] + tr2;
ti2 = cc[ac + 1] + cc[ac + ido + 1];
ci2 = cc[ac - ido + 1] + taur*ti2;
ch[ah + 1] = cc[ac - ido + 1] + ti2;
cr3 = isign*taui*(cc[ac] - cc[ac + ido]);
ci3 = isign*taui*(cc[ac + 1] - cc[ac + ido + 1]);
dr2 = cr2 - ci3;
dr3 = cr2 + ci3;
di2 = ci2 + cr3;
di3 = ci2 - cr3;
ch[ah + l1*ido + 1] = wa1[i]*di2 + isign*wa1[i+1]*dr2;
ch[ah + l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2;
ch[ah + 2*l1*ido + 1] = wa2[i]*di3 + isign*wa2[i+1]*dr3;
ch[ah + 2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3;
}
}
}
}
static void
fftpack_passf4(int ido,
int l1,
RealOpenMM cc[],
RealOpenMM ch[],
RealOpenMM wa1[],
RealOpenMM wa2[],
RealOpenMM wa3[],
int isign)
{
int i, k, ac, ah;
RealOpenMM ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
if (ido == 2)
{
for (k=0; k<l1; k++)
{
ac = 4*k*ido + 1;
ti1 = cc[ac] - cc[ac + 2*ido];
ti2 = cc[ac] + cc[ac + 2*ido];
tr4 = cc[ac + 3*ido] - cc[ac + ido];
ti3 = cc[ac + ido] + cc[ac + 3*ido];
tr1 = cc[ac - 1] - cc[ac + 2*ido - 1];
tr2 = cc[ac - 1] + cc[ac + 2*ido - 1];
ti4 = cc[ac + ido - 1] - cc[ac + 3*ido - 1];
tr3 = cc[ac + ido - 1] + cc[ac + 3*ido - 1];
ah = k*ido;
ch[ah] = tr2 + tr3;
ch[ah + 2*l1*ido] = tr2 - tr3;
ch[ah + 1] = ti2 + ti3;
ch[ah + 2*l1*ido + 1] = ti2 - ti3;
ch[ah + l1*ido] = tr1 + isign*tr4;
ch[ah + 3*l1*ido] = tr1 - isign*tr4;
ch[ah + l1*ido + 1] = ti1 + isign*ti4;
ch[ah + 3*l1*ido + 1] = ti1 - isign*ti4;
}
}
else
{
for (k=0; k<l1; k++)
{
for (i=0; i<ido-1; i+=2)
{
ac = i + 1 + 4*k*ido;
ti1 = cc[ac] - cc[ac + 2*ido];
ti2 = cc[ac] + cc[ac + 2*ido];
ti3 = cc[ac + ido] + cc[ac + 3*ido];
tr4 = cc[ac + 3*ido] - cc[ac + ido];
tr1 = cc[ac - 1] - cc[ac + 2*ido - 1];
tr2 = cc[ac - 1] + cc[ac + 2*ido - 1];
ti4 = cc[ac + ido - 1] - cc[ac + 3*ido - 1];
tr3 = cc[ac + ido - 1] + cc[ac + 3*ido - 1];
ah = i + k*ido;
ch[ah] = tr2 + tr3;
cr3 = tr2 - tr3;
ch[ah + 1] = ti2 + ti3;
ci3 = ti2 - ti3;
cr2 = tr1 + isign*tr4;
cr4 = tr1 - isign*tr4;
ci2 = ti1 + isign*ti4;
ci4 = ti1 - isign*ti4;
ch[ah + l1*ido] = wa1[i]*cr2 - isign*wa1[i + 1]*ci2;
ch[ah + l1*ido + 1] = wa1[i]*ci2 + isign*wa1[i + 1]*cr2;
ch[ah + 2*l1*ido] = wa2[i]*cr3 - isign*wa2[i + 1]*ci3;
ch[ah + 2*l1*ido + 1] = wa2[i]*ci3 + isign*wa2[i + 1]*cr3;
ch[ah + 3*l1*ido] = wa3[i]*cr4 -isign*wa3[i + 1]*ci4;
ch[ah + 3*l1*ido + 1] = wa3[i]*ci4 + isign*wa3[i + 1]*cr4;
}
}
}
}
static void
fftpack_passf5(int ido,
int l1,
RealOpenMM cc[],
RealOpenMM ch[],
RealOpenMM wa1[],
RealOpenMM wa2[],
RealOpenMM wa3[],
RealOpenMM wa4[],
int isign)
{
const RealOpenMM tr11 = 0.309016994374947;
const RealOpenMM ti11 = 0.951056516295154;
const RealOpenMM tr12 = -0.809016994374947;
const RealOpenMM ti12 = 0.587785252292473;
int i, k, ac, ah;
RealOpenMM ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
if (ido == 2)
{
for (k = 1; k <= l1; ++k)
{
ac = (5*k - 4)*ido + 1;
ti5 = cc[ac] - cc[ac + 3*ido];
ti2 = cc[ac] + cc[ac + 3*ido];
ti4 = cc[ac + ido] - cc[ac + 2*ido];
ti3 = cc[ac + ido] + cc[ac + 2*ido];
tr5 = cc[ac - 1] - cc[ac + 3*ido - 1];
tr2 = cc[ac - 1] + cc[ac + 3*ido - 1];
tr4 = cc[ac + ido - 1] - cc[ac + 2*ido - 1];
tr3 = cc[ac + ido - 1] + cc[ac + 2*ido - 1];
ah = (k - 1)*ido;
ch[ah] = cc[ac - ido - 1] + tr2 + tr3;
ch[ah + 1] = cc[ac - ido] + ti2 + ti3;
cr2 = cc[ac - ido - 1] + tr11*tr2 + tr12*tr3;
ci2 = cc[ac - ido] + tr11*ti2 + tr12*ti3;
cr3 = cc[ac - ido - 1] + tr12*tr2 + tr11*tr3;
ci3 = cc[ac - ido] + tr12*ti2 + tr11*ti3;
cr5 = isign*(ti11*tr5 + ti12*tr4);
ci5 = isign*(ti11*ti5 + ti12*ti4);
cr4 = isign*(ti12*tr5 - ti11*tr4);
ci4 = isign*(ti12*ti5 - ti11*ti4);
ch[ah + l1*ido] = cr2 - ci5;
ch[ah + 4*l1*ido] = cr2 + ci5;
ch[ah + l1*ido + 1] = ci2 + cr5;
ch[ah + 2*l1*ido + 1] = ci3 + cr4;
ch[ah + 2*l1*ido] = cr3 - ci4;
ch[ah + 3*l1*ido] = cr3 + ci4;
ch[ah + 3*l1*ido + 1] = ci3 - cr4;
ch[ah + 4*l1*ido + 1] = ci2 - cr5;
}
}
else
{
for (k=1; k<=l1; k++)
{
for (i=0; i<ido-1; i+=2)
{
ac = i + 1 + (k*5 - 4)*ido;
ti5 = cc[ac] - cc[ac + 3*ido];
ti2 = cc[ac] + cc[ac + 3*ido];
ti4 = cc[ac + ido] - cc[ac + 2*ido];
ti3 = cc[ac + ido] + cc[ac + 2*ido];
tr5 = cc[ac - 1] - cc[ac + 3*ido - 1];
tr2 = cc[ac - 1] + cc[ac + 3*ido - 1];
tr4 = cc[ac + ido - 1] - cc[ac + 2*ido - 1];
tr3 = cc[ac + ido - 1] + cc[ac + 2*ido - 1];
ah = i + (k - 1)*ido;
ch[ah] = cc[ac - ido - 1] + tr2 + tr3;
ch[ah + 1] = cc[ac - ido] + ti2 + ti3;
cr2 = cc[ac - ido - 1] + tr11*tr2 + tr12*tr3;
ci2 = cc[ac - ido] + tr11*ti2 + tr12*ti3;
cr3 = cc[ac - ido - 1] + tr12*tr2 + tr11*tr3;
ci3 = cc[ac - ido] + tr12*ti2 + tr11*ti3;
cr5 = isign*(ti11*tr5 + ti12*tr4);
ci5 = isign*(ti11*ti5 + ti12*ti4);
cr4 = isign*(ti12*tr5 - ti11*tr4);
ci4 = isign*(ti12*ti5 - ti11*ti4);
dr3 = cr3 - ci4;
dr4 = cr3 + ci4;
di3 = ci3 + cr4;
di4 = ci3 - cr4;
dr5 = cr2 + ci5;
dr2 = cr2 - ci5;
di5 = ci2 - cr5;
di2 = ci2 + cr5;
ch[ah + l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2;
ch[ah + l1*ido + 1] = wa1[i]*di2 + isign*wa1[i+1]*dr2;
ch[ah + 2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3;
ch[ah + 2*l1*ido + 1] = wa2[i]*di3 + isign*wa2[i+1]*dr3;
ch[ah + 3*l1*ido] = wa3[i]*dr4 - isign*wa3[i+1]*di4;
ch[ah + 3*l1*ido + 1] = wa3[i]*di4 + isign*wa3[i+1]*dr4;
ch[ah + 4*l1*ido] = wa4[i]*dr5 - isign*wa4[i+1]*di5;
ch[ah + 4*l1*ido + 1] = wa4[i]*di5 + isign*wa4[i+1]*dr5;
}
}
}
}
static void
fftpack_passf(int * nac,
int ido,
int ip,
int l1,
int idl1,
RealOpenMM cc[],
RealOpenMM ch[],
RealOpenMM wa[],
int isign)
{
int idij, idlj, idot, ipph, i, j, k, l, jc, lc, ik, nt, idj, idl, inc,idp;
RealOpenMM wai, war;
idot = ido / 2;
nt = ip*idl1;
ipph = (ip + 1) / 2;
idp = ip*ido;
if (ido >= l1)
{
for (j=1; j<ipph; j++)
{
jc = ip - j;
for (k=0; k<l1; k++)
{
for (i=0; i<ido; i++)
{
ch[i + (k + j*l1)*ido] = cc[i + (j + k*ip)*ido] + cc[i + (jc + k*ip)*ido];
ch[i + (k + jc*l1)*ido] = cc[i + (j + k*ip)*ido] - cc[i + (jc + k*ip)*ido];
}
}
}
for (k=0; k<l1; k++)
for (i=0; i<ido; i++)
ch[i + k*ido] = cc[i + k*ip*ido];
}
else
{
for (j=1; j<ipph; j++)
{
jc = ip - j;
for (i=0; i<ido; i++)
{
for (k=0; k<l1; k++)
{
ch[i + (k + j*l1)*ido] = cc[i + (j + k*ip)*ido] + cc[i + (jc + k*ip)*ido];
ch[i + (k + jc*l1)*ido] = cc[i + (j + k*ip)*ido] - cc[i + (jc + k*ip)*ido];
}
}
}
for (i=0; i<ido; i++)
for (k=0; k<l1; k++)
ch[i + k*ido] = cc[i + k*ip*ido];
}
idl = 2 - ido;
inc = 0;
for (l=1; l<ipph; l++)
{
lc = ip - l;
idl += ido;
for (ik=0; ik<idl1; ik++)
{
cc[ik + l*idl1] = ch[ik] + wa[idl - 2]*ch[ik + idl1];
cc[ik + lc*idl1] = isign*wa[idl-1]*ch[ik + (ip-1)*idl1];
}
idlj = idl;
inc += ido;
for (j=2; j<ipph; j++)
{
jc = ip - j;
idlj += inc;
if (idlj > idp) idlj -= idp;
war = wa[idlj - 2];
wai = wa[idlj-1];
for (ik=0; ik<idl1; ik++)
{
cc[ik + l*idl1] += war*ch[ik + j*idl1];
cc[ik + lc*idl1] += isign*wai*ch[ik + jc*idl1];
}
}
}
for (j=1; j<ipph; j++)
for (ik=0; ik<idl1; ik++)
ch[ik] += ch[ik + j*idl1];
for (j=1; j<ipph; j++)
{
jc = ip - j;
for (ik=1; ik<idl1; ik+=2)
{
ch[ik - 1 + j*idl1] = cc[ik - 1 + j*idl1] - cc[ik + jc*idl1];
ch[ik - 1 + jc*idl1] = cc[ik - 1 + j*idl1] + cc[ik + jc*idl1];
ch[ik + j*idl1] = cc[ik + j*idl1] + cc[ik - 1 + jc*idl1];
ch[ik + jc*idl1] = cc[ik + j*idl1] - cc[ik - 1 + jc*idl1];
}
}
*nac = 1;
if (ido == 2)
return;
*nac = 0;
for (ik=0; ik<idl1; ik++)
{
cc[ik] = ch[ik];
}
for (j=1; j<ip; j++)
{
for (k=0; k<l1; k++)
{
cc[(k + j*l1)*ido + 0] = ch[(k + j*l1)*ido + 0];
cc[(k + j*l1)*ido + 1] = ch[(k + j*l1)*ido + 1];
}
}
if (idot <= l1)
{
idij = 0;
for (j=1; j<ip; j++)
{
idij += 2;
for (i=3; i<ido; i+=2)
{
idij += 2;
for (k=0; k<l1; k++)
{
cc[i - 1 + (k + j*l1)*ido] =
wa[idij - 2]*ch[i - 1 + (k + j*l1)*ido] -
isign*wa[idij-1]*ch[i + (k + j*l1)*ido];
cc[i + (k + j*l1)*ido] =
wa[idij - 2]*ch[i + (k + j*l1)*ido] +
isign*wa[idij-1]*ch[i - 1 + (k + j*l1)*ido];
}
}
}
}
else
{
idj = 2 - ido;
for (j=1; j<ip; j++)
{
idj += ido;
for (k = 0; k < l1; k++)
{
idij = idj;
for (i=3; i<ido; i+=2)
{
idij += 2;
cc[i - 1 + (k + j*l1)*ido] =
wa[idij - 2]*ch[i - 1 + (k + j*l1)*ido] -
isign*wa[idij-1]*ch[i + (k + j*l1)*ido];
cc[i + (k + j*l1)*ido] =
wa[idij - 2]*ch[i + (k + j*l1)*ido] +
isign*wa[idij-1]*ch[i - 1 + (k + j*l1)*ido];
}
}
}
}
}
static void
fftpack_cfftf1(int n,
RealOpenMM c[],
RealOpenMM ch[],
RealOpenMM wa[],
int ifac[15],
int isign)
{
int idot, i;
int k1, l1, l2;
int na, nf, ip, iw, ix2, ix3, ix4, nac, ido, idl1;
RealOpenMM *cinput, *coutput;
nf = ifac[1];
na = 0;
l1 = 1;
iw = 0;
for (k1=2; k1<=nf+1; k1++)
{
ip = ifac[k1];
l2 = ip*l1;
ido = n / l2;
idot = ido + ido;
idl1 = idot*l1;
if (na)
{
cinput = ch;
coutput = c;
}
else
{
cinput = c;
coutput = ch;
}
switch (ip)
{
case 4:
ix2 = iw + idot;
ix3 = ix2 + idot;
fftpack_passf4(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], isign);
na = !na;
break;
case 2:
fftpack_passf2(idot, l1, cinput, coutput, &wa[iw], isign);
na = !na;
break;
case 3:
ix2 = iw + idot;
fftpack_passf3(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], isign);
na = !na;
break;
case 5:
ix2 = iw + idot;
ix3 = ix2 + idot;
ix4 = ix3 + idot;
fftpack_passf5(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
na = !na;
break;
default:
fftpack_passf(&nac, idot, ip, l1, idl1, cinput, coutput, &wa[iw], isign);
if (nac != 0) na = !na;
}
l1 = l2;
iw += (ip - 1)*idot;
}
if (na == 0)
return;
for (i=0; i<2*n; i++)
c[i] = ch[i];
}
static void
fftpack_factorize(int n,
int ifac[15])
{
static const int ntryh[4] = { 3,4,2,5 };
int ntry=3, i, j=0, ib, nf=0, nl=n, nq, nr;
startloop:
if (j < 4)
ntry = ntryh[j];
else
ntry+= 2;
j++;
do
{
nq = nl / ntry;
nr = nl - ntry*nq;
if (nr != 0) goto startloop;
nf++;
ifac[nf + 1] = ntry;
nl = nq;
if (ntry == 2 && nf != 1)
{
for (i=2; i<=nf; i++)
{
ib = nf - i + 2;
ifac[ib + 1] = ifac[ib];
}
ifac[2] = 2;
}
}
while (nl != 1);
ifac[0] = n;
ifac[1] = nf;
}
static void
fftpack_cffti1(int n,
RealOpenMM wa[],
int ifac[15])
{
const RealOpenMM twopi = 6.28318530717959;
RealOpenMM arg, argh, argld, fi;
int idot, i, j;
int i1, k1, l1, l2;
int ld, ii, nf, ip;
int ido, ipm;
fftpack_factorize(n,ifac);
nf = ifac[1];
argh = twopi/(RealOpenMM)n;
i = 1;
l1 = 1;
for (k1=1; k1<=nf; k1++)
{
ip = ifac[k1+1];
ld = 0;
l2 = l1*ip;
ido = n / l2;
idot = ido + ido + 2;
ipm = ip - 1;
for (j=1; j<=ipm; j++)
{
i1 = i;
wa[i-1] = 1;
wa[i] = 0;
ld += l1;
fi = 0;
argld = ld*argh;
for (ii=4; ii<=idot; ii+=2)
{
i+= 2;
fi+= 1;
arg = fi*argld;
wa[i-1] = cos(arg);
wa[i] = sin(arg);
}
if (ip > 5)
{
wa[i1-1] = wa[i-1];
wa[i1] = wa[i];
}
}
l1 = l2;
}
}
static int
fftpack_transpose_2d(t_complex * in_data,
t_complex * out_data,
int nx,
int ny)
{
t_complex * src;
int i,j;
if(nx<2 || ny<2)
{
if(in_data != out_data)
{
memcpy(out_data,in_data,sizeof(t_complex)*nx*ny);
}
return 0;
}
if(in_data == out_data)
{
src = (t_complex *)malloc(sizeof(t_complex)*nx*ny);
memcpy(src,in_data,sizeof(t_complex)*nx*ny);
}
else
{
src = in_data;
}
for(i=0;i<nx;i++)
{
for(j=0;j<ny;j++)
{
out_data[j*nx+i].re = src[i*ny+j].re;
out_data[j*nx+i].im = src[i*ny+j].im;
}
}
if(src != in_data)
{
free(src);
}
return 0;
}
static int
fftpack_transpose_2d_nelem(t_complex * in_data,
t_complex * out_data,
int nx,
int ny,
int nelem)
{
t_complex * src;
int ncopy;
int i,j;
ncopy = nelem*sizeof(t_complex);
if(nx<2 || ny<2)
{
if(in_data != out_data)
{
memcpy(out_data,in_data,nx*ny*ncopy);
}
return 0;
}
if(in_data == out_data)
{
src = (t_complex *)malloc(nx*ny*ncopy);
memcpy(src,in_data,nx*ny*ncopy);
}
else
{
src = in_data;
}
for(i=0;i<nx;i++)
{
for(j=0;j<ny;j++)
{
memcpy(out_data + (j*nx+i)*nelem , src + (i*ny+j)*nelem , ncopy);
}
}
if(src != in_data)
{
free(src);
}
return 0;
}
int
fftpack_init_1d(fftpack_t * pfft,
int nx)
{
fftpack_t fft;
if(pfft==NULL)
{
fprintf(stderr,"Fatal error - Invalid FFT opaque type pointer.");
return EINVAL;
}
*pfft = NULL;
if( (fft = (struct fftpack *)malloc(sizeof(struct fftpack))) == NULL)
{
return ENOMEM;
}
fft->next = NULL;
fft->n = nx;
/* Need 4*n storage for 1D complex FFT */
if( (fft->work = (RealOpenMM *)malloc(sizeof(RealOpenMM)*(4*nx))) == NULL)
{
free(fft);
return ENOMEM;
}
if(fft->n>1)
fftpack_cffti1(nx,fft->work,fft->ifac);
*pfft = fft;
return 0;
};
int
fftpack_init_2d(fftpack_t * pfft,
int nx,
int ny)
{
fftpack_t fft;
int rc;
if(pfft==NULL)
{
fprintf(stderr,"Fatal error - Invalid FFT opaque type pointer.");
return EINVAL;
}
*pfft = NULL;
/* Create the X transform */
if( (rc = fftpack_init_1d(&fft,nx)) != 0)
{
return rc;
}
/* Create Y transform as a link from X */
if( (rc=fftpack_init_1d(&(fft->next),ny)) != 0)
{
free(fft);
return rc;
}
*pfft = fft;
return 0;
};
int
fftpack_init_3d(fftpack_t * pfft,
int nx,
int ny,
int nz)
{
fftpack_t fft;
int rc;
if(pfft==NULL)
{
fprintf(stderr,"Fatal error - Invalid FFT opaque type pointer.");
return EINVAL;
}
*pfft = NULL;
/* Create the X transform */
if( (fft = (struct fftpack *)malloc(sizeof(struct fftpack))) == NULL)
{
return ENOMEM;
}
fft->n = nx;
/* Need 4*nx storage for 1D complex FFT.
*/
if( (fft->work = (RealOpenMM *)malloc(sizeof(RealOpenMM)*(4*nx))) == NULL)
{
free(fft);
return ENOMEM;
}
fftpack_cffti1(nx,fft->work,fft->ifac);
/* Create 2D Y/Z transforms as a link from X */
if( (rc=fftpack_init_2d(&(fft->next),ny,nz)) != 0)
{
free(fft);
return rc;
}
*pfft = fft;
return 0;
};
int
fftpack_exec_1d (fftpack_t fft,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data)
{
int i,n;
RealOpenMM * p1;
RealOpenMM * p2;
n=fft->n;
if(n==1)
{
p1 = (RealOpenMM *)in_data;
p2 = (RealOpenMM *)out_data;
p2[0] = p1[0];
p2[1] = p1[1];
}
/* FFTPACK only does in-place transforms, so emulate out-of-place
* by copying data to the output array first.
*/
if( in_data != out_data )
{
p1 = (RealOpenMM *)in_data;
p2 = (RealOpenMM *)out_data;
/* n complex = 2*n RealOpenMM elements */
for(i=0;i<2*n;i++)
{
p2[i] = p1[i];
}
}
/* Elements 0 .. 2*n-1 in work are used for ffac values,
* Elements 2*n .. 4*n-1 are internal FFTPACK work space.
*/
if(dir == FFTPACK_FORWARD)
{
fftpack_cfftf1(n,(RealOpenMM *)out_data,fft->work+2*n,fft->work,fft->ifac, -1);
}
else if(dir == FFTPACK_BACKWARD)
{
fftpack_cfftf1(n,(RealOpenMM *)out_data,fft->work+2*n,fft->work,fft->ifac, 1);
}
else
{
fprintf(stderr,"FFT plan mismatch - bad plan or direction.");
return EINVAL;
}
return 0;
}
int
fftpack_exec_2d (fftpack_t fft,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data)
{
int i,nx,ny;
t_complex * data;
nx = fft->n;
ny = fft->next->n;
/* FFTPACK only does in-place transforms, so emulate out-of-place
* by copying data to the output array first.
* For 2D there is likely enough data to benefit from memcpy().
*/
if( in_data != out_data )
{
memcpy(out_data,in_data,sizeof(t_complex)*nx*ny);
}
/* Much easier to do pointer arithmetic when base has the correct type */
data = (t_complex *)out_data;
/* y transforms */
for(i=0;i<nx;i++)
{
fftpack_exec_1d(fft->next,dir,data+i*ny,data+i*ny);
}
/* Transpose in-place to get data in place for x transform now */
fftpack_transpose_2d(data,data,nx,ny);
/* x transforms */
for(i=0;i<ny;i++)
{
fftpack_exec_1d(fft,dir,data+i*nx,data+i*nx);
}
/* Transpose in-place to get data back in original order */
fftpack_transpose_2d(data,data,ny,nx);
return 0;
}
int
fftpack_exec_3d (fftpack_t fft,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data)
{
int i,nx,ny,nz,rc;
t_complex * data;
nx=fft->n;
ny=fft->next->n;
nz=fft->next->next->n;
/* FFTPACK only does in-place transforms, so emulate out-of-place
* by copying data to the output array first.
* For 3D there is likely enough data to benefit from memcpy().
*/
if( in_data != out_data )
{
memcpy(out_data,in_data,sizeof(t_complex)*nx*ny*nz);
}
/* Much easier to do pointer arithmetic when base has the correct type */
data = (t_complex *)out_data;
/* Perform z transforms */
for(i=0;i<nx*ny;i++)
fftpack_exec_1d(fft->next->next,dir,data+i*nz,data+i*nz);
/* For each X slice, transpose the y & z dimensions inside the slice */
for(i=0;i<nx;i++)
{
fftpack_transpose_2d(data+i*ny*nz,data+i*ny*nz,ny,nz);
}
/* Array is now (nx,nz,ny) - perform y transforms */
for(i=0;i<nx*nz;i++)
{
fftpack_exec_1d(fft->next,dir,data+i*ny,data+i*ny);
}
/* Transpose back to (nx,ny,nz) */
for(i=0;i<nx;i++)
{
fftpack_transpose_2d(data+i*ny*nz,data+i*ny*nz,nz,ny);
}
/* Transpose entire x & y slices to go from
* (nx,ny,nz) to (ny,nx,nz).
*/
rc=fftpack_transpose_2d_nelem(data,data,nx,ny,nz);
if( rc != 0)
{
fprintf(stderr,"Fatal error - cannot transpose X & Y/Z in fftpack_exec_3d().");
return rc;
}
/* Then go from (ny,nx,nz) to (ny,nz,nx) */
for(i=0;i<ny;i++)
{
fftpack_transpose_2d(data+i*nx*nz,data+i*nx*nz,nx,nz);
}
/* Perform x transforms */
for(i=0;i<ny*nz;i++)
{
fftpack_exec_1d(fft,dir,data+i*nx,data+i*nx);
}
/* Transpose back from (ny,nz,nx) to (ny,nx,nz) */
for(i=0;i<ny;i++)
{
fftpack_transpose_2d(data+i*nz*nx,data+i*nz*nx,nz,nx);
}
/* Transpose from (ny,nx,nz) to (nx,ny,nz).
*/
rc = fftpack_transpose_2d_nelem(data,data,ny,nx,nz);
if( rc != 0)
{
fprintf(stderr,"Fatal error - cannot transpose Y/Z & X in fftpack_exec_3d().");
return rc;
}
return 0;
}
void
fftpack_destroy(fftpack_t fft)
{
if(fft != NULL)
{
free(fft->work);
if(fft->next != NULL)
fftpack_destroy(fft->next);
free(fft);
}
}
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
*
*
* Gromacs 4.0 Copyright (c) 1991-2003
* David van der Spoel, Erik Lindahl, University of Groningen.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org
*
* And Hey:
* Gnomes, ROck Monsters And Chili Sauce
*/
#include "../SimTKUtilities/SimTKOpenMMCommon.h"
#ifndef _FFTPACK_H_
#define _FFTPACK_H_
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
#if 0
} /* fixes auto-indentation problems */
#endif
typedef struct {
RealOpenMM re;
RealOpenMM im;
} t_complex;
/*! \brief Datatype for FFT setup
*
* The fftpack_t type contains all the setup information, e.g. twiddle
* factors, necessary to perform an FFT. Internally it is mapped to
* whatever FFT library we are using, or the built-in FFTPACK if no fast
* external library is available.
*/
typedef struct fftpack *
fftpack_t;
/*! \brief Specifier for FFT direction.
*
* The definition of the 1D forward transform from input x[] to output y[] is
* \f[
* y_{k} = \sum_{j=0}^{N-1} x_{j} \exp{-i 2 \pi j k /N}
* \f]
*
* while the corresponding backward transform is
*
* \f[
* y_{k} = \sum_{j=0}^{N-1} x_{j} \exp{i 2 \pi j k /N}
* \f]
*
* A forward-backward transform pair will this result in data scaled by N.
*
*/
typedef enum fftpack_direction
{
FFTPACK_FORWARD, /*!< Forward complex-to-complex transform */
FFTPACK_BACKWARD, /*!< Backward complex-to-complex transform */
} fftpack_direction;
/*! \brief Setup a 1-dimensional complex-to-complex transform
*
* \param fft Pointer to opaque Gromacs FFT datatype
* \param nx Length of transform
*
* \return status - 0 or a standard error message.
*/
int
fftpack_init_1d (fftpack_t * fft,
int nx);
/*! \brief Setup a 2-dimensional complex-to-complex transform
*
* \param fft Pointer to opaque Gromacs FFT datatype
* \param nx Length of transform in first dimension
* \param ny Length of transform in second dimension
*
* \return status - 0 or a standard error message.
*
*/
int
fftpack_init_2d (fftpack_t * fft,
int nx,
int ny);
/*! \brief Setup a 3-dimensional complex-to-complex transform
*
* \param fft Pointer to opaque Gromacs FFT datatype
* \param nx Length of transform in first dimension
* \param ny Length of transform in second dimension
* \param nz Length of transform in third dimension
*
* \return status - 0 or a standard error message.
*
*/
int
fftpack_init_3d (fftpack_t * fft,
int nx,
int ny,
int nz);
/*! \brief Perform a 1-dimensional complex-to-complex transform
*
* Performs an instance of a transform previously initiated.
*
* \param setup Setup returned from fftpack_init_1d()
* \param dir Forward or Backward
* \param in_data Input grid data.
* \param out_data Output grid data.
* You can provide the same pointer for in_data and out_data
* to perform an in-place transform.
*
* \return 0 on success, or an error code.
*
* \note Data pointers are declared as void, to avoid casting pointers
* depending on your grid type.
*/
int
fftpack_exec_1d (fftpack_t setup,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data);
/*! \brief Perform a 2-dimensional complex-to-complex transform
*
* Performs an instance of a transform previously initiated.
*
* \param setup Setup returned from fftpack_init_1d()
* \param dir Forward or Backward
* \param in_data Input grid data.
* \param out_data Output grid data.
* You can provide the same pointer for in_data and out_data
* to perform an in-place transform.
*
* \return 0 on success, or an error code.
*
* \note Data pointers are declared as void, to avoid casting pointers
* depending on your grid type.
*/
int
fftpack_exec_2d (fftpack_t setup,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data);
/*! \brief Perform a 3-dimensional complex-to-complex transform
*
* Performs an instance of a transform previously initiated.
*
* \param setup Setup returned from fftpack_init_1d()
* \param dir Forward or Backward
* \param in_data Input grid data.
* \param out_data Output grid data.
* You can provide the same pointer for in_data and out_data
* to perform an in-place transform.
*
* \return 0 on success, or an error code.
*
* \note Data pointers are declared as void, to avoid casting pointers
* depending on your grid type.
*/
int
fftpack_exec_3d (fftpack_t setup,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data);
/*! \brief Release an FFT setup structure
*
* Destroy setup and release all allocated memory.
*
* \param setup Setup returned from fftpack_init_1d(), or one
* of the other initializers.
*
*/
void
fftpack_destroy (fftpack_t setup);
#ifdef __cplusplus
}
#endif
#endif /* _FFTPACK_H_ */
/*
* Reference implementation of PME reciprocal space interactions.
*
* Copyright (c) 2009, Erik Lindahl, Rossen Apostolov, Szilard Pall
* All rights reserved.
* Contact: lindahl@cbr.su.se Stockholm University, Sweden.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution.
* Neither the name of the author/university nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "pme.h"
#include "fftpack.h"
//#define ONE_4PI_EPS0 (33.20636930*4.184) /* Units of kJ/mol and nm */
// In OpenMM, atom charges are already scaled by sqrt(ONE_4PI_EPS0), so we don't need this
#define ONE_4PI_EPS0 (1.0) /* Units of kJ/mol and nm */
typedef int ivec[3];
struct pme
{
int natoms;
RealOpenMM ewaldcoeff;
t_complex * grid; /* Memory for the grid we spread charges on.
* Element (i,j,k) is accessed as:
* grid[i*ngrid[1]*ngrid[2] + j*ngrid[2] + k]
*/
int ngrid[3]; /* Total grid dimensions (all data is complex!) */
fftpack_t fftplan; /* Handle to fourier transform setup */
int order; /* PME interpolation order. Almost always 4 */
/* Data for bspline interpolation, see the Essman PME paper */
RealOpenMM * bsplines_moduli[3]; /* 3 pointers, to x/y/z bspline moduli, each of length ngrid[x/y/z] */
RealOpenMM * bsplines_theta[3]; /* each of x/y/z has length order*natoms */
RealOpenMM * bsplines_dtheta[3]; /* each of x/y/z has length order*natoms */
ivec * particleindex; /* Array of length natoms. Each element is
* an ivec (3 ints) that specify the grid
* indices for that particular atom. Updated every step!
*/
rvec * particlefraction; /* Array of length natoms. Fractional offset in the grid for
* each atom in all three dimensions.
*/
/* Further explanation of index/fraction:
*
* Assume we have a cell of size 10*10*10nm, and a total grid dimension of 100*100*100 cells.
* In other words, each cell is 0.1*0.1*0.1 nm.
*
* If particle i has coordinates { 0.543 , 6.235 , -0.73 }, we will get:
*
* particleindex[i] = { 5 , 62 , 92 } (-0.73 + 10 = 9.27, we always apply PBC for grid calculations!)
* particlefraction[i] = { 0.43 , 0.35 , 0.7 } ( this is the fraction of the cell length where the atom is)
*
* (The reason for precaculating / storing these is that it gets a bit more complex for triclinic cells :-)
*
* In the current code version we might assume that a particle is not more than a whole box length away from
* the central cell, i.e., in this case we would assume all coordinates fall in -10 nm < x,y,z < 20 nm.
*/
RealOpenMM epsilon_r; /* Dielectric coefficient to use, typically 1.0 */
};
/* Internal setup routines */
/* Only called once from init_pme(), performance does not matter! */
static void
pme_calculate_bsplines_moduli(pme_t pme)
{
int nmax;
int i,j,k,l,d;
int order;
int ndata;
RealOpenMM * data;
RealOpenMM * ddata;
RealOpenMM * bsplines_data;
RealOpenMM div;
RealOpenMM sc,ss,arg;
nmax = 0;
for(d=0;d<3;d++)
{
nmax = (pme->ngrid[d] > nmax) ? pme->ngrid[d] : nmax;
pme->bsplines_moduli[d] = (RealOpenMM *) malloc(sizeof(RealOpenMM)*pme->ngrid[d]);
}
order = pme->order;
/* temp storage in this routine */
data = (RealOpenMM *) malloc(sizeof(RealOpenMM)*order);
ddata = (RealOpenMM *) malloc(sizeof(RealOpenMM)*order);
bsplines_data = (RealOpenMM *) malloc(sizeof(RealOpenMM)*nmax);
data[order-1]=0;
data[1]=0;
data[0]=1;
for(k=3;k<order;k++)
{
div=1.0/(k-1.0);
data[k-1]=0;
for(l=1;l<(k-1);l++)
{
data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
}
data[0]=div*data[0];
}
/* differentiate */
ddata[0]=-data[0];
for(k=1;k<order;k++)
{
ddata[k]=data[k-1]-data[k];
}
div=1.0/(order-1);
data[order-1]=0;
for(l=1;l<(order-1);l++)
{
data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
}
data[0]=div*data[0];
for(i=0;i<nmax;i++)
{
bsplines_data[i]=0;
}
for(i=1;i<=order;i++)
{
bsplines_data[i]=data[i-1];
}
/* Evaluate the actual bspline moduli for X/Y/Z */
for(d=0;d<3;d++)
{
ndata = pme->ngrid[d];
for(i=0;i<ndata;i++)
{
sc=ss=0;
for(j=0;j<ndata;j++)
{
arg=(2.0*M_PI*i*j)/ndata;
sc+=bsplines_data[j]*cos(arg);
ss+=bsplines_data[j]*sin(arg);
}
pme->bsplines_moduli[d][i]=sc*sc+ss*ss;
}
for(i=0;i<ndata;i++)
{
if(pme->bsplines_moduli[d][i]<1.0e-7)
{
pme->bsplines_moduli[d][i]=(pme->bsplines_moduli[d][i-1]+pme->bsplines_moduli[d][i+1])*0.5;
}
}
}
/* Release temp storage */
free(data);
free(ddata);
free(bsplines_data);
}
static void
pme_update_grid_index_and_fraction(pme_t pme,
RealOpenMM ** atomCoordinates,
const RealOpenMM periodicBoxSize[3])
{
int i;
int d;
RealOpenMM t;
int ti;
for(i=0;i<pme->natoms;i++)
{
for(d=0;d<3;d++)
{
/* Index calculation (Look mom, no conditionals!):
*
* Both for Cuda and modern CPUs it is nice to avoid conditionals, but we still need to apply periodic boundary conditions.
* Instead of having loops to add/subtract the box dimension, we do it this way:
*
* 1. First add the box size, to make sure this atom coordinate isnt -0.1 or something.
* After this we assume all fractional box positions are *positive*.
* The reason for this is that we always want to round coordinates _down_ to get
* their grid index, and when taking the integer part of -3.4 we would get -3, not -4 as we want.
* Since we anyway need the grid indices to fall in the central box, it is more convenient
* to first manipulate the coordinates to be positive.
* 2. Convert to integer grid index
* Since we have added a whole box unit in step 1, this index might actually be larger than
* the grid dimension. Examples, assuming 10*10*10nm box and grid dimension 100*100*100 (spacing 0.1 nm):
*
* coordinate is { 0.543 , 6.235 , -0.73 }
*
* x[i][d]/box[d] becomes { 0.0543 , 0.6235 , -0.073 }
* (x[i][d]/box[d] + 1.0) becomes { 1.0543 , 1.6235 , 0.927 }
* (x[i][d]/box[d] + 1.0)*ngrid[d] becomes { 105.43 , 162.35 , 92.7 }
*
* integer part is now { 105 , 162 , 92 }
*
* The fraction is calculates as t-ti, which becomes { 0.43 , 0.35 , 0.7 }
*
* 3. Take the first integer index part (which can be larger than the grid) modulo the grid dimension
*
* Now we get { 5 , 62 , 92 }
*
* Voila, both index and fraction, entirely without conditionals. The one limitation here is that
* we only add one box length, so if the particle had a coordinate <=-10.0, we would be screwed.
* In principle we can of course add 100.0, but that just moves the problem, it doesnt solve it.
* In practice, MD programs will apply PBC to reset particles inside the central box to avoid
* numerical problems, so this shouldnt cause any problems.
* (And, by adding 100.0 box lengths, we would lose a bit of numerical accuracy here!)
*/
t = (atomCoordinates[i][d] / periodicBoxSize[d] + 1.0)*pme->ngrid[d];
ti = t;
pme->particlefraction[i][d] = t - ti;
pme->particleindex[i][d] = ti % pme->ngrid[d];
}
}
}
/* Ugly bspline calculation taken from Tom Dardens reference equations.
* This probably very sub-optimal in Cuda? Separate kernel?
*
* In practice, it might help to require order=4 for the cuda port.
*/
static void
pme_update_bsplines(pme_t pme)
{
int i,j,k,l;
int order;
RealOpenMM dr,div;
RealOpenMM * data;
RealOpenMM * ddata;
order = pme->order;
for(i=0; (i<pme->natoms); i++)
{
for(j=0; j<3; j++)
{
/* dr is relative offset from lower cell limit */
dr = pme->particlefraction[i][j];
data = &(pme->bsplines_theta[j][i*order]);
ddata = &(pme->bsplines_dtheta[j][i*order]);
data[order-1] = 0;
data[1] = dr;
data[0] = 1-dr;
for(k=3; k<order; k++)
{
div = 1.0/(k-1.0);
data[k-1] = div*dr*data[k-2];
for(l=1; l<(k-1); l++)
{
data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)*data[k-l-1]);
}
data[0] = div*(1-dr)*data[0];
}
/* differentiate */
ddata[0] = -data[0];
for(k=1; k<order; k++)
{
ddata[k] = data[k-1]-data[k];
}
div = 1.0/(order-1);
data[order-1] = div*dr*data[order-2];
for(l=1; l<(order-1); l++)
{
data[order-l-1] = div*((dr+l)*data[order-l-2]+(order-l-dr)*data[order-l-1]);
}
data[0] = div*(1-dr)*data[0];
}
}
}
static void
pme_grid_spread_charge(pme_t pme,
RealOpenMM ** atomParameters)
{
static const int QIndex = 2; // atom charges are stored in atomParameters[atomID][2]
int order;
int i;
int ix,iy,iz;
int x0index,y0index,z0index;
int xindex,yindex,zindex;
int index;
RealOpenMM q;
RealOpenMM * thetax;
RealOpenMM * thetay;
RealOpenMM * thetaz;
order = pme->order;
/* Reset the grid */
for(i=0;i<pme->ngrid[0]*pme->ngrid[1]*pme->ngrid[2];i++)
{
pme->grid[i].re = pme->grid[i].im = 0;
}
for(i=0;i<pme->natoms;i++)
{
q = atomParameters[i][QIndex];
/* Grid index for the actual atom position */
x0index = pme->particleindex[i][0];
y0index = pme->particleindex[i][1];
z0index = pme->particleindex[i][2];
/* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
thetax = &(pme->bsplines_theta[0][i*order]);
thetay = &(pme->bsplines_theta[1][i*order]);
thetaz = &(pme->bsplines_theta[2][i*order]);
/* Loop over norder*norder*norder (typically 4*4*4) neighbor cells.
*
* As a neat optimization, we only spread in the forward direction, but apply PBC!
*
* Since we are going to do an FFT on the grid, it doesnt matter where the data is,
* in frequency space the result will be the same.
*
* So, the influence function (bsplines) will probably be something like (0.15,0.35,0.35,0.15),
* with largest weight 2-3 steps forward (you dont need to understand that for the implementation :-)
* Effectively, you can look at this as translating the entire grid.
*
* Why do we do this stupid thing?
*
* 1) The loops get much simpler
* 2) Just looking forward will hopefully get us more cache hits
* 3) When we parallelize things, we only need to communicate in one direction instead of two!
*/
for(ix=0;ix<order;ix++)
{
/* Calculate index, apply PBC so we spread to index 0/1/2 when a particle is close to the upper limit of the grid */
xindex = (x0index + ix) % pme->ngrid[0];
for(iy=0;iy<order;iy++)
{
yindex = (y0index + iy) % pme->ngrid[1];
for(iz=0;iz<order;iz++)
{
/* Can be optimized, but we keep it simple here */
zindex = (z0index + iz) % pme->ngrid[2];
/* Calculate index in the charge grid */
index = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
/* Add the charge times the bspline spread/interpolation factors to this grid position */
pme->grid[index].re += q*thetax[ix]*thetay[iy]*thetaz[iz];
}
}
}
}
}
static void
pme_reciprocal_convolution(pme_t pme,
const RealOpenMM periodicBoxSize[3],
RealOpenMM * energy,
RealOpenMM pme_virial[3][3])
{
int kx,ky,kz;
int nx,ny,nz;
RealOpenMM mx,my,mz;
RealOpenMM mhx,mhy,mhz,m2;
RealOpenMM one_4pi_eps;
RealOpenMM virxx,virxy,virxz,viryy,viryz,virzz;
RealOpenMM bx,by,bz;
RealOpenMM d1,d2;
RealOpenMM eterm,vfactor,struct2,ets2;
RealOpenMM esum;
RealOpenMM factor;
RealOpenMM denom;
RealOpenMM boxfactor;
RealOpenMM maxkx,maxky,maxkz;
t_complex *ptr;
nx = pme->ngrid[0];
ny = pme->ngrid[1];
nz = pme->ngrid[2];
one_4pi_eps=ONE_4PI_EPS0/pme->epsilon_r;
factor=M_PI*M_PI/(pme->ewaldcoeff*pme->ewaldcoeff);
boxfactor = M_PI*periodicBoxSize[0]*periodicBoxSize[1]*periodicBoxSize[2];
esum = 0;
virxx = 0;
virxy = 0;
virxz = 0;
viryy = 0;
viryz = 0;
virzz = 0;
maxkx = (nx+1)/2;
maxky = (ny+1)/2;
maxkz = (nz+1)/2;
for(kx=0;kx<nx;kx++)
{
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
mx = (kx<maxkx) ? kx : (kx-nx);
mhx = mx/periodicBoxSize[0];
bx = boxfactor*pme->bsplines_moduli[0][kx];
for(ky=0;ky<ny;ky++)
{
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
my = (ky<maxky) ? ky : (ky-ny);
mhy = my/periodicBoxSize[1];
by = pme->bsplines_moduli[1][ky];
for(kz=0;kz<nz;kz++)
{
/* If the net charge of the system is 0.0, there will not be any DC (direct current, zero frequency) component. However,
* we can still handle charged systems through a charge correction, in which case the DC
* component should be excluded from recprocal space. We will anyway run into problems below when dividing with the
* frequency if it is zero...
*
* In cuda you could probably work around this by setting something to 0.0 instead, but the short story is that we
* should skip the zero frequency case!
*/
if (kx==0 && ky==0 && kz==0)
{
continue;
}
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
mz = (kz<maxkz) ? kz : (kz-nz);
mhz = mz/periodicBoxSize[2];
/* Pointer to the grid cell in question */
ptr = pme->grid + kx*ny*nz + ky*nz + kz;
/* Get grid data for this frequency */
d1 = ptr->re;
d2 = ptr->im;
/* Calculate the convolution - see the Essman/Darden paper for the equation! */
m2 = mhx*mhx+mhy*mhy+mhz*mhz;
bz = pme->bsplines_moduli[2][kz];
denom = m2*bx*by*bz;
eterm = one_4pi_eps*exp(-factor*m2)/denom;
/* write back convolution data to grid */
ptr->re = d1*eterm;
ptr->im = d2*eterm;
struct2 = (d1*d1+d2*d2);
/* Long-range PME contribution to the energy for this frequency */
ets2 = eterm*struct2;
esum += ets2;
/* PME long-range contribution to atomic virial. Since it is symmetric, we only calculate half the matrix inside this loop. */
vfactor = (factor*m2+1.0)*2.0/m2;
virxx += ets2*(vfactor*mhx*mhx-1.0);
virxy += ets2*vfactor*mhx*mhy;
virxz += ets2*vfactor*mhx*mhz;
viryy += ets2*(vfactor*mhy*mhy-1.0);
viryz += ets2*vfactor*mhy*mhz;
virzz += ets2*(vfactor*mhz*mhz-1.0);
}
}
}
pme_virial[0][0] = 0.25*virxx;
pme_virial[1][1] = 0.25*viryy;
pme_virial[2][2] = 0.25*virzz;
pme_virial[0][1] = pme_virial[1][0] = 0.25*virxy;
pme_virial[0][2] = pme_virial[2][0] = 0.25*virxz;
pme_virial[1][2] = pme_virial[2][1] = 0.25*viryz;
/* The factor 0.5 is nothing special, but it is better to have it here than inside the loop :-) */
*energy = 0.5*esum;
}
static void
pme_grid_interpolate_force(pme_t pme,
const RealOpenMM periodicBoxSize[3],
RealOpenMM ** atomParameters,
RealOpenMM ** forces)
{
static const int QIndex = 2; // atom charges are stored in atomParameters[atomID][2]
int i;
int ix,iy,iz;
int x0index,y0index,z0index;
int xindex,yindex,zindex;
int index;
int order;
RealOpenMM q;
RealOpenMM * thetax;
RealOpenMM * thetay;
RealOpenMM * thetaz;
RealOpenMM * dthetax;
RealOpenMM * dthetay;
RealOpenMM * dthetaz;
RealOpenMM tx,ty,tz;
RealOpenMM dtx,dty,dtz;
RealOpenMM fx,fy,fz;
RealOpenMM gridvalue;
int nx,ny,nz;
nx = pme->ngrid[0];
ny = pme->ngrid[1];
nz = pme->ngrid[2];
order = pme->order;
/* This is almost identical to the charge spreading routine! */
for(i=0;i<pme->natoms;i++)
{
fx = fy = fz = 0;
q = atomParameters[i][QIndex];
/* Grid index for the actual atom position */
x0index = pme->particleindex[i][0];
y0index = pme->particleindex[i][1];
z0index = pme->particleindex[i][2];
/* Bspline factors for this atom in each dimension , calculated from fractional coordinates */
thetax = &(pme->bsplines_theta[0][i*order]);
thetay = &(pme->bsplines_theta[1][i*order]);
thetaz = &(pme->bsplines_theta[2][i*order]);
dthetax = &(pme->bsplines_dtheta[0][i*order]);
dthetay = &(pme->bsplines_dtheta[1][i*order]);
dthetaz = &(pme->bsplines_dtheta[2][i*order]);
/* See pme_grid_spread_charge() for comments about the order here, and only interpolation in one direction */
/* Since we will add order^3 (typically 4*4*4=64) terms to the force on each particle, we use temporary fx/fy/fz
* variables, and only add it to memory forces[] at the end.
*/
for(ix=0;ix<order;ix++)
{
xindex = (x0index + ix) % pme->ngrid[0];
/* Get both the bspline factor and its derivative with respect to the x coordinate! */
tx = thetax[ix];
dtx = dthetax[ix];
for(iy=0;iy<order;iy++)
{
yindex = (y0index + iy) % pme->ngrid[1];
/* bspline + derivative wrt y */
ty = thetay[iy];
dty = dthetay[iy];
for(iz=0;iz<order;iz++)
{
/* Can be optimized, but we keep it simple here */
zindex = (z0index + iz) % pme->ngrid[2];
/* bspline + derivative wrt z */
tz = thetaz[iz];
dtz = dthetaz[iz];
index = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
/* Get the fft+convoluted+ifft:d data from the grid, which must be real by definition */
/* Checking that the imaginary part is indeed zero might be a good check :-) */
gridvalue = pme->grid[index].re;
/* The d component of the force is calculated by taking the derived bspline in dimension d, normal bsplines in the other two */
fx += dtx*ty*tz*gridvalue;
fy += tx*dty*tz*gridvalue;
fz += tx*ty*dtz*gridvalue;
}
}
}
/* Update memory force, note that we multiply by charge and some box stuff */
forces[i][0] -= q*fx*nx/periodicBoxSize[0];
forces[i][1] -= q*fy*ny/periodicBoxSize[1];
forces[i][2] -= q*fz*nz/periodicBoxSize[2];
}
}
/* EXPORTED ROUTINES */
int
pme_init(pme_t * ppme,
RealOpenMM ewaldcoeff,
int natoms,
int ngrid[3],
int pme_order,
RealOpenMM epsilon_r)
{
pme_t pme;
int d;
pme = (pme_t) malloc(sizeof(struct pme));
pme->order = pme_order;
pme->epsilon_r = epsilon_r;
pme->ewaldcoeff = ewaldcoeff;
pme->natoms = natoms;
for(d=0;d<3;d++)
{
pme->ngrid[d] = ngrid[d];
pme->bsplines_theta[d] = (RealOpenMM *)malloc(sizeof(RealOpenMM)*pme_order*natoms);
pme->bsplines_dtheta[d] = (RealOpenMM *)malloc(sizeof(RealOpenMM)*pme_order*natoms);
}
pme->particlefraction = (rvec *)malloc(sizeof(rvec)*natoms);
pme->particleindex = (ivec *)malloc(sizeof(ivec)*natoms);
/* Allocate charge grid storage */
pme->grid = (t_complex *)malloc(sizeof(t_complex)*ngrid[0]*ngrid[1]*ngrid[2]);
fftpack_init_3d(&pme->fftplan,ngrid[0],ngrid[1],ngrid[2]);
/* Setup bspline moduli (see Essman paper) */
pme_calculate_bsplines_moduli(pme);
*ppme = pme;
return 0;
}
int pme_exec(pme_t pme,
RealOpenMM ** atomCoordinates,
RealOpenMM ** forces,
RealOpenMM ** atomParameters,
const RealOpenMM periodicBoxSize[3],
RealOpenMM * energy,
RealOpenMM pme_virial[3][3])
{
/* Routine is called with coordinates in x, a box, and charges in q */
/* Before we can do the actual interpolation, we need to recalculate and update
* the indices for each particle in the charge grid (initialized in pme_init()),
* and what its fractional offset in this grid cell is.
*/
/* Update charge grid indices and fractional offsets for each atom.
* The indices/fractions are stored internally in the pme datatype
*/
pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxSize);
/* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
pme_update_bsplines(pme);
/* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
pme_grid_spread_charge(pme,atomParameters);
/* do 3d-fft */
fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid);
/* solve in k-space */
pme_reciprocal_convolution(pme,periodicBoxSize,energy,pme_virial);
/* do 3d-invfft */
fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid);
/* Get the particle forces from the grid and bsplines in the pme structure */
pme_grid_interpolate_force(pme,periodicBoxSize,atomParameters,forces);
return 0;
}
int
pme_destroy(pme_t pme)
{
int d;
free(pme->grid);
for(d=0;d<3;d++)
{
free(pme->bsplines_moduli[d]);
free(pme->bsplines_theta[d]);
free(pme->bsplines_dtheta[d]);
}
free(pme->particlefraction);
free(pme->particleindex);
fftpack_destroy(pme->fftplan);
/* destroy structure itself */
free(pme);
return 0;
}
/*
* Reference implementation of PME reciprocal space interactions.
*
* Copyright (c) 2009, Erik Lindahl, Rossen Apostolov, Szilard Pall
* All rights reserved.
* Contact: lindahl@cbr.su.se Stockholm University, Sweden.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution.
* Neither the name of the author/university nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "../SimTKUtilities/SimTKOpenMMCommon.h"
typedef RealOpenMM rvec[3];
typedef struct pme *
pme_t;
/*
* Initialize a PME calculation and set up data structures
*
* Arguments:
*
* ppme Pointer to an opaque pme_t object
* ewaldcoeff Coefficient derived from the beta factor to participate
* direct/reciprocal space. See gromacs code for documentation!
* We assume that you are using nm units...
* natoms Number of atoms to set up data structure sof
* ngrid Size of the full pme grid
* pme_order Interpolation order, almost always 4
* epsilon_r Dielectric coefficient, typically 1.0.
*/
int
pme_init(pme_t * ppme,
RealOpenMM ewaldcoeff,
int natoms,
int ngrid[3],
int pme_order,
RealOpenMM epsilon_r);
/*
* Evaluate reciprocal space PME energy and forces.
*
* Args:
*
* pme Opaque pme_t object, must have been initialized with pme_init()
* x Pointer to coordinate data array (nm)
* f Pointer to force data array (will be written as kJ/mol/nm)
* charge Array of charges (units of e)
* box Simulation cell dimensions (nm)
* energy Total energy (will be written in units of kJ/mol)
* pme_virial Long-range part of the virial, output.
*/
int
pme_exec(pme_t pme,
RealOpenMM ** atomCoordinates,
RealOpenMM ** forces,
RealOpenMM ** atomParameters,
const RealOpenMM periodicBoxSize[3],
RealOpenMM * energy,
RealOpenMM pme_virial[3][3]);
/* Release all memory in pme structure */
int
pme_destroy(pme_t pme);
......@@ -76,85 +76,88 @@ void testEwald() {
cout << "PotentialEnergy: " << state.getPotentialEnergy() << endl;
ASSERT_EQUAL_VEC(Vec3(-123.711, 64.1877, -302.716), forces[0], 10*TOL);
ASSERT_EQUAL_VEC(Vec3(123.711, -64.1877, 302.716), forces[1], 10*TOL);
// const double eps = 78.3;
// const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
// const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
// const double force = 138.935485*(1.0)*(1.0-2.0*krf*1.0);
// ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[0], TOL);
// ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[1], TOL);
// ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[2], TOL);
// ASSERT_EQUAL_TOL(2*138.935485*(1.0)*(1.0+krf*1.0-crf), state.getPotentialEnergy(), TOL);
}
/*
void testEwald4() {
void testPME() {
ReferencePlatform platform;
System system(4, 0);
System system;
for (int i = 0 ; i < 42 ; i++)
{
system.addParticle(1.0);
}
VerletIntegrator integrator(0.01);
NonbondedForce* nonbonded = new NonbondedForce(4, 0);
nonbonded->setParticleParameters(0, 1.0, 1, 0);
nonbonded->setParticleParameters(1, 1.0, 1, 0);
nonbonded->setParticleParameters(2, -1.0, 1, 0);
nonbonded->setParticleParameters(3, -1.0, 1, 0);
nonbonded->setNonbondedMethod(NonbondedForce::Ewald);
const double cutoff = 2.0;
NonbondedForce* nonbonded = new NonbondedForce();
for (int i = 0 ; i < 14 ; i++)
{
nonbonded->addParticle(-0.82, 1, 0);
nonbonded->addParticle(0.41, 1, 0);
nonbonded->addParticle(0.41, 1, 0);
}
nonbonded->setNonbondedMethod(NonbondedForce::PME);
const double cutoff = 0.8;
nonbonded->setCutoffDistance(cutoff);
nonbonded->setPeriodicBoxVectors(Vec3(6, 0, 0), Vec3(0, 6, 0), Vec3(0, 0, 6));
nonbonded->setPeriodicBoxVectors(Vec3(1.86206, 0, 0), Vec3(0, 1.86206, 0), Vec3(0, 0, 1.86206));
nonbonded->setEwaldErrorTolerance(TOL);
system.addForce(nonbonded);
OpenMMContext context(system, integrator, platform);
vector<Vec3> positions(4);
positions[0] = Vec3(3.048000,2.764000,3.156000);
positions[1] = Vec3(3.348000,2.764000,3.156000);
positions[2] = Vec3(2.809000,2.888000,2.571000);
positions[3] = Vec3(2.509000,2.888000,2.571000);
vector<Vec3> positions(42);
positions[0] = Vec3( 0.23,0.628,0.113);
positions[1] = Vec3(0.137,0.626, 0.15);
positions[2] = Vec3(0.231,0.589,0.021);
positions[3] = Vec3(-0.307,-0.351,0.703);
positions[4] = Vec3(-0.364,-0.367,0.784);
positions[5] = Vec3(-0.366,-0.341,0.623);
positions[6] = Vec3(-0.569,-0.634,-0.439);
positions[7] = Vec3(-0.532,-0.707,-0.497);
positions[8] = Vec3(-0.517,-0.629,-0.354);
positions[9] = Vec3(-0.871, 0.41,-0.62);
positions[10] = Vec3(-0.948,0.444,-0.566);
positions[11] = Vec3(-0.905,0.359,-0.699);
positions[12] = Vec3(0.249,-0.077,-0.621);
positions[13] = Vec3(0.306,-0.142,-0.571);
positions[14] = Vec3(0.233,-0.11,-0.714);
positions[15] = Vec3(0.561,0.222,-0.715);
positions[16] = Vec3(0.599,0.138,-0.678);
positions[17] = Vec3(0.473,0.241,-0.671);
positions[18] = Vec3(-0.515,-0.803,-0.628);
positions[19] = Vec3(-0.491,-0.866,-0.702);
positions[20] = Vec3(-0.605,-0.763,-0.646);
positions[21] = Vec3(-0.021,0.175,-0.899);
positions[22] = Vec3(0.018, 0.09,-0.935);
positions[23] = Vec3(-0.119,0.177,-0.918);
positions[24] = Vec3(-0.422,0.856,-0.464);
positions[25] = Vec3(-0.479,0.908,-0.527);
positions[26] = Vec3(-0.326,0.868,-0.488);
positions[27] = Vec3(-0.369,-0.095,-0.903);
positions[28] = Vec3(-0.336,-0.031,-0.972);
positions[29] = Vec3(-0.303,-0.101,-0.828);
positions[30] = Vec3(0.594,0.745,0.652);
positions[31] = Vec3(0.644, 0.83,0.633);
positions[32] = Vec3(0.506,0.747,0.604);
positions[33] = Vec3(-0.157,-0.375,-0.758);
positions[34] = Vec3(-0.25, -0.4,-0.785);
positions[35] = Vec3(-0.131,-0.425,-0.676);
positions[36] = Vec3(0.618,-0.295,-0.578);
positions[37] = Vec3(0.613,-0.213,-0.521);
positions[38] = Vec3(0.707,-0.298,-0.623);
positions[39] = Vec3(0.039,-0.785, 0.3);
positions[40] = Vec3(0.138,-0.796,0.291);
positions[41] = Vec3(-0.001,-0.871,0.332);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
// cout << "force 0: " << forces[0] << endl;
// cout << "force 1: " << forces[1] << endl;
cout << "energyPoten: " << state.getPotentialEnergy() << endl;
// const double eps = 78.3;
// const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
// const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
// const double force = 138.935485*(1.0)*(1.0-2.0*krf*1.0);
// ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[0], TOL);
// ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[1], TOL);
// ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[2], TOL);
// ASSERT_EQUAL_TOL(2*138.935485*(1.0)*(1.0+krf*1.0-crf), state.getPotentialEnergy(), TOL);
// for (int i = 0 ; i < 42 ; i++)
// cout << "f [" << i << " : ]" << forces[i] << endl;
// cout << "PotentialEnergy: " << state.getPotentialEnergy() << endl;
// ASSERT_EQUAL_VEC(Vec3(-123.711, 64.1877, -302.716), forces[0], 10*TOL);
// ASSERT_EQUAL_VEC(Vec3(123.711, -64.1877, 302.716), forces[1], 10*TOL);
}
*/
/*
void testPeriodic() {
ReferencePlatform platform;
System system(2, 0);
VerletIntegrator integrator(0.01);
NonbondedForce* nonbonded = new NonbondedForce(2, 0);
nonbonded->setParticleParameters(0, 1.0, 1, 0);
nonbonded->setParticleParameters(1, -1.0, 1, 0);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodci);
const double cutoff = 1.0;
nonbonded->setCutoffDistance(cutoff);
nonbonded->setPeriodicBoxVectors(Vec3(6, 0, 0), Vec3(0, 6, 0), Vec3(0, 0, 6));
system.addForce(nonbonded);
OpenMMContext context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(3.044293,2.765923,3.146914);
positions[1] = Vec3(2.812707,2.886077,2.580086);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
cout << "force 0: " << forces[0] << endl;
cout << "force 1: " << forces[1] << endl;
cout << "energyPoten: " << state.getPotentialEnergy() << endl;
}
*/
int main() {
try {
// testPeriodic();
testEwald();
// testEwald4();
testPME();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment