Unverified Commit 1dac981a authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Use PocketFFT (#3667)

* Use PocketFFT instead of FFTW

* Minor cleanup

* Use PocketFFT instead of fftpack for reference platform

* Remove FFTW as a dependency

* Converted a test case to use PocketFFT

* Fixed an incorrect comment
parent 583471a6
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. * * Portions copyright (c) 2011-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
/** /**
* This tests the OpenCL implementation of sorting. * This tests the OpenCL implementation of FFT.
*/ */
#include "openmm/internal/AssertionUtilities.h" #include "openmm/internal/AssertionUtilities.h"
...@@ -38,12 +38,16 @@ ...@@ -38,12 +38,16 @@
#include "OpenCLContext.h" #include "OpenCLContext.h"
#include "OpenCLFFT3D.h" #include "OpenCLFFT3D.h"
#include "OpenCLSort.h" #include "OpenCLSort.h"
#include "fftpack.h"
#include "sfmt/SFMT.h" #include "sfmt/SFMT.h"
#include "openmm/System.h" #include "openmm/System.h"
#include <complex>
#include <iostream> #include <iostream>
#include <cmath> #include <cmath>
#include <set> #include <set>
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -60,17 +64,17 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) { ...@@ -60,17 +64,17 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt); init_gen_rand(0, sfmt);
vector<Real2> original(xsize*ysize*zsize); vector<Real2> original(xsize*ysize*zsize);
vector<t_complex> reference(original.size()); vector<complex<double> > reference(original.size());
for (int i = 0; i < (int) original.size(); i++) { for (int i = 0; i < (int) original.size(); i++) {
Real2 value = Real2((cl_float) genrand_real2(sfmt), (cl_float) genrand_real2(sfmt)); Real2 value = Real2((cl_float) genrand_real2(sfmt), (cl_float) genrand_real2(sfmt));
original[i] = value; original[i] = value;
reference[i] = t_complex(value.x, value.y); reference[i] = complex<double>(value.x, value.y);
} }
for (int i = 0; i < (int) reference.size(); i++) { for (int i = 0; i < (int) reference.size(); i++) {
if (realToComplex) if (realToComplex)
reference[i] = t_complex(i%2 == 0 ? original[i/2].x : original[i/2].y, 0); reference[i] = complex<double>(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
else else
reference[i] = t_complex(original[i].x, original[i].y); reference[i] = complex<double>(original[i].x, original[i].y);
} }
OpenCLArray grid1(context, original.size(), sizeof(Real2), "grid1"); OpenCLArray grid1(context, original.size(), sizeof(Real2), "grid1");
OpenCLArray grid2(context, original.size(), sizeof(Real2), "grid2"); OpenCLArray grid2(context, original.size(), sizeof(Real2), "grid2");
...@@ -82,19 +86,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) { ...@@ -82,19 +86,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
fft.execFFT(grid1, grid2, true); fft.execFFT(grid1, grid2, true);
vector<Real2> result; vector<Real2> result;
grid2.download(result); grid2.download(result);
fftpack_t plan; vector<size_t> shape = {(size_t) xsize, (size_t) ysize, (size_t) zsize};
fftpack_init_3d(&plan, xsize, ysize, zsize); vector<size_t> axes = {0, 1, 2};
fftpack_exec_3d(plan, FFTPACK_FORWARD, &reference[0], &reference[0]); vector<ptrdiff_t> stride = {(ptrdiff_t) (ysize*zsize*sizeof(complex<double>)),
(ptrdiff_t) (zsize*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, reference.data(), reference.data(), 1.0);
int outputZSize = (realToComplex ? zsize/2+1 : zsize); int outputZSize = (realToComplex ? zsize/2+1 : zsize);
for (int x = 0; x < xsize; x++) for (int x = 0; x < xsize; x++)
for (int y = 0; y < ysize; y++) for (int y = 0; y < ysize; y++)
for (int z = 0; z < outputZSize; z++) { for (int z = 0; z < outputZSize; z++) {
int index1 = x*ysize*zsize + y*zsize + z; int index1 = x*ysize*zsize + y*zsize + z;
int index2 = x*ysize*outputZSize + y*outputZSize + z; int index2 = x*ysize*outputZSize + y*outputZSize + z;
ASSERT_EQUAL_TOL(reference[index1].re, result[index2].x, 1e-3); ASSERT_EQUAL_TOL(reference[index1].real(), result[index2].x, 1e-3);
ASSERT_EQUAL_TOL(reference[index1].im, result[index2].y, 1e-3); ASSERT_EQUAL_TOL(reference[index1].imag(), result[index2].y, 1e-3);
} }
fftpack_destroy(plan);
// Perform a backward transform and see if we get the original values. // Perform a backward transform and see if we get the original values.
......
/*
* This file contains a Fortran to C translation of the 1D transformations
* based on the original FFTPACK, written by paul n swarztrauber
* at the national center for atmospheric research and available
* at www.netlib.org. FFTPACK is in the public domain.
*
* Higher-dimension transforms written by Erik Lindahl, 2008-2009.
* Just as FFTPACK, this file may be redistributed freely, and can be
* considered to be in the public domain.
*
* Any errors in this (threadsafe, but not threaded) C version
* are probably due to the f2c translator, or hacks by Erik Lindahl,
* rather than FFTPACK. If you find a bug, it would be great if you could
* drop a line to lindahl@cbr.su.se and let me know about it!
*/
#ifndef _FFTPACK_H_
#define _FFTPACK_H_
#include <stdio.h>
#include "openmm/internal/windowsExport.h"
#ifdef __cplusplus
extern "C" {
#endif
#if 0
} /* fixes auto-indentation problems */
#endif
class t_complex {
public:
double re;
double im;
t_complex() : re(0.0), im(0.0) {
}
t_complex(double re, double im) : re(re), im(im) {
}
t_complex(const t_complex& c) : re(c.re), im(c.im) {
}
t_complex operator*(double r) {
return t_complex(re*r, im*r);
}
t_complex operator+(const t_complex& c) const {
return t_complex(re+c.re, im+c.im);
}
t_complex operator-(const t_complex& c) const {
return t_complex(re-c.re, im-c.im);
}
t_complex& operator+=(const t_complex& c) {
re += c.re;
im += c.im;
return *this;
}
t_complex& operator-=(const t_complex& c) {
re -= c.re;
im -= c.im;
return *this;
}
};
/*! \brief Datatype for FFT setup
*
* The fftpack_t type contains all the setup information, e.g. twiddle
* factors, necessary to perform an FFT. Internally it is mapped to
* whatever FFT library we are using, or the built-in FFTPACK if no fast
* external library is available.
*/
typedef struct fftpack *
fftpack_t;
/*! \brief Specifier for FFT direction.
*
* The definition of the 1D forward transform from input x[] to output y[] is
* \f[
* y_{k} = \sum_{j=0}^{N-1} x_{j} \exp{-i 2 \pi j k /N}
* \f]
*
* while the corresponding backward transform is
*
* \f[
* y_{k} = \sum_{j=0}^{N-1} x_{j} \exp{i 2 \pi j k /N}
* \f]
*
* A forward-backward transform pair will this result in data scaled by N.
*
*/
typedef enum fftpack_direction
{
FFTPACK_FORWARD, /*!< Forward complex-to-complex transform */
FFTPACK_BACKWARD, /*!< Backward complex-to-complex transform */
} fftpack_direction;
/*! \brief Setup a 1-dimensional complex-to-complex transform
*
* \param fft Pointer to opaque Gromacs FFT datatype
* \param nx Length of transform
*
* \return status - 0 or a standard error message.
*/
int
OPENMM_EXPORT
fftpack_init_1d (fftpack_t * fft,
int nx);
/*! \brief Setup a 2-dimensional complex-to-complex transform
*
* \param fft Pointer to opaque Gromacs FFT datatype
* \param nx Length of transform in first dimension
* \param ny Length of transform in second dimension
*
* \return status - 0 or a standard error message.
*
*/
int
OPENMM_EXPORT
fftpack_init_2d (fftpack_t * fft,
int nx,
int ny);
/*! \brief Setup a 3-dimensional complex-to-complex transform
*
* \param fft Pointer to opaque Gromacs FFT datatype
* \param nx Length of transform in first dimension
* \param ny Length of transform in second dimension
* \param nz Length of transform in third dimension
*
* \return status - 0 or a standard error message.
*
*/
int
OPENMM_EXPORT
fftpack_init_3d (fftpack_t * fft,
int nx,
int ny,
int nz);
/*! \brief Perform a 1-dimensional complex-to-complex transform
*
* Performs an instance of a transform previously initiated.
*
* \param setup Setup returned from fftpack_init_1d()
* \param dir Forward or Backward
* \param in_data Input grid data.
* \param out_data Output grid data.
* You can provide the same pointer for in_data and out_data
* to perform an in-place transform.
*
* \return 0 on success, or an error code.
*
* \note Data pointers are declared as void, to avoid casting pointers
* depending on your grid type.
*/
int
OPENMM_EXPORT
fftpack_exec_1d (fftpack_t setup,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data);
/*! \brief Perform a 2-dimensional complex-to-complex transform
*
* Performs an instance of a transform previously initiated.
*
* \param setup Setup returned from fftpack_init_1d()
* \param dir Forward or Backward
* \param in_data Input grid data.
* \param out_data Output grid data.
* You can provide the same pointer for in_data and out_data
* to perform an in-place transform.
*
* \return 0 on success, or an error code.
*
* \note Data pointers are declared as void, to avoid casting pointers
* depending on your grid type.
*/
int
OPENMM_EXPORT
fftpack_exec_2d (fftpack_t setup,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data);
/*! \brief Perform a 3-dimensional complex-to-complex transform
*
* Performs an instance of a transform previously initiated.
*
* \param setup Setup returned from fftpack_init_1d()
* \param dir Forward or Backward
* \param in_data Input grid data.
* \param out_data Output grid data.
* You can provide the same pointer for in_data and out_data
* to perform an in-place transform.
*
* \return 0 on success, or an error code.
*
* \note Data pointers are declared as void, to avoid casting pointers
* depending on your grid type.
*/
int
OPENMM_EXPORT
fftpack_exec_3d (fftpack_t setup,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data);
/*! \brief Release an FFT setup structure
*
* Destroy setup and release all allocated memory.
*
* \param setup Setup returned from fftpack_init_1d(), or one
* of the other initializers.
*
*/
void
OPENMM_EXPORT
fftpack_destroy (fftpack_t setup);
#ifdef __cplusplus
}
#endif
#endif /* _FFTPACK_H_ */
/* /*
* Reference implementation of PME reciprocal space interactions. * Reference implementation of PME reciprocal space interactions.
* *
* Copyright (c) 2009, Erik Lindahl, Rossen Apostolov, Szilard Pall * Copyright (c) 2009-2022, Erik Lindahl, Rossen Apostolov, Szilard Pall, Peter Eastman
* All rights reserved. * All rights reserved.
* Contact: lindahl@cbr.su.se Stockholm University, Sweden. * Contact: lindahl@cbr.su.se Stockholm University, Sweden.
* *
...@@ -32,12 +32,17 @@ ...@@ -32,12 +32,17 @@
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <complex>
#include "ReferencePME.h" #include "ReferencePME.h"
#include "fftpack.h"
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
using std::vector; #ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
using namespace std;
typedef int ivec[3]; typedef int ivec[3];
...@@ -48,12 +53,11 @@ struct pme ...@@ -48,12 +53,11 @@ struct pme
int natoms; int natoms;
double ewaldcoeff; double ewaldcoeff;
t_complex * grid; /* Memory for the grid we spread charges on. complex<double>* grid; /* Memory for the grid we spread charges on.
* Element (i,j,k) is accessed as: * Element (i,j,k) is accessed as:
* grid[i*ngrid[1]*ngrid[2] + j*ngrid[2] + k] * grid[i*ngrid[1]*ngrid[2] + j*ngrid[2] + k]
*/ */
int ngrid[3]; /* Total grid dimensions (all data is complex!) */ int ngrid[3]; /* Total grid dimensions (all data is complex!) */
fftpack_t fftplan; /* Handle to fourier transform setup */
int order; /* PME interpolation order. Almost always 4 */ int order; /* PME interpolation order. Almost always 4 */
...@@ -346,7 +350,7 @@ pme_grid_spread_charge(pme_t pme, const vector<double>& charges) ...@@ -346,7 +350,7 @@ pme_grid_spread_charge(pme_t pme, const vector<double>& charges)
/* Reset the grid */ /* Reset the grid */
for (i=0;i<pme->ngrid[0]*pme->ngrid[1]*pme->ngrid[2];i++) for (i=0;i<pme->ngrid[0]*pme->ngrid[1]*pme->ngrid[2];i++)
{ {
pme->grid[i].re = pme->grid[i].im = 0; pme->grid[i] = complex<double>(0, 0);
} }
for (i=0;i<pme->natoms;i++) for (i=0;i<pme->natoms;i++)
...@@ -393,11 +397,11 @@ pme_grid_spread_charge(pme_t pme, const vector<double>& charges) ...@@ -393,11 +397,11 @@ pme_grid_spread_charge(pme_t pme, const vector<double>& charges)
for (iz=0;iz<order;iz++) for (iz=0;iz<order;iz++)
{ {
/* Can be optimized, but we keep it simple here */ /* Can be optimized, but we keep it simple here */
zindex = (z0index + iz) % pme->ngrid[2]; zindex = (z0index + iz) % pme->ngrid[2];
/* Calculate index in the charge grid */ /* Calculate index in the charge grid */
index = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex; index = xindex*pme->ngrid[1]*pme->ngrid[2] + yindex*pme->ngrid[2] + zindex;
/* Add the charge times the bspline spread/interpolation factors to this grid position */ /* Add the charge times the bspline spread/interpolation factors to this grid position */
pme->grid[index].re += q*thetax[ix]*thetay[iy]*thetaz[iz]; pme->grid[index] += q*thetax[ix]*thetay[iy]*thetaz[iz];
} }
} }
} }
...@@ -427,7 +431,7 @@ pme_reciprocal_convolution(pme_t pme, ...@@ -427,7 +431,7 @@ pme_reciprocal_convolution(pme_t pme,
double boxfactor; double boxfactor;
double maxkx,maxky,maxkz; double maxkx,maxky,maxkz;
t_complex *ptr; complex<double> *ptr;
nx = pme->ngrid[0]; nx = pme->ngrid[0];
ny = pme->ngrid[1]; ny = pme->ngrid[1];
...@@ -486,8 +490,8 @@ pme_reciprocal_convolution(pme_t pme, ...@@ -486,8 +490,8 @@ pme_reciprocal_convolution(pme_t pme,
ptr = pme->grid + kx*ny*nz + ky*nz + kz; ptr = pme->grid + kx*ny*nz + ky*nz + kz;
/* Get grid data for this frequency */ /* Get grid data for this frequency */
d1 = ptr->re; d1 = ptr->real();
d2 = ptr->im; d2 = ptr->imag();
/* Calculate the convolution - see the Essman/Darden paper for the equation! */ /* Calculate the convolution - see the Essman/Darden paper for the equation! */
m2 = mhx*mhx+mhy*mhy+mhz*mhz; m2 = mhx*mhx+mhy*mhy+mhz*mhz;
...@@ -497,8 +501,8 @@ pme_reciprocal_convolution(pme_t pme, ...@@ -497,8 +501,8 @@ pme_reciprocal_convolution(pme_t pme,
eterm = one_4pi_eps*exp(-factor*m2)/denom; eterm = one_4pi_eps*exp(-factor*m2)/denom;
/* write back convolution data to grid */ /* write back convolution data to grid */
ptr->re = d1*eterm; ptr->real(d1*eterm);
ptr->im = d2*eterm; ptr->imag(d2*eterm);
struct2 = (d1*d1+d2*d2); struct2 = (d1*d1+d2*d2);
...@@ -532,7 +536,7 @@ dpme_reciprocal_convolution(pme_t pme, ...@@ -532,7 +536,7 @@ dpme_reciprocal_convolution(pme_t pme,
double boxfactor; double boxfactor;
double maxkx,maxky,maxkz; double maxkx,maxky,maxkz;
t_complex *ptr; complex<double> *ptr;
nx = pme->ngrid[0]; nx = pme->ngrid[0];
ny = pme->ngrid[1]; ny = pme->ngrid[1];
...@@ -580,8 +584,8 @@ dpme_reciprocal_convolution(pme_t pme, ...@@ -580,8 +584,8 @@ dpme_reciprocal_convolution(pme_t pme,
ptr = pme->grid + kx*ny*nz + ky*nz + kz; ptr = pme->grid + kx*ny*nz + ky*nz + kz;
/* Get grid data for this frequency */ /* Get grid data for this frequency */
d1 = ptr->re; d1 = ptr->real();
d2 = ptr->im; d2 = ptr->imag();
/* Calculate the convolution - see the Essman/Darden paper for the equation! */ /* Calculate the convolution - see the Essman/Darden paper for the equation! */
m2 = mhx*mhx+mhy*mhy+mhz*mhz; m2 = mhx*mhx+mhy*mhy+mhz*mhz;
...@@ -598,8 +602,8 @@ dpme_reciprocal_convolution(pme_t pme, ...@@ -598,8 +602,8 @@ dpme_reciprocal_convolution(pme_t pme,
eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom; eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
/* write back convolution data to grid */ /* write back convolution data to grid */
ptr->re = d1*eterm; ptr->real(d1*eterm);
ptr->im = d2*eterm; ptr->imag(d2*eterm);
struct2 = (d1*d1+d2*d2); struct2 = (d1*d1+d2*d2);
...@@ -696,7 +700,7 @@ pme_grid_interpolate_force(pme_t pme, ...@@ -696,7 +700,7 @@ pme_grid_interpolate_force(pme_t pme,
/* Get the fft+convoluted+ifft:d data from the grid, which must be real by definition */ /* Get the fft+convoluted+ifft:d data from the grid, which must be real by definition */
/* Checking that the imaginary part is indeed zero might be a good check :-) */ /* Checking that the imaginary part is indeed zero might be a good check :-) */
gridvalue = pme->grid[index].re; gridvalue = pme->grid[index].real();
/* The d component of the force is calculated by taking the derived bspline in dimension d, normal bsplines in the other two */ /* The d component of the force is calculated by taking the derived bspline in dimension d, normal bsplines in the other two */
fx += dtx*ty*tz*gridvalue; fx += dtx*ty*tz*gridvalue;
...@@ -745,9 +749,7 @@ pme_init(pme_t * ppme, ...@@ -745,9 +749,7 @@ pme_init(pme_t * ppme,
pme->particleindex = (ivec *)malloc(sizeof(ivec)*natoms); pme->particleindex = (ivec *)malloc(sizeof(ivec)*natoms);
/* Allocate charge grid storage */ /* Allocate charge grid storage */
pme->grid = (t_complex *)malloc(sizeof(t_complex)*ngrid[0]*ngrid[1]*ngrid[2]); pme->grid = (complex<double> *)malloc(sizeof(complex<double>)*ngrid[0]*ngrid[1]*ngrid[2]);
fftpack_init_3d(&pme->fftplan,ngrid[0],ngrid[1],ngrid[2]);
/* Setup bspline moduli (see Essman paper) */ /* Setup bspline moduli (see Essman paper) */
pme_calculate_bsplines_moduli(pme); pme_calculate_bsplines_moduli(pme);
...@@ -790,13 +792,18 @@ int pme_exec(pme_t pme, ...@@ -790,13 +792,18 @@ int pme_exec(pme_t pme,
pme_grid_spread_charge(pme, charges); pme_grid_spread_charge(pme, charges);
/* do 3d-fft */ /* do 3d-fft */
fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid); vector<size_t> shape = {(size_t) pme->ngrid[0], (size_t) pme->ngrid[1], (size_t) pme->ngrid[2]};
vector<size_t> axes = {0, 1, 2};
vector<ptrdiff_t> stride = {(ptrdiff_t) (pme->ngrid[1]*pme->ngrid[2]*sizeof(complex<double>)),
(ptrdiff_t) (pme->ngrid[2]*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, pme->grid, pme->grid, 1.0, 0);
/* solve in k-space */ /* solve in k-space */
pme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy); pme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);
/* do 3d-invfft */ /* do 3d-invfft */
fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid); pocketfft::c2c(shape, stride, stride, axes, false, pme->grid, pme->grid, 1.0, 0);
/* Get the particle forces from the grid and bsplines in the pme structure */ /* Get the particle forces from the grid and bsplines in the pme structure */
pme_grid_interpolate_force(pme,recipBoxVectors,charges,forces); pme_grid_interpolate_force(pme,recipBoxVectors,charges,forces);
...@@ -834,13 +841,18 @@ int pme_exec_dpme(pme_t pme, ...@@ -834,13 +841,18 @@ int pme_exec_dpme(pme_t pme,
pme_grid_spread_charge(pme, c6s); pme_grid_spread_charge(pme, c6s);
/* do 3d-fft */ /* do 3d-fft */
fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid); vector<size_t> shape = {(size_t) pme->ngrid[0], (size_t) pme->ngrid[1], (size_t) pme->ngrid[2]};
vector<size_t> axes = {0, 1, 2};
vector<ptrdiff_t> stride = {(ptrdiff_t) (pme->ngrid[1]*pme->ngrid[2]*sizeof(complex<double>)),
(ptrdiff_t) (pme->ngrid[2]*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, pme->grid, pme->grid, 1.0, 0);
/* solve in k-space */ /* solve in k-space */
dpme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy); dpme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);
/* do 3d-invfft */ /* do 3d-invfft */
fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid); pocketfft::c2c(shape, stride, stride, axes, false, pme->grid, pme->grid, 1.0, 0);
/* Get the particle forces from the grid and bsplines in the pme structure */ /* Get the particle forces from the grid and bsplines in the pme structure */
pme_grid_interpolate_force(pme,recipBoxVectors,c6s,forces); pme_grid_interpolate_force(pme,recipBoxVectors,c6s,forces);
...@@ -866,8 +878,6 @@ pme_destroy(pme_t pme) ...@@ -866,8 +878,6 @@ pme_destroy(pme_t pme)
free(pme->particlefraction); free(pme->particlefraction);
free(pme->particleindex); free(pme->particleindex);
fftpack_destroy(pme->fftplan);
/* destroy structure itself */ /* destroy structure itself */
free(pme); free(pme);
......
/*
* This file contains a Fortran to C translation of the 1D transformations
* based on the original FFTPACK, written by paul n swarztrauber
* at the national center for atmospheric research and available
* at www.netlib.org. FFTPACK is in the public domain.
*
* Higher-dimension transforms written by Erik Lindahl, 2008-2009.
* Just as FFTPACK, this file may be redistributed freely, and can be
* considered to be in the public domain.
*
* Any errors in this (threadsafe, but not threaded) C version
* are probably due to the f2c translator, or hacks by Erik Lindahl,
* rather than FFTPACK. If you find a bug, it would be great if you could
* drop a line to lindahl@cbr.su.se and let me know about it!
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include "fftpack.h"
/** Contents of the FFTPACK fft datatype.
*
* FFTPACK only does 1d transforms, so we use a pointers to another fft for
* the transform in the next dimension.
* Thus, a 3d-structure contains a pointer to a 2d one, which in turns contains
* a pointer to a 1d. The 1d structure has next==NULL.
*/
struct fftpack
{
int ndim; /**< Dimensions, including our subdimensions. */
int n; /**< Number of points in this dimension. */
int ifac[15]; /**< 15 bytes needed for cfft and rfft */
struct fftpack * next; /**< Pointer to next dimension, or NULL. */
double * work; /**< 1st 4n reserved for cfft, 1st 2n for rfft */
};
static void
fftpack_passf2(int ido,
int l1,
double cc[],
double ch[],
double wa1[],
int isign)
{
int i, k, ah, ac;
double ti2, tr2;
if (ido <= 2)
{
for (k=0; k<l1; k++)
{
ah = k*ido;
ac = 2*k*ido;
ch[ah] = cc[ac] + cc[ac + ido];
ch[ah + ido*l1] = cc[ac] - cc[ac + ido];
ch[ah+1] = cc[ac+1] + cc[ac + ido + 1];
ch[ah + ido*l1 + 1] = cc[ac+1] - cc[ac + ido + 1];
}
}
else
{
for (k=0; k<l1; k++)
{
for (i=0; i<ido-1; i+=2)
{
ah = i + k*ido;
ac = i + 2*k*ido;
ch[ah] = cc[ac] + cc[ac + ido];
tr2 = cc[ac] - cc[ac + ido];
ch[ah+1] = cc[ac+1] + cc[ac + 1 + ido];
ti2 = cc[ac+1] - cc[ac + 1 + ido];
ch[ah+l1*ido+1] = wa1[i]*ti2 + isign*wa1[i+1]*tr2;
ch[ah+l1*ido] = wa1[i]*tr2 - isign*wa1[i+1]*ti2;
}
}
}
}
static void
fftpack_passf3(int ido,
int l1,
double cc[],
double ch[],
double wa1[],
double wa2[],
int isign)
{
const double taur = -0.5;
const double taui = 0.866025403784439;
int i, k, ac, ah;
double ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
if (ido == 2)
{
for (k=1; k<=l1; k++)
{
ac = (3*k - 2)*ido;
tr2 = cc[ac] + cc[ac + ido];
cr2 = cc[ac - ido] + taur*tr2;
ah = (k - 1)*ido;
ch[ah] = cc[ac - ido] + tr2;
ti2 = cc[ac + 1] + cc[ac + ido + 1];
ci2 = cc[ac - ido + 1] + taur*ti2;
ch[ah + 1] = cc[ac - ido + 1] + ti2;
cr3 = isign*taui*(cc[ac] - cc[ac + ido]);
ci3 = isign*taui*(cc[ac + 1] - cc[ac + ido + 1]);
ch[ah + l1*ido] = cr2 - ci3;
ch[ah + 2*l1*ido] = cr2 + ci3;
ch[ah + l1*ido + 1] = ci2 + cr3;
ch[ah + 2*l1*ido + 1] = ci2 - cr3;
}
}
else
{
for (k=1; k<=l1; k++)
{
for (i=0; i<ido-1; i+=2)
{
ac = i + (3*k - 2)*ido;
tr2 = cc[ac] + cc[ac + ido];
cr2 = cc[ac - ido] + taur*tr2;
ah = i + (k-1)*ido;
ch[ah] = cc[ac - ido] + tr2;
ti2 = cc[ac + 1] + cc[ac + ido + 1];
ci2 = cc[ac - ido + 1] + taur*ti2;
ch[ah + 1] = cc[ac - ido + 1] + ti2;
cr3 = isign*taui*(cc[ac] - cc[ac + ido]);
ci3 = isign*taui*(cc[ac + 1] - cc[ac + ido + 1]);
dr2 = cr2 - ci3;
dr3 = cr2 + ci3;
di2 = ci2 + cr3;
di3 = ci2 - cr3;
ch[ah + l1*ido + 1] = wa1[i]*di2 + isign*wa1[i+1]*dr2;
ch[ah + l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2;
ch[ah + 2*l1*ido + 1] = wa2[i]*di3 + isign*wa2[i+1]*dr3;
ch[ah + 2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3;
}
}
}
}
static void
fftpack_passf4(int ido,
int l1,
double cc[],
double ch[],
double wa1[],
double wa2[],
double wa3[],
int isign)
{
int i, k, ac, ah;
double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
if (ido == 2)
{
for (k=0; k<l1; k++)
{
ac = 4*k*ido + 1;
ti1 = cc[ac] - cc[ac + 2*ido];
ti2 = cc[ac] + cc[ac + 2*ido];
tr4 = cc[ac + 3*ido] - cc[ac + ido];
ti3 = cc[ac + ido] + cc[ac + 3*ido];
tr1 = cc[ac - 1] - cc[ac + 2*ido - 1];
tr2 = cc[ac - 1] + cc[ac + 2*ido - 1];
ti4 = cc[ac + ido - 1] - cc[ac + 3*ido - 1];
tr3 = cc[ac + ido - 1] + cc[ac + 3*ido - 1];
ah = k*ido;
ch[ah] = tr2 + tr3;
ch[ah + 2*l1*ido] = tr2 - tr3;
ch[ah + 1] = ti2 + ti3;
ch[ah + 2*l1*ido + 1] = ti2 - ti3;
ch[ah + l1*ido] = tr1 + isign*tr4;
ch[ah + 3*l1*ido] = tr1 - isign*tr4;
ch[ah + l1*ido + 1] = ti1 + isign*ti4;
ch[ah + 3*l1*ido + 1] = ti1 - isign*ti4;
}
}
else
{
for (k=0; k<l1; k++)
{
for (i=0; i<ido-1; i+=2)
{
ac = i + 1 + 4*k*ido;
ti1 = cc[ac] - cc[ac + 2*ido];
ti2 = cc[ac] + cc[ac + 2*ido];
ti3 = cc[ac + ido] + cc[ac + 3*ido];
tr4 = cc[ac + 3*ido] - cc[ac + ido];
tr1 = cc[ac - 1] - cc[ac + 2*ido - 1];
tr2 = cc[ac - 1] + cc[ac + 2*ido - 1];
ti4 = cc[ac + ido - 1] - cc[ac + 3*ido - 1];
tr3 = cc[ac + ido - 1] + cc[ac + 3*ido - 1];
ah = i + k*ido;
ch[ah] = tr2 + tr3;
cr3 = tr2 - tr3;
ch[ah + 1] = ti2 + ti3;
ci3 = ti2 - ti3;
cr2 = tr1 + isign*tr4;
cr4 = tr1 - isign*tr4;
ci2 = ti1 + isign*ti4;
ci4 = ti1 - isign*ti4;
ch[ah + l1*ido] = wa1[i]*cr2 - isign*wa1[i + 1]*ci2;
ch[ah + l1*ido + 1] = wa1[i]*ci2 + isign*wa1[i + 1]*cr2;
ch[ah + 2*l1*ido] = wa2[i]*cr3 - isign*wa2[i + 1]*ci3;
ch[ah + 2*l1*ido + 1] = wa2[i]*ci3 + isign*wa2[i + 1]*cr3;
ch[ah + 3*l1*ido] = wa3[i]*cr4 -isign*wa3[i + 1]*ci4;
ch[ah + 3*l1*ido + 1] = wa3[i]*ci4 + isign*wa3[i + 1]*cr4;
}
}
}
}
static void
fftpack_passf5(int ido,
int l1,
double cc[],
double ch[],
double wa1[],
double wa2[],
double wa3[],
double wa4[],
int isign)
{
const double tr11 = 0.309016994374947;
const double ti11 = 0.951056516295154;
const double tr12 = -0.809016994374947;
const double ti12 = 0.587785252292473;
int i, k, ac, ah;
double ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
if (ido == 2)
{
for (k = 1; k <= l1; ++k)
{
ac = (5*k - 4)*ido + 1;
ti5 = cc[ac] - cc[ac + 3*ido];
ti2 = cc[ac] + cc[ac + 3*ido];
ti4 = cc[ac + ido] - cc[ac + 2*ido];
ti3 = cc[ac + ido] + cc[ac + 2*ido];
tr5 = cc[ac - 1] - cc[ac + 3*ido - 1];
tr2 = cc[ac - 1] + cc[ac + 3*ido - 1];
tr4 = cc[ac + ido - 1] - cc[ac + 2*ido - 1];
tr3 = cc[ac + ido - 1] + cc[ac + 2*ido - 1];
ah = (k - 1)*ido;
ch[ah] = cc[ac - ido - 1] + tr2 + tr3;
ch[ah + 1] = cc[ac - ido] + ti2 + ti3;
cr2 = cc[ac - ido - 1] + tr11*tr2 + tr12*tr3;
ci2 = cc[ac - ido] + tr11*ti2 + tr12*ti3;
cr3 = cc[ac - ido - 1] + tr12*tr2 + tr11*tr3;
ci3 = cc[ac - ido] + tr12*ti2 + tr11*ti3;
cr5 = isign*(ti11*tr5 + ti12*tr4);
ci5 = isign*(ti11*ti5 + ti12*ti4);
cr4 = isign*(ti12*tr5 - ti11*tr4);
ci4 = isign*(ti12*ti5 - ti11*ti4);
ch[ah + l1*ido] = cr2 - ci5;
ch[ah + 4*l1*ido] = cr2 + ci5;
ch[ah + l1*ido + 1] = ci2 + cr5;
ch[ah + 2*l1*ido + 1] = ci3 + cr4;
ch[ah + 2*l1*ido] = cr3 - ci4;
ch[ah + 3*l1*ido] = cr3 + ci4;
ch[ah + 3*l1*ido + 1] = ci3 - cr4;
ch[ah + 4*l1*ido + 1] = ci2 - cr5;
}
}
else
{
for (k=1; k<=l1; k++)
{
for (i=0; i<ido-1; i+=2)
{
ac = i + 1 + (k*5 - 4)*ido;
ti5 = cc[ac] - cc[ac + 3*ido];
ti2 = cc[ac] + cc[ac + 3*ido];
ti4 = cc[ac + ido] - cc[ac + 2*ido];
ti3 = cc[ac + ido] + cc[ac + 2*ido];
tr5 = cc[ac - 1] - cc[ac + 3*ido - 1];
tr2 = cc[ac - 1] + cc[ac + 3*ido - 1];
tr4 = cc[ac + ido - 1] - cc[ac + 2*ido - 1];
tr3 = cc[ac + ido - 1] + cc[ac + 2*ido - 1];
ah = i + (k - 1)*ido;
ch[ah] = cc[ac - ido - 1] + tr2 + tr3;
ch[ah + 1] = cc[ac - ido] + ti2 + ti3;
cr2 = cc[ac - ido - 1] + tr11*tr2 + tr12*tr3;
ci2 = cc[ac - ido] + tr11*ti2 + tr12*ti3;
cr3 = cc[ac - ido - 1] + tr12*tr2 + tr11*tr3;
ci3 = cc[ac - ido] + tr12*ti2 + tr11*ti3;
cr5 = isign*(ti11*tr5 + ti12*tr4);
ci5 = isign*(ti11*ti5 + ti12*ti4);
cr4 = isign*(ti12*tr5 - ti11*tr4);
ci4 = isign*(ti12*ti5 - ti11*ti4);
dr3 = cr3 - ci4;
dr4 = cr3 + ci4;
di3 = ci3 + cr4;
di4 = ci3 - cr4;
dr5 = cr2 + ci5;
dr2 = cr2 - ci5;
di5 = ci2 - cr5;
di2 = ci2 + cr5;
ch[ah + l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2;
ch[ah + l1*ido + 1] = wa1[i]*di2 + isign*wa1[i+1]*dr2;
ch[ah + 2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3;
ch[ah + 2*l1*ido + 1] = wa2[i]*di3 + isign*wa2[i+1]*dr3;
ch[ah + 3*l1*ido] = wa3[i]*dr4 - isign*wa3[i+1]*di4;
ch[ah + 3*l1*ido + 1] = wa3[i]*di4 + isign*wa3[i+1]*dr4;
ch[ah + 4*l1*ido] = wa4[i]*dr5 - isign*wa4[i+1]*di5;
ch[ah + 4*l1*ido + 1] = wa4[i]*di5 + isign*wa4[i+1]*dr5;
}
}
}
}
static void
fftpack_passf(int* nac,
int ido,
int ip,
int l1,
int idl1,
double cc[],
double ch[],
double wa[],
int isign)
{
int idij, idlj, idot, ipph, i, j, k, l, jc, lc, ik, nt, idj, idl, inc,idp;
double wai, war;
idot = ido / 2;
nt = ip*idl1;
ipph = (ip + 1) / 2;
idp = ip*ido;
if (ido >= l1)
{
for (j=1; j<ipph; j++)
{
jc = ip - j;
for (k=0; k<l1; k++)
{
for (i=0; i<ido; i++)
{
ch[i + (k + j*l1)*ido] = cc[i + (j + k*ip)*ido] + cc[i + (jc + k*ip)*ido];
ch[i + (k + jc*l1)*ido] = cc[i + (j + k*ip)*ido] - cc[i + (jc + k*ip)*ido];
}
}
}
for (k=0; k<l1; k++)
for (i=0; i<ido; i++)
ch[i + k*ido] = cc[i + k*ip*ido];
}
else
{
for (j=1; j<ipph; j++)
{
jc = ip - j;
for (i=0; i<ido; i++)
{
for (k=0; k<l1; k++)
{
ch[i + (k + j*l1)*ido] = cc[i + (j + k*ip)*ido] + cc[i + (jc + k*ip)*ido];
ch[i + (k + jc*l1)*ido] = cc[i + (j + k*ip)*ido] - cc[i + (jc + k*ip)*ido];
}
}
}
for (i=0; i<ido; i++)
for (k=0; k<l1; k++)
ch[i + k*ido] = cc[i + k*ip*ido];
}
idl = 2 - ido;
inc = 0;
for (l=1; l<ipph; l++)
{
lc = ip - l;
idl += ido;
for (ik=0; ik<idl1; ik++)
{
cc[ik + l*idl1] = ch[ik] + wa[idl - 2]*ch[ik + idl1];
cc[ik + lc*idl1] = isign*wa[idl-1]*ch[ik + (ip-1)*idl1];
}
idlj = idl;
inc += ido;
for (j=2; j<ipph; j++)
{
jc = ip - j;
idlj += inc;
if (idlj > idp) idlj -= idp;
war = wa[idlj - 2];
wai = wa[idlj-1];
for (ik=0; ik<idl1; ik++)
{
cc[ik + l*idl1] += war*ch[ik + j*idl1];
cc[ik + lc*idl1] += isign*wai*ch[ik + jc*idl1];
}
}
}
for (j=1; j<ipph; j++)
for (ik=0; ik<idl1; ik++)
ch[ik] += ch[ik + j*idl1];
for (j=1; j<ipph; j++)
{
jc = ip - j;
for (ik=1; ik<idl1; ik+=2)
{
ch[ik - 1 + j*idl1] = cc[ik - 1 + j*idl1] - cc[ik + jc*idl1];
ch[ik - 1 + jc*idl1] = cc[ik - 1 + j*idl1] + cc[ik + jc*idl1];
ch[ik + j*idl1] = cc[ik + j*idl1] + cc[ik - 1 + jc*idl1];
ch[ik + jc*idl1] = cc[ik + j*idl1] - cc[ik - 1 + jc*idl1];
}
}
*nac = 1;
if (ido == 2)
return;
*nac = 0;
for (ik=0; ik<idl1; ik++)
{
cc[ik] = ch[ik];
}
for (j=1; j<ip; j++)
{
for (k=0; k<l1; k++)
{
cc[(k + j*l1)*ido + 0] = ch[(k + j*l1)*ido + 0];
cc[(k + j*l1)*ido + 1] = ch[(k + j*l1)*ido + 1];
}
}
if (idot <= l1)
{
idij = 0;
for (j=1; j<ip; j++)
{
idij += 2;
for (i=3; i<ido; i+=2)
{
idij += 2;
for (k=0; k<l1; k++)
{
cc[i - 1 + (k + j*l1)*ido] =
wa[idij - 2]*ch[i - 1 + (k + j*l1)*ido] -
isign*wa[idij-1]*ch[i + (k + j*l1)*ido];
cc[i + (k + j*l1)*ido] =
wa[idij - 2]*ch[i + (k + j*l1)*ido] +
isign*wa[idij-1]*ch[i - 1 + (k + j*l1)*ido];
}
}
}
}
else
{
idj = 2 - ido;
for (j=1; j<ip; j++)
{
idj += ido;
for (k = 0; k < l1; k++)
{
idij = idj;
for (i=3; i<ido; i+=2)
{
idij += 2;
cc[i - 1 + (k + j*l1)*ido] =
wa[idij - 2]*ch[i - 1 + (k + j*l1)*ido] -
isign*wa[idij-1]*ch[i + (k + j*l1)*ido];
cc[i + (k + j*l1)*ido] =
wa[idij - 2]*ch[i + (k + j*l1)*ido] +
isign*wa[idij-1]*ch[i - 1 + (k + j*l1)*ido];
}
}
}
}
}
static void
fftpack_cfftf1(int n,
double c[],
double ch[],
double wa[],
int ifac[15],
int isign)
{
int idot, i;
int k1, l1, l2;
int na, nf, ip, iw, ix2, ix3, ix4, nac, ido, idl1;
double *cinput, *coutput;
nf = ifac[1];
na = 0;
l1 = 1;
iw = 0;
for (k1=2; k1<=nf+1; k1++)
{
ip = ifac[k1];
l2 = ip*l1;
ido = n / l2;
idot = ido + ido;
idl1 = idot*l1;
if (na)
{
cinput = ch;
coutput = c;
}
else
{
cinput = c;
coutput = ch;
}
switch (ip)
{
case 4:
ix2 = iw + idot;
ix3 = ix2 + idot;
fftpack_passf4(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], isign);
na = !na;
break;
case 2:
fftpack_passf2(idot, l1, cinput, coutput, &wa[iw], isign);
na = !na;
break;
case 3:
ix2 = iw + idot;
fftpack_passf3(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], isign);
na = !na;
break;
case 5:
ix2 = iw + idot;
ix3 = ix2 + idot;
ix4 = ix3 + idot;
fftpack_passf5(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
na = !na;
break;
default:
fftpack_passf(&nac, idot, ip, l1, idl1, cinput, coutput, &wa[iw], isign);
if (nac != 0) na = !na;
}
l1 = l2;
iw += (ip - 1)*idot;
}
if (na == 0)
return;
for (i=0; i<2*n; i++)
c[i] = ch[i];
}
static void
fftpack_factorize(int n,
int ifac[15])
{
static const int ntryh[4] = { 3,4,2,5 };
int ntry=3, i, j=0, ib, nf=0, nl=n, nq, nr;
startloop:
if (j < 4)
ntry = ntryh[j];
else
ntry+= 2;
j++;
do
{
nq = nl / ntry;
nr = nl - ntry*nq;
if (nr != 0) goto startloop;
nf++;
ifac[nf + 1] = ntry;
nl = nq;
if (ntry == 2 && nf != 1)
{
for (i=2; i<=nf; i++)
{
ib = nf - i + 2;
ifac[ib + 1] = ifac[ib];
}
ifac[2] = 2;
}
}
while (nl != 1);
ifac[0] = n;
ifac[1] = nf;
}
static void
fftpack_cffti1(int n,
double wa[],
int ifac[15])
{
const double twopi = 6.28318530717959;
double arg, argh, argld, fi;
int idot, i, j;
int i1, k1, l1, l2;
int ld, ii, nf, ip;
int ido, ipm;
fftpack_factorize(n,ifac);
nf = ifac[1];
argh = twopi/n;
i = 1;
l1 = 1;
for (k1=1; k1<=nf; k1++)
{
ip = ifac[k1+1];
ld = 0;
l2 = l1*ip;
ido = n / l2;
idot = ido + ido + 2;
ipm = ip - 1;
for (j=1; j<=ipm; j++)
{
i1 = i;
wa[i-1] = 1;
wa[i] = 0;
ld += l1;
fi = 0;
argld = ld*argh;
for (ii=4; ii<=idot; ii+=2)
{
i+= 2;
fi+= 1;
arg = fi*argld;
wa[i-1] = cos(arg);
wa[i] = sin(arg);
}
if (ip > 5)
{
wa[i1-1] = wa[i-1];
wa[i1] = wa[i];
}
}
l1 = l2;
}
}
static int
fftpack_transpose_2d(t_complex * in_data,
t_complex * out_data,
int nx,
int ny)
{
t_complex * src;
int i,j;
if (nx<2 || ny<2)
{
if (in_data != out_data)
{
memcpy(out_data,in_data,sizeof(t_complex)*nx*ny);
}
return 0;
}
if (in_data == out_data)
{
src = (t_complex *)malloc(sizeof(t_complex)*nx*ny);
memcpy(src,in_data,sizeof(t_complex)*nx*ny);
}
else
{
src = in_data;
}
for (i=0;i<nx;i++)
{
for (j=0;j<ny;j++)
{
out_data[j*nx+i].re = src[i*ny+j].re;
out_data[j*nx+i].im = src[i*ny+j].im;
}
}
if (src != in_data)
{
free(src);
}
return 0;
}
static int
fftpack_transpose_2d_nelem(t_complex * in_data,
t_complex * out_data,
int nx,
int ny,
int nelem)
{
t_complex * src;
int ncopy;
int i,j;
ncopy = nelem*sizeof(t_complex);
if (nx<2 || ny<2)
{
if (in_data != out_data)
{
memcpy(out_data,in_data,nx*ny*ncopy);
}
return 0;
}
if (in_data == out_data)
{
src = (t_complex *)malloc(nx*ny*ncopy);
memcpy(src,in_data,nx*ny*ncopy);
}
else
{
src = in_data;
}
for (i=0;i<nx;i++)
{
for (j=0;j<ny;j++)
{
memcpy(out_data + (j*nx+i)*nelem , src + (i*ny+j)*nelem , ncopy);
}
}
if (src != in_data)
{
free(src);
}
return 0;
}
int
fftpack_init_1d(fftpack_t * pfft,
int nx)
{
fftpack_t fft;
if (pfft==NULL)
{
fprintf(stderr,"Fatal error - Invalid FFT opaque type pointer.");
return EINVAL;
}
*pfft = NULL;
if ((fft = (struct fftpack *)malloc(sizeof(struct fftpack))) == NULL)
{
return ENOMEM;
}
fft->next = NULL;
fft->n = nx;
/* Need 4*n storage for 1D complex FFT */
if ((fft->work = (double *)malloc(sizeof(double)*(4*nx))) == NULL)
{
free(fft);
return ENOMEM;
}
if (fft->n>1)
fftpack_cffti1(nx,fft->work,fft->ifac);
*pfft = fft;
return 0;
};
int
fftpack_init_2d(fftpack_t * pfft,
int nx,
int ny)
{
fftpack_t fft;
int rc;
if (pfft==NULL)
{
fprintf(stderr,"Fatal error - Invalid FFT opaque type pointer.");
return EINVAL;
}
*pfft = NULL;
/* Create the X transform */
if ((rc = fftpack_init_1d(&fft,nx)) != 0)
{
return rc;
}
/* Create Y transform as a link from X */
if ((rc=fftpack_init_1d(&(fft->next),ny)) != 0)
{
free(fft);
return rc;
}
*pfft = fft;
return 0;
};
int
fftpack_init_3d(fftpack_t * pfft,
int nx,
int ny,
int nz)
{
fftpack_t fft;
int rc;
if (pfft==NULL)
{
fprintf(stderr,"Fatal error - Invalid FFT opaque type pointer.");
return EINVAL;
}
*pfft = NULL;
/* Create the X transform */
if ((fft = (struct fftpack *)malloc(sizeof(struct fftpack))) == NULL)
{
return ENOMEM;
}
fft->n = nx;
/* Need 4*nx storage for 1D complex FFT.
*/
if ((fft->work = (double *)malloc(sizeof(double)*(4*nx))) == NULL)
{
free(fft);
return ENOMEM;
}
fftpack_cffti1(nx,fft->work,fft->ifac);
/* Create 2D Y/Z transforms as a link from X */
if ((rc=fftpack_init_2d(&(fft->next),ny,nz)) != 0)
{
free(fft);
return rc;
}
*pfft = fft;
return 0;
};
int
fftpack_exec_1d (fftpack_t fft,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data)
{
int i, n;
double* p1;
double* p2;
n=fft->n;
if (n==1)
{
p1 = (double *)in_data;
p2 = (double *)out_data;
p2[0] = p1[0];
p2[1] = p1[1];
}
/* FFTPACK only does in-place transforms, so emulate out-of-place
* by copying data to the output array first.
*/
if (in_data != out_data)
{
p1 = (double *)in_data;
p2 = (double *)out_data;
/* n complex = 2*n double elements */
for (i=0;i<2*n;i++)
{
p2[i] = p1[i];
}
}
/* Elements 0 .. 2*n-1 in work are used for ffac values,
* Elements 2*n .. 4*n-1 are internal FFTPACK work space.
*/
if (dir == FFTPACK_FORWARD)
{
fftpack_cfftf1(n,(double *)out_data,fft->work+2*n,fft->work,fft->ifac, -1);
}
else if (dir == FFTPACK_BACKWARD)
{
fftpack_cfftf1(n,(double *)out_data,fft->work+2*n,fft->work,fft->ifac, 1);
}
else
{
fprintf(stderr,"FFT plan mismatch - bad plan or direction.");
return EINVAL;
}
return 0;
}
int
fftpack_exec_2d (fftpack_t fft,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data)
{
int i,nx,ny;
t_complex * data;
nx = fft->n;
ny = fft->next->n;
/* FFTPACK only does in-place transforms, so emulate out-of-place
* by copying data to the output array first.
* For 2D there is likely enough data to benefit from memcpy().
*/
if (in_data != out_data)
{
memcpy(out_data,in_data,sizeof(t_complex)*nx*ny);
}
/* Much easier to do pointer arithmetic when base has the correct type */
data = (t_complex *)out_data;
/* y transforms */
for (i=0;i<nx;i++)
{
fftpack_exec_1d(fft->next,dir,data+i*ny,data+i*ny);
}
/* Transpose in-place to get data in place for x transform now */
fftpack_transpose_2d(data,data,nx,ny);
/* x transforms */
for (i=0;i<ny;i++)
{
fftpack_exec_1d(fft,dir,data+i*nx,data+i*nx);
}
/* Transpose in-place to get data back in original order */
fftpack_transpose_2d(data,data,ny,nx);
return 0;
}
int
fftpack_exec_3d (fftpack_t fft,
enum fftpack_direction dir,
t_complex * in_data,
t_complex * out_data)
{
int i,nx,ny,nz,rc;
t_complex * data;
nx=fft->n;
ny=fft->next->n;
nz=fft->next->next->n;
/* FFTPACK only does in-place transforms, so emulate out-of-place
* by copying data to the output array first.
* For 3D there is likely enough data to benefit from memcpy().
*/
if (in_data != out_data)
{
memcpy(out_data,in_data,sizeof(t_complex)*nx*ny*nz);
}
/* Much easier to do pointer arithmetic when base has the correct type */
data = (t_complex *)out_data;
/* Perform z transforms */
for (i=0;i<nx*ny;i++)
fftpack_exec_1d(fft->next->next,dir,data+i*nz,data+i*nz);
/* For each X slice, transpose the y & z dimensions inside the slice */
for (i=0;i<nx;i++)
{
fftpack_transpose_2d(data+i*ny*nz,data+i*ny*nz,ny,nz);
}
/* Array is now (nx,nz,ny) - perform y transforms */
for (i=0;i<nx*nz;i++)
{
fftpack_exec_1d(fft->next,dir,data+i*ny,data+i*ny);
}
/* Transpose back to (nx,ny,nz) */
for (i=0;i<nx;i++)
{
fftpack_transpose_2d(data+i*ny*nz,data+i*ny*nz,nz,ny);
}
/* Transpose entire x & y slices to go from
* (nx,ny,nz) to (ny,nx,nz).
*/
rc=fftpack_transpose_2d_nelem(data,data,nx,ny,nz);
if (rc != 0)
{
fprintf(stderr,"Fatal error - cannot transpose X & Y/Z in fftpack_exec_3d().");
return rc;
}
/* Then go from (ny,nx,nz) to (ny,nz,nx) */
for (i=0;i<ny;i++)
{
fftpack_transpose_2d(data+i*nx*nz,data+i*nx*nz,nx,nz);
}
/* Perform x transforms */
for (i=0;i<ny*nz;i++)
{
fftpack_exec_1d(fft,dir,data+i*nx,data+i*nx);
}
/* Transpose back from (ny,nz,nx) to (ny,nx,nz) */
for (i=0;i<ny;i++)
{
fftpack_transpose_2d(data+i*nz*nx,data+i*nz*nx,nz,nx);
}
/* Transpose from (ny,nx,nz) to (nx,ny,nz).
*/
rc = fftpack_transpose_2d_nelem(data,data,ny,nx,nz);
if (rc != 0)
{
fprintf(stderr,"Fatal error - cannot transpose Y/Z & X in fftpack_exec_3d().");
return rc;
}
return 0;
}
void
fftpack_destroy(fftpack_t fft)
{
if (fft != NULL)
{
free(fft->work);
if (fft->next != NULL)
fftpack_destroy(fft->next);
free(fft);
}
}
/* Portions copyright (c) 2006-2019 Stanford University and Simbios. /* Portions copyright (c) 2006-2022 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -30,6 +30,10 @@ ...@@ -30,6 +30,10 @@
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
#include "jama_svd.h" #include "jama_svd.h"
#include <algorithm> #include <algorithm>
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
// In case we're using some primitive version of Visual Studio this will // In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined. // make sure that erf() and erfc() are defined.
...@@ -1396,7 +1400,6 @@ const double AmoebaReferencePmeHippoNonbondedForce::SQRT_PI = sqrt(M_PI); ...@@ -1396,7 +1400,6 @@ const double AmoebaReferencePmeHippoNonbondedForce::SQRT_PI = sqrt(M_PI);
AmoebaReferencePmeHippoNonbondedForce::AmoebaReferencePmeHippoNonbondedForce(const HippoNonbondedForce& force, const System& system) : AmoebaReferencePmeHippoNonbondedForce::AmoebaReferencePmeHippoNonbondedForce(const HippoNonbondedForce& force, const System& system) :
AmoebaReferenceHippoNonbondedForce(force) { AmoebaReferenceHippoNonbondedForce(force) {
_fftplan = NULL;
force.getPMEParameters(_alphaEwald, _pmeGridDimensions[0], _pmeGridDimensions[1], _pmeGridDimensions[2]); force.getPMEParameters(_alphaEwald, _pmeGridDimensions[0], _pmeGridDimensions[1], _pmeGridDimensions[2]);
force.getDPMEParameters(_dalphaEwald, _dpmeGridDimensions[0], _dpmeGridDimensions[1], _dpmeGridDimensions[2]); force.getDPMEParameters(_dalphaEwald, _dpmeGridDimensions[0], _dpmeGridDimensions[1], _dpmeGridDimensions[2]);
if (_alphaEwald == 0.0 || _dalphaEwald == 0.0) { if (_alphaEwald == 0.0 || _dalphaEwald == 0.0) {
...@@ -1408,15 +1411,9 @@ AmoebaReferencePmeHippoNonbondedForce::AmoebaReferencePmeHippoNonbondedForce(con ...@@ -1408,15 +1411,9 @@ AmoebaReferencePmeHippoNonbondedForce::AmoebaReferencePmeHippoNonbondedForce(con
if (_dalphaEwald == 0.0) if (_dalphaEwald == 0.0)
NonbondedForceImpl::calcPMEParameters(system, nb, _dalphaEwald, _dpmeGridDimensions[0], _dpmeGridDimensions[1], _dpmeGridDimensions[2], true); NonbondedForceImpl::calcPMEParameters(system, nb, _dalphaEwald, _dpmeGridDimensions[0], _dpmeGridDimensions[1], _dpmeGridDimensions[2], true);
} }
fftpack_init_3d(&_fftplan, _pmeGridDimensions[0], _pmeGridDimensions[1], _pmeGridDimensions[2]);
initializeBSplineModuli(); initializeBSplineModuli();
} }
AmoebaReferencePmeHippoNonbondedForce::~AmoebaReferencePmeHippoNonbondedForce() {
if (_fftplan != NULL)
fftpack_destroy(_fftplan);
};
double AmoebaReferencePmeHippoNonbondedForce::getCutoffDistance() const { double AmoebaReferencePmeHippoNonbondedForce::getCutoffDistance() const {
return _cutoffDistance; return _cutoffDistance;
}; };
...@@ -1451,11 +1448,6 @@ void AmoebaReferencePmeHippoNonbondedForce::setPmeGridDimensions(vector<int>& pm ...@@ -1451,11 +1448,6 @@ void AmoebaReferencePmeHippoNonbondedForce::setPmeGridDimensions(vector<int>& pm
(pmeGridDimensions[2] == _pmeGridDimensions[2])) (pmeGridDimensions[2] == _pmeGridDimensions[2]))
return; return;
if (_fftplan) {
fftpack_destroy(_fftplan);
}
fftpack_init_3d(&_fftplan,pmeGridDimensions[0], pmeGridDimensions[1], pmeGridDimensions[2]);
_pmeGridDimensions[0] = pmeGridDimensions[0]; _pmeGridDimensions[0] = pmeGridDimensions[0];
_pmeGridDimensions[1] = pmeGridDimensions[1]; _pmeGridDimensions[1] = pmeGridDimensions[1];
_pmeGridDimensions[2] = pmeGridDimensions[2]; _pmeGridDimensions[2] = pmeGridDimensions[2];
...@@ -1469,11 +1461,6 @@ void AmoebaReferencePmeHippoNonbondedForce::setDispersionPmeGridDimensions(vecto ...@@ -1469,11 +1461,6 @@ void AmoebaReferencePmeHippoNonbondedForce::setDispersionPmeGridDimensions(vecto
(pmeGridDimensions[2] == _dpmeGridDimensions[2])) (pmeGridDimensions[2] == _dpmeGridDimensions[2]))
return; return;
if (_fftplan) {
fftpack_destroy(_fftplan);
}
fftpack_init_3d(&_fftplan,pmeGridDimensions[0], pmeGridDimensions[1], pmeGridDimensions[2]);
_dpmeGridDimensions[0] = pmeGridDimensions[0]; _dpmeGridDimensions[0] = pmeGridDimensions[0];
_dpmeGridDimensions[1] = pmeGridDimensions[1]; _dpmeGridDimensions[1] = pmeGridDimensions[1];
_dpmeGridDimensions[2] = pmeGridDimensions[2]; _dpmeGridDimensions[2] = pmeGridDimensions[2];
...@@ -1516,7 +1503,7 @@ void AmoebaReferencePmeHippoNonbondedForce::resizePmeArrays() { ...@@ -1516,7 +1503,7 @@ void AmoebaReferencePmeHippoNonbondedForce::resizePmeArrays() {
void AmoebaReferencePmeHippoNonbondedForce::initializePmeGrid() { void AmoebaReferencePmeHippoNonbondedForce::initializePmeGrid() {
for (int jj = 0; jj < _pmeGrid.size(); jj++) for (int jj = 0; jj < _pmeGrid.size(); jj++)
_pmeGrid[jj].re = _pmeGrid[jj].im = 0.0; _pmeGrid[jj] = complex<double>(0, 0);
} }
void AmoebaReferencePmeHippoNonbondedForce::getPeriodicDelta(Vec3& deltaR) const { void AmoebaReferencePmeHippoNonbondedForce::getPeriodicDelta(Vec3& deltaR) const {
...@@ -1674,9 +1661,14 @@ void AmoebaReferencePmeHippoNonbondedForce::calculateFixedMultipoleField() { ...@@ -1674,9 +1661,14 @@ void AmoebaReferencePmeHippoNonbondedForce::calculateFixedMultipoleField() {
computeAmoebaBsplines(particleData); computeAmoebaBsplines(particleData);
initializePmeGrid(); initializePmeGrid();
spreadFixedMultipolesOntoGrid(particleData); spreadFixedMultipolesOntoGrid(particleData);
fftpack_exec_3d(_fftplan, FFTPACK_FORWARD, _pmeGrid.data(), _pmeGrid.data()); vector<size_t> shape = {(size_t) _pmeGridDimensions[0], (size_t) _pmeGridDimensions[1], (size_t) _pmeGridDimensions[2]};
vector<size_t> axes = {0, 1, 2};
vector<ptrdiff_t> stride = {(ptrdiff_t) (_pmeGridDimensions[1]*_pmeGridDimensions[2]*sizeof(complex<double>)),
(ptrdiff_t) (_pmeGridDimensions[2]*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, _pmeGrid.data(), _pmeGrid.data(), 1.0, 0);
performAmoebaReciprocalConvolution(); performAmoebaReciprocalConvolution();
fftpack_exec_3d(_fftplan, FFTPACK_BACKWARD, _pmeGrid.data(), _pmeGrid.data()); pocketfft::c2c(shape, stride, stride, axes, false, _pmeGrid.data(), _pmeGrid.data(), 1.0, 0);
computeFixedPotentialFromGrid(); computeFixedPotentialFromGrid();
recordFixedMultipoleField(); recordFixedMultipoleField();
...@@ -1875,7 +1867,7 @@ void AmoebaReferencePmeHippoNonbondedForce::spreadFixedMultipolesOntoGrid(const ...@@ -1875,7 +1867,7 @@ void AmoebaReferencePmeHippoNonbondedForce::spreadFixedMultipolesOntoGrid(const
// Clear the grid. // Clear the grid.
for (int gridIndex = 0; gridIndex < _pmeGrid.size(); gridIndex++) for (int gridIndex = 0; gridIndex < _pmeGrid.size(); gridIndex++)
_pmeGrid[gridIndex] = t_complex(0, 0); _pmeGrid[gridIndex] = complex<double>(0, 0);
// Loop over atoms and spread them on the grid. // Loop over atoms and spread them on the grid.
...@@ -1904,8 +1896,8 @@ void AmoebaReferencePmeHippoNonbondedForce::spreadFixedMultipolesOntoGrid(const ...@@ -1904,8 +1896,8 @@ void AmoebaReferencePmeHippoNonbondedForce::spreadFixedMultipolesOntoGrid(const
for (int iz = 0; iz < AMOEBA_PME_ORDER; iz++) { for (int iz = 0; iz < AMOEBA_PME_ORDER; iz++) {
int z = (gridPoint[2]+iz) % _pmeGridDimensions[2]; int z = (gridPoint[2]+iz) % _pmeGridDimensions[2];
HippoDouble4 v = _thetai[2][atomIndex*AMOEBA_PME_ORDER+iz]; HippoDouble4 v = _thetai[2][atomIndex*AMOEBA_PME_ORDER+iz];
t_complex& gridValue = _pmeGrid[x*_pmeGridDimensions[1]*_pmeGridDimensions[2]+y*_pmeGridDimensions[2]+z]; complex<double>& gridValue = _pmeGrid[x*_pmeGridDimensions[1]*_pmeGridDimensions[2]+y*_pmeGridDimensions[2]+z];
gridValue.re += term0*v[0] + term1*v[1] + term2*v[2]; gridValue += term0*v[0] + term1*v[1] + term2*v[2];
} }
} }
} }
...@@ -1923,7 +1915,7 @@ void AmoebaReferencePmeHippoNonbondedForce::performAmoebaReciprocalConvolution() ...@@ -1923,7 +1915,7 @@ void AmoebaReferencePmeHippoNonbondedForce::performAmoebaReciprocalConvolution()
int kz = remainder-ky*_pmeGridDimensions[2]; int kz = remainder-ky*_pmeGridDimensions[2];
if (kx == 0 && ky == 0 && kz == 0) { if (kx == 0 && ky == 0 && kz == 0) {
_pmeGrid[index].re = _pmeGrid[index].im = 0.0; _pmeGrid[index] = complex<double>(0, 0);
continue; continue;
} }
...@@ -1943,8 +1935,7 @@ void AmoebaReferencePmeHippoNonbondedForce::performAmoebaReciprocalConvolution() ...@@ -1943,8 +1935,7 @@ void AmoebaReferencePmeHippoNonbondedForce::performAmoebaReciprocalConvolution()
double denom = m2*bx*by*bz; double denom = m2*bx*by*bz;
double eterm = scaleFactor*exp(-expFactor*m2)/denom; double eterm = scaleFactor*exp(-expFactor*m2)/denom;
_pmeGrid[index].re *= eterm; _pmeGrid[index] *= eterm;
_pmeGrid[index].im *= eterm;
} }
} }
...@@ -1993,7 +1984,7 @@ void AmoebaReferencePmeHippoNonbondedForce::computeFixedPotentialFromGrid() { ...@@ -1993,7 +1984,7 @@ void AmoebaReferencePmeHippoNonbondedForce::computeFixedPotentialFromGrid() {
for (int ix = 0; ix < AMOEBA_PME_ORDER; ix++) { for (int ix = 0; ix < AMOEBA_PME_ORDER; ix++) {
int i = gridPoint[0]+ix-(gridPoint[0]+ix >= _pmeGridDimensions[0] ? _pmeGridDimensions[0] : 0); int i = gridPoint[0]+ix-(gridPoint[0]+ix >= _pmeGridDimensions[0] ? _pmeGridDimensions[0] : 0);
int gridIndex = i*_pmeGridDimensions[1]*_pmeGridDimensions[2] + j*_pmeGridDimensions[2] + k; int gridIndex = i*_pmeGridDimensions[1]*_pmeGridDimensions[2] + j*_pmeGridDimensions[2] + k;
double tq = _pmeGrid[gridIndex].re; double tq = _pmeGrid[gridIndex].real();
HippoDouble4 tadd = _thetai[0][m*AMOEBA_PME_ORDER+ix]; HippoDouble4 tadd = _thetai[0][m*AMOEBA_PME_ORDER+ix];
t[0] += tq*tadd[0]; t[0] += tq*tadd[0];
t[1] += tq*tadd[1]; t[1] += tq*tadd[1];
...@@ -2066,7 +2057,7 @@ void AmoebaReferencePmeHippoNonbondedForce::spreadInducedDipolesOnGrid(const vec ...@@ -2066,7 +2057,7 @@ void AmoebaReferencePmeHippoNonbondedForce::spreadInducedDipolesOnGrid(const vec
// Clear the grid. // Clear the grid.
for (int gridIndex = 0; gridIndex < _pmeGrid.size(); gridIndex++) for (int gridIndex = 0; gridIndex < _pmeGrid.size(); gridIndex++)
_pmeGrid[gridIndex] = t_complex(0, 0); _pmeGrid[gridIndex] = complex<double>(0, 0);
// Loop over atoms and spread them on the grid. // Loop over atoms and spread them on the grid.
...@@ -2086,8 +2077,8 @@ void AmoebaReferencePmeHippoNonbondedForce::spreadInducedDipolesOnGrid(const vec ...@@ -2086,8 +2077,8 @@ void AmoebaReferencePmeHippoNonbondedForce::spreadInducedDipolesOnGrid(const vec
for (int iz = 0; iz < AMOEBA_PME_ORDER; iz++) { for (int iz = 0; iz < AMOEBA_PME_ORDER; iz++) {
int z = (gridPoint[2]+iz) % _pmeGridDimensions[2]; int z = (gridPoint[2]+iz) % _pmeGridDimensions[2];
HippoDouble4 v = _thetai[2][atomIndex*AMOEBA_PME_ORDER+iz]; HippoDouble4 v = _thetai[2][atomIndex*AMOEBA_PME_ORDER+iz];
t_complex& gridValue = _pmeGrid[x*_pmeGridDimensions[1]*_pmeGridDimensions[2]+y*_pmeGridDimensions[2]+z]; complex<double>& gridValue = _pmeGrid[x*_pmeGridDimensions[1]*_pmeGridDimensions[2]+y*_pmeGridDimensions[2]+z];
gridValue.re += term01*v[0] + term11*v[1]; gridValue += term01*v[0] + term11*v[1];
} }
} }
} }
...@@ -2142,12 +2133,12 @@ void AmoebaReferencePmeHippoNonbondedForce::computeInducedPotentialFromGrid() { ...@@ -2142,12 +2133,12 @@ void AmoebaReferencePmeHippoNonbondedForce::computeInducedPotentialFromGrid() {
for (int ix = 0; ix < AMOEBA_PME_ORDER; ix++) { for (int ix = 0; ix < AMOEBA_PME_ORDER; ix++) {
int i = gridPoint[0]+ix-(gridPoint[0]+ix >= _pmeGridDimensions[0] ? _pmeGridDimensions[0] : 0); int i = gridPoint[0]+ix-(gridPoint[0]+ix >= _pmeGridDimensions[0] ? _pmeGridDimensions[0] : 0);
int gridIndex = i*_pmeGridDimensions[1]*_pmeGridDimensions[2] + j*_pmeGridDimensions[2] + k; int gridIndex = i*_pmeGridDimensions[1]*_pmeGridDimensions[2] + j*_pmeGridDimensions[2] + k;
t_complex tq = _pmeGrid[gridIndex]; complex<double> tq = _pmeGrid[gridIndex];
HippoDouble4 tadd = _thetai[0][m*AMOEBA_PME_ORDER+ix]; HippoDouble4 tadd = _thetai[0][m*AMOEBA_PME_ORDER+ix];
t0 += tq.re*tadd[0]; t0 += tq.real()*tadd[0];
t1 += tq.re*tadd[1]; t1 += tq.real()*tadd[1];
t2 += tq.re*tadd[2]; t2 += tq.real()*tadd[2];
t3 += tq.re*tadd[3]; t3 += tq.real()*tadd[3];
} }
tu00 += t0*u[0]; tu00 += t0*u[0];
tu10 += t1*u[0]; tu10 += t1*u[0];
...@@ -2412,9 +2403,14 @@ void AmoebaReferencePmeHippoNonbondedForce::calculateReciprocalSpaceInducedDipol ...@@ -2412,9 +2403,14 @@ void AmoebaReferencePmeHippoNonbondedForce::calculateReciprocalSpaceInducedDipol
initializePmeGrid(); initializePmeGrid();
spreadInducedDipolesOnGrid(_inducedDipole); spreadInducedDipolesOnGrid(_inducedDipole);
fftpack_exec_3d(_fftplan, FFTPACK_FORWARD, _pmeGrid.data(), _pmeGrid.data()); vector<size_t> shape = {(size_t) _pmeGridDimensions[0], (size_t) _pmeGridDimensions[1], (size_t) _pmeGridDimensions[2]};
vector<size_t> axes = {0, 1, 2};
vector<ptrdiff_t> stride = {(ptrdiff_t) (_pmeGridDimensions[1]*_pmeGridDimensions[2]*sizeof(complex<double>)),
(ptrdiff_t) (_pmeGridDimensions[2]*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, _pmeGrid.data(), _pmeGrid.data(), 1.0, 0);
performAmoebaReciprocalConvolution(); performAmoebaReciprocalConvolution();
fftpack_exec_3d(_fftplan, FFTPACK_BACKWARD, _pmeGrid.data(), _pmeGrid.data()); pocketfft::c2c(shape, stride, stride, axes, false, _pmeGrid.data(), _pmeGrid.data(), 1.0, 0);
computeInducedPotentialFromGrid(); computeInducedPotentialFromGrid();
recordInducedDipoleField(_inducedDipoleField); recordInducedDipoleField(_inducedDipoleField);
} }
......
/* Portions copyright (c) 2006-2019 Stanford University and Simbios. /* Portions copyright (c) 2006-2022 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
#include <map> #include <map>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "fftpack.h"
#include <complex> #include <complex>
namespace OpenMM { namespace OpenMM {
...@@ -109,12 +108,6 @@ public: ...@@ -109,12 +108,6 @@ public:
* *
*/ */
AmoebaReferenceHippoNonbondedForce(const HippoNonbondedForce& force); AmoebaReferenceHippoNonbondedForce(const HippoNonbondedForce& force);
/**
* Destructor
*
*/
virtual ~AmoebaReferenceHippoNonbondedForce() {};
/** /**
* Get nonbonded method. * Get nonbonded method.
...@@ -655,9 +648,7 @@ private: ...@@ -655,9 +648,7 @@ private:
int _pmeGridDimensions[3]; int _pmeGridDimensions[3];
int _dpmeGridDimensions[3]; int _dpmeGridDimensions[3];
fftpack_t _fftplan; std::vector<std::complex<double> > _pmeGrid;
std::vector<t_complex> _pmeGrid;
std::vector<double> _pmeBsplineModuli[3]; std::vector<double> _pmeBsplineModuli[3];
std::vector<HippoDouble4> _thetai[3]; std::vector<HippoDouble4> _thetai[3];
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios. /* Portions copyright (c) 2006-222 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -26,6 +26,10 @@ ...@@ -26,6 +26,10 @@
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
#include "jama_svd.h" #include "jama_svd.h"
#include <algorithm> #include <algorithm>
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
// In case we're using some primitive version of Visual Studio this will // In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined. // make sure that erf() and erfc() are defined.
...@@ -4809,16 +4813,12 @@ AmoebaReferencePmeMultipoleForce::AmoebaReferencePmeMultipoleForce() : ...@@ -4809,16 +4813,12 @@ AmoebaReferencePmeMultipoleForce::AmoebaReferencePmeMultipoleForce() :
_pmeGridSize(0), _totalGridSize(0), _alphaEwald(0.0) _pmeGridSize(0), _totalGridSize(0), _alphaEwald(0.0)
{ {
_fftplan = NULL;
_pmeGrid = NULL; _pmeGrid = NULL;
_pmeGridDimensions = IntVec(-1, -1, -1); _pmeGridDimensions = IntVec(-1, -1, -1);
} }
AmoebaReferencePmeMultipoleForce::~AmoebaReferencePmeMultipoleForce() AmoebaReferencePmeMultipoleForce::~AmoebaReferencePmeMultipoleForce()
{ {
if (_fftplan) {
fftpack_destroy(_fftplan);
}
if (_pmeGrid) { if (_pmeGrid) {
delete[] _pmeGrid; delete[] _pmeGrid;
} }
...@@ -4863,11 +4863,6 @@ void AmoebaReferencePmeMultipoleForce::setPmeGridDimensions(vector<int>& pmeGrid ...@@ -4863,11 +4863,6 @@ void AmoebaReferencePmeMultipoleForce::setPmeGridDimensions(vector<int>& pmeGrid
(pmeGridDimensions[2] == _pmeGridDimensions[2])) (pmeGridDimensions[2] == _pmeGridDimensions[2]))
return; return;
if (_fftplan) {
fftpack_destroy(_fftplan);
}
fftpack_init_3d(&_fftplan,pmeGridDimensions[0], pmeGridDimensions[1], pmeGridDimensions[2]);
_pmeGridDimensions[0] = pmeGridDimensions[0]; _pmeGridDimensions[0] = pmeGridDimensions[0];
_pmeGridDimensions[1] = pmeGridDimensions[1]; _pmeGridDimensions[1] = pmeGridDimensions[1];
_pmeGridDimensions[2] = pmeGridDimensions[2]; _pmeGridDimensions[2] = pmeGridDimensions[2];
...@@ -4908,7 +4903,7 @@ void AmoebaReferencePmeMultipoleForce::resizePmeArrays() ...@@ -4908,7 +4903,7 @@ void AmoebaReferencePmeMultipoleForce::resizePmeArrays()
if (_pmeGrid) { if (_pmeGrid) {
delete[] _pmeGrid; delete[] _pmeGrid;
} }
_pmeGrid = new t_complex[_totalGridSize]; _pmeGrid = new complex<double>[_totalGridSize];
_pmeGridSize = _totalGridSize; _pmeGridSize = _totalGridSize;
} }
...@@ -4930,7 +4925,7 @@ void AmoebaReferencePmeMultipoleForce::initializePmeGrid() ...@@ -4930,7 +4925,7 @@ void AmoebaReferencePmeMultipoleForce::initializePmeGrid()
return; return;
for (int jj = 0; jj < _totalGridSize; jj++) for (int jj = 0; jj < _totalGridSize; jj++)
_pmeGrid[jj].re = _pmeGrid[jj].im = 0.0; _pmeGrid[jj] = complex<double>(0, 0);
} }
void AmoebaReferencePmeMultipoleForce::getPeriodicDelta(Vec3& deltaR) const void AmoebaReferencePmeMultipoleForce::getPeriodicDelta(Vec3& deltaR) const
...@@ -5175,9 +5170,14 @@ void AmoebaReferencePmeMultipoleForce::calculateFixedMultipoleField(const vector ...@@ -5175,9 +5170,14 @@ void AmoebaReferencePmeMultipoleForce::calculateFixedMultipoleField(const vector
computeAmoebaBsplines(particleData); computeAmoebaBsplines(particleData);
initializePmeGrid(); initializePmeGrid();
spreadFixedMultipolesOntoGrid(particleData); spreadFixedMultipolesOntoGrid(particleData);
fftpack_exec_3d(_fftplan, FFTPACK_FORWARD, _pmeGrid, _pmeGrid); vector<size_t> shape = {(size_t) _pmeGridDimensions[0], (size_t) _pmeGridDimensions[1], (size_t) _pmeGridDimensions[2]};
vector<size_t> axes = {0, 1, 2};
vector<ptrdiff_t> stride = {(ptrdiff_t) (_pmeGridDimensions[1]*_pmeGridDimensions[2]*sizeof(complex<double>)),
(ptrdiff_t) (_pmeGridDimensions[2]*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, _pmeGrid, _pmeGrid, 1.0, 0);
performAmoebaReciprocalConvolution(); performAmoebaReciprocalConvolution();
fftpack_exec_3d(_fftplan, FFTPACK_BACKWARD, _pmeGrid, _pmeGrid); pocketfft::c2c(shape, stride, stride, axes, false, _pmeGrid, _pmeGrid, 1.0, 0);
computeFixedPotentialFromGrid(); computeFixedPotentialFromGrid();
recordFixedMultipoleField(); recordFixedMultipoleField();
...@@ -5385,7 +5385,7 @@ void AmoebaReferencePmeMultipoleForce::spreadFixedMultipolesOntoGrid(const vecto ...@@ -5385,7 +5385,7 @@ void AmoebaReferencePmeMultipoleForce::spreadFixedMultipolesOntoGrid(const vecto
// Clear the grid. // Clear the grid.
for (int gridIndex = 0; gridIndex < _totalGridSize; gridIndex++) for (int gridIndex = 0; gridIndex < _totalGridSize; gridIndex++)
_pmeGrid[gridIndex] = t_complex(0, 0); _pmeGrid[gridIndex] = complex<double>(0, 0);
// Loop over atoms and spread them on the grid. // Loop over atoms and spread them on the grid.
...@@ -5414,8 +5414,8 @@ void AmoebaReferencePmeMultipoleForce::spreadFixedMultipolesOntoGrid(const vecto ...@@ -5414,8 +5414,8 @@ void AmoebaReferencePmeMultipoleForce::spreadFixedMultipolesOntoGrid(const vecto
for (int iz = 0; iz < AMOEBA_PME_ORDER; iz++) { for (int iz = 0; iz < AMOEBA_PME_ORDER; iz++) {
int z = (gridPoint[2]+iz) % _pmeGridDimensions[2]; int z = (gridPoint[2]+iz) % _pmeGridDimensions[2];
double4 v = _thetai[2][atomIndex*AMOEBA_PME_ORDER+iz]; double4 v = _thetai[2][atomIndex*AMOEBA_PME_ORDER+iz];
t_complex& gridValue = _pmeGrid[x*_pmeGridDimensions[1]*_pmeGridDimensions[2]+y*_pmeGridDimensions[2]+z]; complex<double>& gridValue = _pmeGrid[x*_pmeGridDimensions[1]*_pmeGridDimensions[2]+y*_pmeGridDimensions[2]+z];
gridValue.re += term0*v[0] + term1*v[1] + term2*v[2]; gridValue += term0*v[0] + term1*v[1] + term2*v[2];
} }
} }
} }
...@@ -5436,7 +5436,7 @@ void AmoebaReferencePmeMultipoleForce::performAmoebaReciprocalConvolution() ...@@ -5436,7 +5436,7 @@ void AmoebaReferencePmeMultipoleForce::performAmoebaReciprocalConvolution()
int kz = remainder-ky*_pmeGridDimensions[2]; int kz = remainder-ky*_pmeGridDimensions[2];
if (kx == 0 && ky == 0 && kz == 0) { if (kx == 0 && ky == 0 && kz == 0) {
_pmeGrid[index].re = _pmeGrid[index].im = 0.0; _pmeGrid[index] = complex<double>(0, 0);
continue; continue;
} }
...@@ -5456,8 +5456,7 @@ void AmoebaReferencePmeMultipoleForce::performAmoebaReciprocalConvolution() ...@@ -5456,8 +5456,7 @@ void AmoebaReferencePmeMultipoleForce::performAmoebaReciprocalConvolution()
double denom = m2*bx*by*bz; double denom = m2*bx*by*bz;
double eterm = scaleFactor*exp(-expFactor*m2)/denom; double eterm = scaleFactor*exp(-expFactor*m2)/denom;
_pmeGrid[index].re *= eterm; _pmeGrid[index] *= eterm;
_pmeGrid[index].im *= eterm;
} }
} }
...@@ -5507,7 +5506,7 @@ void AmoebaReferencePmeMultipoleForce::computeFixedPotentialFromGrid() ...@@ -5507,7 +5506,7 @@ void AmoebaReferencePmeMultipoleForce::computeFixedPotentialFromGrid()
for (int ix = 0; ix < AMOEBA_PME_ORDER; ix++) { for (int ix = 0; ix < AMOEBA_PME_ORDER; ix++) {
int i = gridPoint[0]+ix-(gridPoint[0]+ix >= _pmeGridDimensions[0] ? _pmeGridDimensions[0] : 0); int i = gridPoint[0]+ix-(gridPoint[0]+ix >= _pmeGridDimensions[0] ? _pmeGridDimensions[0] : 0);
int gridIndex = i*_pmeGridDimensions[1]*_pmeGridDimensions[2] + j*_pmeGridDimensions[2] + k; int gridIndex = i*_pmeGridDimensions[1]*_pmeGridDimensions[2] + j*_pmeGridDimensions[2] + k;
double tq = _pmeGrid[gridIndex].re; double tq = _pmeGrid[gridIndex].real();
double4 tadd = _thetai[0][m*AMOEBA_PME_ORDER+ix]; double4 tadd = _thetai[0][m*AMOEBA_PME_ORDER+ix];
t[0] += tq*tadd[0]; t[0] += tq*tadd[0];
t[1] += tq*tadd[1]; t[1] += tq*tadd[1];
...@@ -5581,7 +5580,7 @@ void AmoebaReferencePmeMultipoleForce::spreadInducedDipolesOnGrid(const vector<V ...@@ -5581,7 +5580,7 @@ void AmoebaReferencePmeMultipoleForce::spreadInducedDipolesOnGrid(const vector<V
// Clear the grid. // Clear the grid.
for (int gridIndex = 0; gridIndex < _totalGridSize; gridIndex++) for (int gridIndex = 0; gridIndex < _totalGridSize; gridIndex++)
_pmeGrid[gridIndex] = t_complex(0, 0); _pmeGrid[gridIndex] = complex<double>(0, 0);
// Loop over atoms and spread them on the grid. // Loop over atoms and spread them on the grid.
...@@ -5606,9 +5605,8 @@ void AmoebaReferencePmeMultipoleForce::spreadInducedDipolesOnGrid(const vector<V ...@@ -5606,9 +5605,8 @@ void AmoebaReferencePmeMultipoleForce::spreadInducedDipolesOnGrid(const vector<V
for (int iz = 0; iz < AMOEBA_PME_ORDER; iz++) { for (int iz = 0; iz < AMOEBA_PME_ORDER; iz++) {
int z = (gridPoint[2]+iz) % _pmeGridDimensions[2]; int z = (gridPoint[2]+iz) % _pmeGridDimensions[2];
double4 v = _thetai[2][atomIndex*AMOEBA_PME_ORDER+iz]; double4 v = _thetai[2][atomIndex*AMOEBA_PME_ORDER+iz];
t_complex& gridValue = _pmeGrid[x*_pmeGridDimensions[1]*_pmeGridDimensions[2]+y*_pmeGridDimensions[2]+z]; complex<double>& gridValue = _pmeGrid[x*_pmeGridDimensions[1]*_pmeGridDimensions[2]+y*_pmeGridDimensions[2]+z];
gridValue.re += term01*v[0] + term11*v[1]; gridValue += complex<double>(term01*v[0] + term11*v[1], term02*v[0] + term12*v[1]);
gridValue.im += term02*v[0] + term12*v[1];
} }
} }
} }
...@@ -5697,15 +5695,15 @@ void AmoebaReferencePmeMultipoleForce::computeInducedPotentialFromGrid() ...@@ -5697,15 +5695,15 @@ void AmoebaReferencePmeMultipoleForce::computeInducedPotentialFromGrid()
for (int ix = 0; ix < AMOEBA_PME_ORDER; ix++) { for (int ix = 0; ix < AMOEBA_PME_ORDER; ix++) {
int i = gridPoint[0]+ix-(gridPoint[0]+ix >= _pmeGridDimensions[0] ? _pmeGridDimensions[0] : 0); int i = gridPoint[0]+ix-(gridPoint[0]+ix >= _pmeGridDimensions[0] ? _pmeGridDimensions[0] : 0);
int gridIndex = i*_pmeGridDimensions[1]*_pmeGridDimensions[2] + j*_pmeGridDimensions[2] + k; int gridIndex = i*_pmeGridDimensions[1]*_pmeGridDimensions[2] + j*_pmeGridDimensions[2] + k;
t_complex tq = _pmeGrid[gridIndex]; complex<double> tq = _pmeGrid[gridIndex];
double4 tadd = _thetai[0][m*AMOEBA_PME_ORDER+ix]; double4 tadd = _thetai[0][m*AMOEBA_PME_ORDER+ix];
t0_1 += tq.re*tadd[0]; t0_1 += tq.real()*tadd[0];
t1_1 += tq.re*tadd[1]; t1_1 += tq.real()*tadd[1];
t2_1 += tq.re*tadd[2]; t2_1 += tq.real()*tadd[2];
t0_2 += tq.im*tadd[0]; t0_2 += tq.imag()*tadd[0];
t1_2 += tq.im*tadd[1]; t1_2 += tq.imag()*tadd[1];
t2_2 += tq.im*tadd[2]; t2_2 += tq.imag()*tadd[2];
t3 += (tq.re+tq.im)*tadd[3]; t3 += (tq.real()+tq.imag())*tadd[3];
} }
tu00_1 += t0_1*u[0]; tu00_1 += t0_1*u[0];
tu10_1 += t1_1*u[0]; tu10_1 += t1_1*u[0];
...@@ -6049,9 +6047,14 @@ void AmoebaReferencePmeMultipoleForce::calculateReciprocalSpaceInducedDipoleFiel ...@@ -6049,9 +6047,14 @@ void AmoebaReferencePmeMultipoleForce::calculateReciprocalSpaceInducedDipoleFiel
initializePmeGrid(); initializePmeGrid();
spreadInducedDipolesOnGrid(*updateInducedDipoleFields[0].inducedDipoles, *updateInducedDipoleFields[1].inducedDipoles); spreadInducedDipolesOnGrid(*updateInducedDipoleFields[0].inducedDipoles, *updateInducedDipoleFields[1].inducedDipoles);
fftpack_exec_3d(_fftplan, FFTPACK_FORWARD, _pmeGrid, _pmeGrid); vector<size_t> shape = {(size_t) _pmeGridDimensions[0], (size_t) _pmeGridDimensions[1], (size_t) _pmeGridDimensions[2]};
vector<size_t> axes = {0, 1, 2};
vector<ptrdiff_t> stride = {(ptrdiff_t) (_pmeGridDimensions[1]*_pmeGridDimensions[2]*sizeof(complex<double>)),
(ptrdiff_t) (_pmeGridDimensions[2]*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, _pmeGrid, _pmeGrid, 1.0, 0);
performAmoebaReciprocalConvolution(); performAmoebaReciprocalConvolution();
fftpack_exec_3d(_fftplan, FFTPACK_BACKWARD, _pmeGrid, _pmeGrid); pocketfft::c2c(shape, stride, stride, axes, false, _pmeGrid, _pmeGrid, 1.0, 0);
computeInducedPotentialFromGrid(); computeInducedPotentialFromGrid();
recordInducedDipoleField(updateInducedDipoleFields[0].inducedDipoleField, updateInducedDipoleFields[1].inducedDipoleField); recordInducedDipoleField(updateInducedDipoleFields[0].inducedDipoleField, updateInducedDipoleFields[1].inducedDipoleField);
} }
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios. /* Portions copyright (c) 2006-2022 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#include "openmm/Vec3.h" #include "openmm/Vec3.h"
#include "AmoebaReferenceGeneralizedKirkwoodForce.h" #include "AmoebaReferenceGeneralizedKirkwoodForce.h"
#include <map> #include <map>
#include "fftpack.h"
#include <complex> #include <complex>
namespace OpenMM { namespace OpenMM {
...@@ -1474,10 +1473,8 @@ private: ...@@ -1474,10 +1473,8 @@ private:
int _totalGridSize; int _totalGridSize;
IntVec _pmeGridDimensions; IntVec _pmeGridDimensions;
fftpack_t _fftplan;
unsigned int _pmeGridSize; unsigned int _pmeGridSize;
t_complex* _pmeGrid; std::complex<double>* _pmeGrid;
std::vector<double> _pmeBsplineModuli[3]; std::vector<double> _pmeBsplineModuli[3];
std::vector<double4> _thetai[3]; std::vector<double4> _thetai[3];
......
...@@ -64,17 +64,14 @@ IF(NOT MSVC) ...@@ -64,17 +64,14 @@ IF(NOT MSVC)
ENDIF() ENDIF()
ENDIF() ENDIF()
# Include FFTW related files. # Include PocketFFT.
INCLUDE_DIRECTORIES(${FFTW_INCLUDES}) INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/libraries/include/pocketfft")
# Build the shared plugin library. # Build the shared plugin library.
IF (OPENMM_BUILD_SHARED_LIB) IF (OPENMM_BUILD_SHARED_LIB)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES}) ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${PTHREADS_LIB} ${FFTW_LIBRARY}) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${PTHREADS_LIB})
IF (FFTW_THREADS_LIBRARY)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${FFTW_THREADS_LIBRARY})
ENDIF (FFTW_THREADS_LIBRARY)
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_PME_BUILDING_SHARED_LIBRARY") SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_PME_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET}) INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
...@@ -84,10 +81,7 @@ ENDIF (OPENMM_BUILD_SHARED_LIB) ...@@ -84,10 +81,7 @@ ENDIF (OPENMM_BUILD_SHARED_LIB)
IF(OPENMM_BUILD_STATIC_LIB) IF(OPENMM_BUILD_STATIC_LIB)
ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES}) ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES})
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME}_static ${PTHREADS_LIB} ${FFTW_LIBRARY}) TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME}_static ${PTHREADS_LIB})
IF (FFTW_THREADS_LIBRARY)
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${FFTW_THREADS_LIBRARY})
ENDIF (FFTW_THREADS_LIBRARY)
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_PME_BUILDING_STATIC_LIBRARY") SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_PME_BUILDING_STATIC_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET}) INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET})
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. * * Portions copyright (c) 2013-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -32,11 +32,16 @@ ...@@ -32,11 +32,16 @@
#ifdef WIN32 #ifdef WIN32
#define _USE_MATH_DEFINES // Needed to get M_PI #define _USE_MATH_DEFINES // Needed to get M_PI
#endif #endif
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#define POCKETFFT_CACHE_SIZE 4
#include "CpuPmeKernels.h" #include "CpuPmeKernels.h"
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
#include "openmm/internal/hardware.h" #include "openmm/internal/hardware.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#include "pocketfft_hdronly.h"
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
#include <cstring> #include <cstring>
...@@ -51,7 +56,7 @@ static const int PME_ORDER = 5; ...@@ -51,7 +56,7 @@ static const int PME_ORDER = 5;
bool CpuCalcDispersionPmeReciprocalForceKernel::hasInitializedThreads = false; bool CpuCalcDispersionPmeReciprocalForceKernel::hasInitializedThreads = false;
int CpuCalcDispersionPmeReciprocalForceKernel::numThreads = 0; int CpuCalcDispersionPmeReciprocalForceKernel::numThreads = 0;
static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, static void spreadCharge(float* posq, vector<float>& grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors,
atomic<int>& atomicCounter, const float epsilonFactor, int threadIndex, int numThreads, bool deterministic) { atomic<int>& atomicCounter, const float epsilonFactor, int threadIndex, int numThreads, bool deterministic) {
float temp[4]; float temp[4];
fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0); fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0);
...@@ -64,7 +69,7 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri ...@@ -64,7 +69,7 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
fvec4 one(1); fvec4 one(1);
fvec4 scale(1.0f/(PME_ORDER-1)); fvec4 scale(1.0f/(PME_ORDER-1));
float posInBox[4] = {0,0,0,0}; float posInBox[4] = {0,0,0,0};
memset(grid, 0, sizeof(float)*gridx*gridy*gridz); memset(grid.data(), 0, sizeof(float)*gridx*gridy*gridz);
const int groupSize = max(1, numParticles / (10 * numThreads)); const int groupSize = max(1, numParticles / (10 * numThreads));
int start = groupSize * threadIndex; int start = groupSize * threadIndex;
...@@ -244,7 +249,7 @@ static void computeReciprocalEterm(int start, int end, int gridx, int gridy, int ...@@ -244,7 +249,7 @@ static void computeReciprocalEterm(int start, int end, int gridx, int gridy, int
} }
} }
static double reciprocalEnergy(int start, int end, fftwf_complex* grid, vector<float>& recipEterm, int gridx, int gridy, int gridz, double alpha, vector<float>* bsplineModuli, Vec3* periodicBoxVectors, Vec3* recipBoxVectors) { static double reciprocalEnergy(int start, int end, vector<complex<float> >& grid, vector<float>& recipEterm, int gridx, int gridy, int gridz, double alpha, vector<float>* bsplineModuli, Vec3* periodicBoxVectors, Vec3* recipBoxVectors) {
const unsigned int zsizeHalf = gridz/2+1; const unsigned int zsizeHalf = gridz/2+1;
const unsigned int yzsizeHalf = gridy*zsizeHalf; const unsigned int yzsizeHalf = gridy*zsizeHalf;
...@@ -266,8 +271,8 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, vector<f ...@@ -266,8 +271,8 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, vector<f
kz1 = kz; kz1 = kz;
} }
int index = kx1*yzsizeHalf + ky1*zsizeHalf + kz1; int index = kx1*yzsizeHalf + ky1*zsizeHalf + kz1;
float gridReal = grid[index][0]; float gridReal = grid[index].real();
float gridImag = grid[index][1]; float gridImag = grid[index].imag();
energy += recipEterm[index]*(gridReal*gridReal+gridImag*gridImag); energy += recipEterm[index]*(gridReal*gridReal+gridImag*gridImag);
} }
firstz = 0; firstz = 0;
...@@ -277,7 +282,7 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, vector<f ...@@ -277,7 +282,7 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, vector<f
} }
static double reciprocalDispersionEnergy(int start, int end, fftwf_complex* grid, const vector<float>& recipEterm, int gridx, int gridy, int gridz, double alpha, vector<float>* bsplineModuli, Vec3* periodicBoxVectors, Vec3* recipBoxVectors) { static double reciprocalDispersionEnergy(int start, int end, vector<complex<float> >& grid, const vector<float>& recipEterm, int gridx, int gridy, int gridz, double alpha, vector<float>* bsplineModuli, Vec3* periodicBoxVectors, Vec3* recipBoxVectors) {
const unsigned int zsizeHalf = gridz/2+1; const unsigned int zsizeHalf = gridz/2+1;
const unsigned int yzsizeHalf = gridy*zsizeHalf; const unsigned int yzsizeHalf = gridy*zsizeHalf;
...@@ -298,8 +303,8 @@ static double reciprocalDispersionEnergy(int start, int end, fftwf_complex* grid ...@@ -298,8 +303,8 @@ static double reciprocalDispersionEnergy(int start, int end, fftwf_complex* grid
kz1 = kz; kz1 = kz;
} }
int index = kx1*yzsizeHalf + ky1*zsizeHalf + kz1; int index = kx1*yzsizeHalf + ky1*zsizeHalf + kz1;
float gridReal = grid[index][0]; float gridReal = grid[index].real();
float gridImag = grid[index][1]; float gridImag = grid[index].imag();
energy += recipEterm[index]*(gridReal*gridReal+gridImag*gridImag); energy += recipEterm[index]*(gridReal*gridReal+gridImag*gridImag);
} }
} }
...@@ -308,15 +313,14 @@ static double reciprocalDispersionEnergy(int start, int end, fftwf_complex* grid ...@@ -308,15 +313,14 @@ static double reciprocalDispersionEnergy(int start, int end, fftwf_complex* grid
} }
static void reciprocalConvolution(int start, int end, fftwf_complex* grid, vector<float>& recipEterm) { static void reciprocalConvolution(int start, int end, vector<complex<float> >& grid, vector<float>& recipEterm) {
for (int index = start; index < end; index++) { for (int index = start; index < end; index++) {
float eterm = recipEterm[index]; float eterm = recipEterm[index];
grid[index][0] *= eterm; grid[index] *= eterm;
grid[index][1] *= eterm;
} }
} }
static void interpolateForces(float* posq, float* force, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, atomic<int>& atomicCounter, const float epsilonFactor, int numThreads) { static void interpolateForces(float* posq, vector<float>& force, vector<float>& grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, atomic<int>& atomicCounter, const float epsilonFactor, int numThreads) {
fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0); fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0);
fvec4 invBoxSize((float) recipBoxVectors[0][0], (float) recipBoxVectors[1][1], (float) recipBoxVectors[2][2], 0); fvec4 invBoxSize((float) recipBoxVectors[0][0], (float) recipBoxVectors[1][1], (float) recipBoxVectors[2][2], 0);
fvec4 recipBoxVec0((float) recipBoxVectors[0][0], (float) recipBoxVectors[0][1], (float) recipBoxVectors[0][2], 0); fvec4 recipBoxVec0((float) recipBoxVectors[0][0], (float) recipBoxVectors[0][1], (float) recipBoxVectors[0][2], 0);
...@@ -429,13 +433,24 @@ void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize ...@@ -429,13 +433,24 @@ void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize
char* threadsEnv = getenv("OPENMM_CPU_THREADS"); char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL) if (threadsEnv != NULL)
stringstream(threadsEnv) >> numThreads; stringstream(threadsEnv) >> numThreads;
fftwf_init_threads();
hasInitializedThreads = true; hasInitializedThreads = true;
} }
threadEnergy.resize(numThreads); threadEnergy.resize(numThreads);
gridx = findFFTDimension(xsize, false); gridx = findFFTDimension(xsize);
gridy = findFFTDimension(ysize, false); gridy = findFFTDimension(ysize);
gridz = findFFTDimension(zsize, true); gridz = findFFTDimension(zsize);
gridShape.push_back(gridx);
gridShape.push_back(gridy);
gridShape.push_back(gridz);
fftAxes.push_back(0);
fftAxes.push_back(1);
fftAxes.push_back(2);
realGridStride.push_back(gridy*gridz*sizeof(float));
realGridStride.push_back(gridz*sizeof(float));
realGridStride.push_back(sizeof(float));
complexGridStride.push_back(gridy*(gridz/2+1)*sizeof(complex<float>));
complexGridStride.push_back((gridz/2+1)*sizeof(complex<float>));
complexGridStride.push_back(sizeof(complex<float>));
this->numParticles = numParticles; this->numParticles = numParticles;
this->alpha = alpha; this->alpha = alpha;
this->deterministic = deterministic; this->deterministic = deterministic;
...@@ -457,16 +472,10 @@ void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize ...@@ -457,16 +472,10 @@ void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize
pthread_cond_wait(&endCondition, &lock); pthread_cond_wait(&endCondition, &lock);
pthread_mutex_unlock(&lock); pthread_mutex_unlock(&lock);
// Initialize FFTW. // Initialize the FFT grids.
for (int i = 0; i < numThreads; i++) realGrids.resize(numThreads, vector<float>(gridx*gridy*gridz+3));
tempGrid.push_back((float*) fftwf_malloc(sizeof(float)*(gridx*gridy*gridz+3))); complexGrid.resize(gridx*gridy*(gridz/2+1));
realGrid = tempGrid[0];
complexGrid = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*gridx*gridy*(gridz/2+1));
fftwf_plan_with_nthreads(numThreads);
forwardFFT = fftwf_plan_dft_r2c_3d(gridx, gridy, gridz, realGrid, complexGrid, FFTW_MEASURE);
backwardFFT = fftwf_plan_dft_c2r_3d(gridx, gridy, gridz, complexGrid, realGrid, FFTW_MEASURE);
hasCreatedPlan = true;
// Initialize the b-spline moduli. // Initialize the b-spline moduli.
...@@ -533,14 +542,6 @@ CpuCalcPmeReciprocalForceKernel::~CpuCalcPmeReciprocalForceKernel() { ...@@ -533,14 +542,6 @@ CpuCalcPmeReciprocalForceKernel::~CpuCalcPmeReciprocalForceKernel() {
pthread_mutex_destroy(&lock); pthread_mutex_destroy(&lock);
pthread_cond_destroy(&startCondition); pthread_cond_destroy(&startCondition);
pthread_cond_destroy(&endCondition); pthread_cond_destroy(&endCondition);
for (auto grid : tempGrid)
fftwf_free(grid);
if (complexGrid != NULL)
fftwf_free(complexGrid);
if (hasCreatedPlan) {
fftwf_destroy_plan(forwardFFT);
fftwf_destroy_plan(backwardFFT);
}
} }
void CpuCalcPmeReciprocalForceKernel::runMainThread() { void CpuCalcPmeReciprocalForceKernel::runMainThread() {
...@@ -562,7 +563,7 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() { ...@@ -562,7 +563,7 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() {
threads.waitForThreads(); threads.waitForThreads();
threads.resumeThreads(); // Signal threads to sum the charge grids. threads.resumeThreads(); // Signal threads to sum the charge grids.
threads.waitForThreads(); threads.waitForThreads();
fftwf_execute_dft_r2c(forwardFFT, realGrid, complexGrid); pocketfft::r2c(gridShape, realGridStride, complexGridStride, fftAxes, true, realGrids[0].data(), complexGrid.data(), 1.0f, 0);
if (lastBoxVectors[0] != periodicBoxVectors[0] || lastBoxVectors[1] != periodicBoxVectors[1] || lastBoxVectors[2] != periodicBoxVectors[2]) { if (lastBoxVectors[0] != periodicBoxVectors[0] || lastBoxVectors[1] != periodicBoxVectors[1] || lastBoxVectors[2] != periodicBoxVectors[2]) {
threads.resumeThreads(); // Signal threads to compute the reciprocal scale factors. threads.resumeThreads(); // Signal threads to compute the reciprocal scale factors.
threads.waitForThreads(); threads.waitForThreads();
...@@ -575,7 +576,7 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() { ...@@ -575,7 +576,7 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() {
} }
threads.resumeThreads(); // Signal threads to perform reciprocal convolution. threads.resumeThreads(); // Signal threads to perform reciprocal convolution.
threads.waitForThreads(); threads.waitForThreads();
fftwf_execute_dft_c2r(backwardFFT, complexGrid, realGrid); pocketfft::c2r(gridShape, complexGridStride, realGridStride, fftAxes, false, complexGrid.data(), realGrids[0].data(), 1.0f, 0);
atomicCounter = 0; atomicCounter = 0;
threads.resumeThreads(); // Signal threads to interpolate forces. threads.resumeThreads(); // Signal threads to interpolate forces.
threads.waitForThreads(); threads.waitForThreads();
...@@ -598,14 +599,14 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i ...@@ -598,14 +599,14 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
int complexStart = std::max(1, ((index*complexSize)/numThreads)); int complexStart = std::max(1, ((index*complexSize)/numThreads));
int complexEnd = (((index+1)*complexSize)/numThreads); int complexEnd = (((index+1)*complexSize)/numThreads);
const float epsilonFactor = sqrt(ONE_4PI_EPS0); const float epsilonFactor = sqrt(ONE_4PI_EPS0);
spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, index, numThreads, deterministic); spreadCharge(posq, realGrids[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, index, numThreads, deterministic);
threads.syncThreads(); threads.syncThreads();
int numGrids = tempGrid.size(); int numGrids = realGrids.size();
for (int i = gridStart; i < gridEnd; i += 4) { for (int i = gridStart; i < gridEnd; i += 4) {
fvec4 sum(&realGrid[i]); fvec4 sum(&realGrids[0][i]);
for (int j = 1; j < numGrids; j++) for (int j = 1; j < numGrids; j++)
sum += fvec4(&tempGrid[j][i]); sum += fvec4(&realGrids[j][i]);
sum.store(&realGrid[i]); sum.store(&realGrids[0][i]);
} }
threads.syncThreads(); threads.syncThreads();
if (lastBoxVectors[0] != periodicBoxVectors[0] || lastBoxVectors[1] != periodicBoxVectors[1] || lastBoxVectors[2] != periodicBoxVectors[2]) { if (lastBoxVectors[0] != periodicBoxVectors[0] || lastBoxVectors[1] != periodicBoxVectors[1] || lastBoxVectors[2] != periodicBoxVectors[2]) {
...@@ -618,7 +619,7 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i ...@@ -618,7 +619,7 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
} }
reciprocalConvolution(complexStart, complexEnd, complexGrid, recipEterm); reciprocalConvolution(complexStart, complexEnd, complexGrid, recipEterm);
threads.syncThreads(); threads.syncThreads();
interpolateForces(posq, &force[0], realGrid, gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, numThreads); interpolateForces(posq, force, realGrids[0], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, numThreads);
} }
void CpuCalcPmeReciprocalForceKernel::beginComputation(IO& io, const Vec3* periodicBoxVectors, bool includeEnergy) { void CpuCalcPmeReciprocalForceKernel::beginComputation(IO& io, const Vec3* periodicBoxVectors, bool includeEnergy) {
...@@ -666,24 +667,18 @@ void CpuCalcPmeReciprocalForceKernel::getPMEParameters(double& alpha, int& nx, i ...@@ -666,24 +667,18 @@ void CpuCalcPmeReciprocalForceKernel::getPMEParameters(double& alpha, int& nx, i
nz = gridz; nz = gridz;
} }
int CpuCalcPmeReciprocalForceKernel::findFFTDimension(int minimum, bool isZ) { int CpuCalcPmeReciprocalForceKernel::findFFTDimension(int minimum) {
if (minimum < 1) if (minimum < 1)
return 1; return 1;
while (true) { while (true) {
// Attempt to factor the current value. // Attempt to factor the current value.
if (isZ && minimum%2 == 1) {
// Force the last dimension to be even, since this produces better performance in FFTW.
minimum++;
continue;
}
int unfactored = minimum; int unfactored = minimum;
for (int factor = 2; factor < 8; factor++) { for (int factor = 2; factor < 9; factor++) {
while (unfactored > 1 && unfactored%factor == 0) while (unfactored > 1 && unfactored%factor == 0)
unfactored /= factor; unfactored /= factor;
} }
if (unfactored == 1 || unfactored == 11 || unfactored == 13) if (unfactored == 1 || unfactored == 11)
return minimum; return minimum;
minimum++; minimum++;
} }
...@@ -720,13 +715,24 @@ void CpuCalcDispersionPmeReciprocalForceKernel::initialize(int xsize, int ysize, ...@@ -720,13 +715,24 @@ void CpuCalcDispersionPmeReciprocalForceKernel::initialize(int xsize, int ysize,
char* threadsEnv = getenv("OPENMM_CPU_THREADS"); char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL) if (threadsEnv != NULL)
stringstream(threadsEnv) >> numThreads; stringstream(threadsEnv) >> numThreads;
fftwf_init_threads();
hasInitializedThreads = true; hasInitializedThreads = true;
} }
threadEnergy.resize(numThreads); threadEnergy.resize(numThreads);
gridx = findFFTDimension(xsize, false); gridx = findFFTDimension(xsize);
gridy = findFFTDimension(ysize, false); gridy = findFFTDimension(ysize);
gridz = findFFTDimension(zsize, true); gridz = findFFTDimension(zsize);
gridShape.push_back(gridx);
gridShape.push_back(gridy);
gridShape.push_back(gridz);
fftAxes.push_back(0);
fftAxes.push_back(1);
fftAxes.push_back(2);
realGridStride.push_back(gridy*gridz*sizeof(float));
realGridStride.push_back(gridz*sizeof(float));
realGridStride.push_back(sizeof(float));
complexGridStride.push_back(gridy*(gridz/2+1)*sizeof(complex<float>));
complexGridStride.push_back((gridz/2+1)*sizeof(complex<float>));
complexGridStride.push_back(sizeof(complex<float>));
this->numParticles = numParticles; this->numParticles = numParticles;
this->alpha = alpha; this->alpha = alpha;
this->deterministic = deterministic; this->deterministic = deterministic;
...@@ -748,16 +754,11 @@ void CpuCalcDispersionPmeReciprocalForceKernel::initialize(int xsize, int ysize, ...@@ -748,16 +754,11 @@ void CpuCalcDispersionPmeReciprocalForceKernel::initialize(int xsize, int ysize,
pthread_cond_wait(&endCondition, &lock); pthread_cond_wait(&endCondition, &lock);
pthread_mutex_unlock(&lock); pthread_mutex_unlock(&lock);
// Initialize FFTW.
// Initialize the FFT grids.
for (int i = 0; i < numThreads; i++)
tempGrid.push_back((float*) fftwf_malloc(sizeof(float)*(gridx*gridy*gridz+3))); realGrids.resize(numThreads, vector<float>(gridx*gridy*gridz+3));
realGrid = tempGrid[0]; complexGrid.resize(gridx*gridy*(gridz/2+1));
complexGrid = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*gridx*gridy*(gridz/2+1));
fftwf_plan_with_nthreads(numThreads);
forwardFFT = fftwf_plan_dft_r2c_3d(gridx, gridy, gridz, realGrid, complexGrid, FFTW_MEASURE);
backwardFFT = fftwf_plan_dft_c2r_3d(gridx, gridy, gridz, complexGrid, realGrid, FFTW_MEASURE);
hasCreatedPlan = true;
// Initialize the b-spline moduli. // Initialize the b-spline moduli.
...@@ -824,14 +825,6 @@ CpuCalcDispersionPmeReciprocalForceKernel::~CpuCalcDispersionPmeReciprocalForceK ...@@ -824,14 +825,6 @@ CpuCalcDispersionPmeReciprocalForceKernel::~CpuCalcDispersionPmeReciprocalForceK
pthread_mutex_destroy(&lock); pthread_mutex_destroy(&lock);
pthread_cond_destroy(&startCondition); pthread_cond_destroy(&startCondition);
pthread_cond_destroy(&endCondition); pthread_cond_destroy(&endCondition);
for (auto grid : tempGrid)
fftwf_free(grid);
if (complexGrid != NULL)
fftwf_free(complexGrid);
if (hasCreatedPlan) {
fftwf_destroy_plan(forwardFFT);
fftwf_destroy_plan(backwardFFT);
}
} }
void CpuCalcDispersionPmeReciprocalForceKernel::runMainThread() { void CpuCalcDispersionPmeReciprocalForceKernel::runMainThread() {
...@@ -854,7 +847,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runMainThread() { ...@@ -854,7 +847,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runMainThread() {
threads.waitForThreads(); threads.waitForThreads();
threads.resumeThreads(); // Signal threads to sum the charge grids. threads.resumeThreads(); // Signal threads to sum the charge grids.
threads.waitForThreads(); threads.waitForThreads();
fftwf_execute_dft_r2c(forwardFFT, realGrid, complexGrid); pocketfft::r2c(gridShape, realGridStride, complexGridStride, fftAxes, true, realGrids[0].data(), complexGrid.data(), 1.0f, 0);
if (lastBoxVectors[0] != periodicBoxVectors[0] || lastBoxVectors[1] != periodicBoxVectors[1] || lastBoxVectors[2] != periodicBoxVectors[2]) { if (lastBoxVectors[0] != periodicBoxVectors[0] || lastBoxVectors[1] != periodicBoxVectors[1] || lastBoxVectors[2] != periodicBoxVectors[2]) {
threads.resumeThreads(); // Signal threads to compute the reciprocal scale factors. threads.resumeThreads(); // Signal threads to compute the reciprocal scale factors.
threads.waitForThreads(); threads.waitForThreads();
...@@ -867,7 +860,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runMainThread() { ...@@ -867,7 +860,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runMainThread() {
} }
threads.resumeThreads(); // Signal threads to perform reciprocal convolution. threads.resumeThreads(); // Signal threads to perform reciprocal convolution.
threads.waitForThreads(); threads.waitForThreads();
fftwf_execute_dft_c2r(backwardFFT, complexGrid, realGrid); pocketfft::c2r(gridShape, complexGridStride, realGridStride, fftAxes, false, complexGrid.data(), realGrids[0].data(), 1.0f, 0);
atomicCounter = 0; atomicCounter = 0;
threads.resumeThreads(); // Signal threads to interpolate forces. threads.resumeThreads(); // Signal threads to interpolate forces.
threads.waitForThreads(); threads.waitForThreads();
...@@ -890,14 +883,14 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runWorkerThread(ThreadPool& thre ...@@ -890,14 +883,14 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runWorkerThread(ThreadPool& thre
int complexStart = std::max(1, ((index*complexSize)/numThreads)); int complexStart = std::max(1, ((index*complexSize)/numThreads));
int complexEnd = (((index+1)*complexSize)/numThreads); int complexEnd = (((index+1)*complexSize)/numThreads);
const float epsilonFactor = 1.0f; const float epsilonFactor = 1.0f;
spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, index, numThreads, deterministic); spreadCharge(posq, realGrids[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, index, numThreads, deterministic);
threads.syncThreads(); threads.syncThreads();
int numGrids = tempGrid.size(); int numGrids = realGrids.size();
for (int i = gridStart; i < gridEnd; i += 4) { for (int i = gridStart; i < gridEnd; i += 4) {
fvec4 sum(&realGrid[i]); fvec4 sum(&realGrids[0][i]);
for (int j = 1; j < numGrids; j++) for (int j = 1; j < numGrids; j++)
sum += fvec4(&tempGrid[j][i]); sum += fvec4(&realGrids[j][i]);
sum.store(&realGrid[i]); sum.store(&realGrids[0][i]);
} }
threads.syncThreads(); threads.syncThreads();
if (lastBoxVectors[0] != periodicBoxVectors[0] || lastBoxVectors[1] != periodicBoxVectors[1] || lastBoxVectors[2] != periodicBoxVectors[2]) { if (lastBoxVectors[0] != periodicBoxVectors[0] || lastBoxVectors[1] != periodicBoxVectors[1] || lastBoxVectors[2] != periodicBoxVectors[2]) {
...@@ -912,7 +905,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runWorkerThread(ThreadPool& thre ...@@ -912,7 +905,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runWorkerThread(ThreadPool& thre
complexStart = (index*complexSize)/numThreads; complexStart = (index*complexSize)/numThreads;
reciprocalConvolution(complexStart, complexEnd, complexGrid, recipEterm); reciprocalConvolution(complexStart, complexEnd, complexGrid, recipEterm);
threads.syncThreads(); threads.syncThreads();
interpolateForces(posq, &force[0], realGrid, gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, numThreads); interpolateForces(posq, force, realGrids[0], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, numThreads);
} }
void CpuCalcDispersionPmeReciprocalForceKernel::beginComputation(CalcPmeReciprocalForceKernel::IO& io, const Vec3* periodicBoxVectors, bool includeEnergy) { void CpuCalcDispersionPmeReciprocalForceKernel::beginComputation(CalcPmeReciprocalForceKernel::IO& io, const Vec3* periodicBoxVectors, bool includeEnergy) {
...@@ -960,24 +953,18 @@ void CpuCalcDispersionPmeReciprocalForceKernel::getPMEParameters(double& alpha, ...@@ -960,24 +953,18 @@ void CpuCalcDispersionPmeReciprocalForceKernel::getPMEParameters(double& alpha,
nz = gridz; nz = gridz;
} }
int CpuCalcDispersionPmeReciprocalForceKernel::findFFTDimension(int minimum, bool isZ) { int CpuCalcDispersionPmeReciprocalForceKernel::findFFTDimension(int minimum) {
if (minimum < 1) if (minimum < 1)
return 1; return 1;
while (true) { while (true) {
// Attempt to factor the current value. // Attempt to factor the current value.
if (isZ && minimum%2 == 1) {
// Force the last dimension to be even, since this produces better performance in FFTW.
minimum++;
continue;
}
int unfactored = minimum; int unfactored = minimum;
for (int factor = 2; factor < 8; factor++) { for (int factor = 2; factor < 9; factor++) {
while (unfactored > 1 && unfactored%factor == 0) while (unfactored > 1 && unfactored%factor == 0)
unfactored /= factor; unfactored /= factor;
} }
if (unfactored == 1 || unfactored == 11 || unfactored == 13) if (unfactored == 1 || unfactored == 11)
return minimum; return minimum;
minimum++; minimum++;
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. * * Portions copyright (c) 2013-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#include "openmm/Vec3.h" #include "openmm/Vec3.h"
#include "openmm/internal/ThreadPool.h" #include "openmm/internal/ThreadPool.h"
#include <atomic> #include <atomic>
#include <fftw3.h> #include <complex>
#include <pthread.h> #include <pthread.h>
#include <vector> #include <vector>
...@@ -46,13 +46,13 @@ namespace OpenMM { ...@@ -46,13 +46,13 @@ namespace OpenMM {
/** /**
* This is an optimized CPU implementation of CalcPmeReciprocalForceKernel. It is both * This is an optimized CPU implementation of CalcPmeReciprocalForceKernel. It is both
* vectorized (requiring SSE 4.1) and multithreaded. It uses FFTW to perform the FFTs. * vectorized (requiring SSE 4.1) and multithreaded. It uses PocketFFT to perform the FFTs.
*/ */
class OPENMM_EXPORT_PME CpuCalcPmeReciprocalForceKernel : public CalcPmeReciprocalForceKernel { class OPENMM_EXPORT_PME CpuCalcPmeReciprocalForceKernel : public CalcPmeReciprocalForceKernel {
public: public:
CpuCalcPmeReciprocalForceKernel(const std::string& name, const Platform& platform) : CalcPmeReciprocalForceKernel(name, platform), CpuCalcPmeReciprocalForceKernel(const std::string& name, const Platform& platform) : CalcPmeReciprocalForceKernel(name, platform),
hasCreatedPlan(false), isDeleted(false), realGrid(NULL), complexGrid(NULL) { isDeleted(false) {
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -104,24 +104,24 @@ public: ...@@ -104,24 +104,24 @@ public:
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
/** /**
* Select a size for one grid dimension that FFTW can handle efficiently. * Select a size for one grid dimension that PocketFFT can handle efficiently.
*/ */
int findFFTDimension(int minimum, bool isZ); int findFFTDimension(int minimum);
static bool hasInitializedThreads; static bool hasInitializedThreads;
static int numThreads; static int numThreads;
int gridx, gridy, gridz, numParticles; int gridx, gridy, gridz, numParticles;
double alpha; double alpha;
bool deterministic; bool deterministic;
bool hasCreatedPlan, isFinished, isDeleted; bool isFinished, isDeleted;
std::vector<float> force; std::vector<float> force;
std::vector<float> bsplineModuli[3]; std::vector<float> bsplineModuli[3];
std::vector<float> recipEterm; std::vector<float> recipEterm;
Vec3 lastBoxVectors[3]; Vec3 lastBoxVectors[3];
std::vector<float> threadEnergy; std::vector<float> threadEnergy;
std::vector<float*> tempGrid; std::vector<std::vector<float> > realGrids;
float* realGrid; std::vector<std::complex<float> > complexGrid;
fftwf_complex* complexGrid; std::vector<std::size_t> gridShape, fftAxes;
fftwf_plan forwardFFT, backwardFFT; std::vector<std::ptrdiff_t> realGridStride, complexGridStride;
int waitCount; int waitCount;
pthread_cond_t startCondition, endCondition; pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock; pthread_mutex_t lock;
...@@ -139,13 +139,13 @@ private: ...@@ -139,13 +139,13 @@ private:
/** /**
* This is an optimized CPU implementation of CalcDispersionPmeReciprocalForceKernel. It is both * This is an optimized CPU implementation of CalcDispersionPmeReciprocalForceKernel. It is both
* vectorized (requiring SSE 4.1) and multithreaded. It uses FFTW to perform the FFTs. * vectorized (requiring SSE 4.1) and multithreaded. It uses PocketFFT to perform the FFTs.
*/ */
class OPENMM_EXPORT_PME CpuCalcDispersionPmeReciprocalForceKernel : public CalcDispersionPmeReciprocalForceKernel { class OPENMM_EXPORT_PME CpuCalcDispersionPmeReciprocalForceKernel : public CalcDispersionPmeReciprocalForceKernel {
public: public:
CpuCalcDispersionPmeReciprocalForceKernel(const std::string& name, const Platform& platform) : CalcDispersionPmeReciprocalForceKernel(name, platform), CpuCalcDispersionPmeReciprocalForceKernel(const std::string& name, const Platform& platform) : CalcDispersionPmeReciprocalForceKernel(name, platform),
hasCreatedPlan(false), isDeleted(false), realGrid(NULL), complexGrid(NULL) { isDeleted(false) {
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -198,24 +198,24 @@ public: ...@@ -198,24 +198,24 @@ public:
private: private:
class ComputeTask; class ComputeTask;
/** /**
* Select a size for one grid dimension that FFTW can handle efficiently. * Select a size for one grid dimension that PocketFFT can handle efficiently.
*/ */
int findFFTDimension(int minimum, bool isZ); int findFFTDimension(int minimum);
static bool hasInitializedThreads; static bool hasInitializedThreads;
static int numThreads; static int numThreads;
int gridx, gridy, gridz, numParticles; int gridx, gridy, gridz, numParticles;
double alpha; double alpha;
bool deterministic; bool deterministic;
bool hasCreatedPlan, isFinished, isDeleted; bool isFinished, isDeleted;
std::vector<float> force; std::vector<float> force;
std::vector<float> bsplineModuli[3]; std::vector<float> bsplineModuli[3];
std::vector<float> recipEterm; std::vector<float> recipEterm;
Vec3 lastBoxVectors[3]; Vec3 lastBoxVectors[3];
std::vector<float> threadEnergy; std::vector<float> threadEnergy;
std::vector<float*> tempGrid; std::vector<std::vector<float> > realGrids;
float* realGrid; std::vector<std::complex<float> > complexGrid;
fftwf_complex* complexGrid; std::vector<std::size_t> gridShape, fftAxes;
fftwf_plan forwardFFT, backwardFFT; std::vector<std::ptrdiff_t> realGridStride, complexGridStride;
int waitCount; int waitCount;
pthread_cond_t startCondition, endCondition; pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock; pthread_mutex_t lock;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2020 Stanford University and the Authors. * * Portions copyright (c) 2011-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -33,6 +33,11 @@ ...@@ -33,6 +33,11 @@
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
#include <complex>
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -52,14 +57,6 @@ static vector<Vec3>& extractForces(ContextImpl& context) { ...@@ -52,14 +57,6 @@ static vector<Vec3>& extractForces(ContextImpl& context) {
return *data->forces; return *data->forces;
} }
ReferenceIntegrateRPMDStepKernel::~ReferenceIntegrateRPMDStepKernel() {
if (fft != NULL)
fftpack_destroy(fft);
for (auto& c : contractionFFT)
if (c.second != NULL)
fftpack_destroy(c.second);
}
void ReferenceIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) { void ReferenceIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
int numCopies = integrator.getNumCopies(); int numCopies = integrator.getNumCopies();
int numParticles = system.getNumParticles(); int numParticles = system.getNumParticles();
...@@ -71,7 +68,6 @@ void ReferenceIntegrateRPMDStepKernel::initialize(const System& system, const RP ...@@ -71,7 +68,6 @@ void ReferenceIntegrateRPMDStepKernel::initialize(const System& system, const RP
velocities[i].resize(numParticles); velocities[i].resize(numParticles);
forces[i].resize(numParticles); forces[i].resize(numParticles);
} }
fftpack_init_1d(&fft, numCopies);
SimTKOpenMMUtilities::setRandomNumberSeed((unsigned int) integrator.getRandomNumberSeed()); SimTKOpenMMUtilities::setRandomNumberSeed((unsigned int) integrator.getRandomNumberSeed());
// Build a list of contractions. // Build a list of contractions.
...@@ -89,8 +85,6 @@ void ReferenceIntegrateRPMDStepKernel::initialize(const System& system, const RP ...@@ -89,8 +85,6 @@ void ReferenceIntegrateRPMDStepKernel::initialize(const System& system, const RP
if (copies != numCopies) { if (copies != numCopies) {
if (groupsByCopies.find(copies) == groupsByCopies.end()) { if (groupsByCopies.find(copies) == groupsByCopies.end()) {
groupsByCopies[copies] = 1<<group; groupsByCopies[copies] = 1<<group;
contractionFFT[copies] = NULL;
fftpack_init_1d(&contractionFFT[copies], copies);
if (copies > maxContractedCopies) if (copies > maxContractedCopies)
maxContractedCopies = copies; maxContractedCopies = copies;
} }
...@@ -128,8 +122,8 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI ...@@ -128,8 +122,8 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI
// Apply the PILE-L thermostat. // Apply the PILE-L thermostat.
vector<t_complex> v(numCopies); vector<complex<double>> v(numCopies);
vector<t_complex> q(numCopies); vector<complex<double>> q(numCopies);
const double hbar = 1.054571628e-34*AVOGADRO/(1000*1e-12); const double hbar = 1.054571628e-34*AVOGADRO/(1000*1e-12);
const double scale = 1.0/sqrt((double) numCopies); const double scale = 1.0/sqrt((double) numCopies);
const double nkT = numCopies*BOLTZ*integrator.getTemperature(); const double nkT = numCopies*BOLTZ*integrator.getTemperature();
...@@ -143,12 +137,12 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI ...@@ -143,12 +137,12 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI
const double c3_0 = c2_0*sqrt(nkT/system.getParticleMass(particle)); const double c3_0 = c2_0*sqrt(nkT/system.getParticleMass(particle));
for (int component = 0; component < 3; component++) { for (int component = 0; component < 3; component++) {
for (int k = 0; k < numCopies; k++) for (int k = 0; k < numCopies; k++)
v[k] = t_complex(scale*velocities[k][particle][component], 0.0); v[k] = complex<double>(scale*velocities[k][particle][component], 0.0);
fftpack_exec_1d(fft, FFTPACK_FORWARD, &v[0], &v[0]); pocketfft::c2c({(size_t) numCopies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, true, v.data(), v.data(), 1.0, 0);
// Apply a local Langevin thermostat to the centroid mode. // Apply a local Langevin thermostat to the centroid mode.
v[0].re = v[0].re*c1_0 + c3_0*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber(); v[0].real(v[0].real()*c1_0 + c3_0*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
// Use critical damping white noise for the remaining modes. // Use critical damping white noise for the remaining modes.
...@@ -160,13 +154,13 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI ...@@ -160,13 +154,13 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI
const double c3 = c2*sqrt(nkT/system.getParticleMass(particle)); const double c3 = c2*sqrt(nkT/system.getParticleMass(particle));
double rand1 = c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber(); double rand1 = c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber();
double rand2 = (isCenter ? 0.0 : c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber()); double rand2 = (isCenter ? 0.0 : c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
v[k] = v[k]*c1 + t_complex(rand1, rand2); v[k] = v[k]*c1 + complex<double>(rand1, rand2);
if (k < numCopies-k) if (k < numCopies-k)
v[numCopies-k] = v[numCopies-k]*c1 + t_complex(rand1, -rand2); v[numCopies-k] = v[numCopies-k]*c1 + complex<double>(rand1, -rand2);
} }
fftpack_exec_1d(fft, FFTPACK_BACKWARD, &v[0], &v[0]); pocketfft::c2c({(size_t) numCopies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, false, v.data(), v.data(), 1.0, 0);
for (int k = 0; k < numCopies; k++) for (int k = 0; k < numCopies; k++)
velocities[k][particle][component] = scale*v[k].re; velocities[k][particle][component] = scale*v[k].real();
} }
} }
} }
...@@ -185,11 +179,11 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI ...@@ -185,11 +179,11 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI
continue; continue;
for (int component = 0; component < 3; component++) { for (int component = 0; component < 3; component++) {
for (int k = 0; k < numCopies; k++) { for (int k = 0; k < numCopies; k++) {
q[k] = t_complex(scale*positions[k][particle][component], 0.0); q[k] = complex<double>(scale*positions[k][particle][component], 0.0);
v[k] = t_complex(scale*velocities[k][particle][component], 0.0); v[k] = complex<double>(scale*velocities[k][particle][component], 0.0);
} }
fftpack_exec_1d(fft, FFTPACK_FORWARD, &q[0], &q[0]); pocketfft::c2c({(size_t) numCopies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, true, q.data(), q.data(), 1.0, 0);
fftpack_exec_1d(fft, FFTPACK_FORWARD, &v[0], &v[0]); pocketfft::c2c({(size_t) numCopies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, true, v.data(), v.data(), 1.0, 0);
q[0] += v[0]*dt; q[0] += v[0]*dt;
for (int k = 1; k < numCopies; k++) { for (int k = 1; k < numCopies; k++) {
const double wk = twown*sin(k*M_PI/numCopies); const double wk = twown*sin(k*M_PI/numCopies);
...@@ -197,15 +191,15 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI ...@@ -197,15 +191,15 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI
const double coswt = cos(wt); const double coswt = cos(wt);
const double sinwt = sin(wt); const double sinwt = sin(wt);
const double wm = wk*system.getParticleMass(particle); const double wm = wk*system.getParticleMass(particle);
const t_complex vprime = v[k]*coswt - q[k]*(wk*sinwt); // Advance velocity from t to t+dt const complex<double> vprime = v[k]*coswt - q[k]*(wk*sinwt); // Advance velocity from t to t+dt
q[k] = v[k]*(sinwt/wk) + q[k]*coswt; // Advance position from t to t+dt q[k] = v[k]*(sinwt/wk) + q[k]*coswt; // Advance position from t to t+dt
v[k] = vprime; v[k] = vprime;
} }
fftpack_exec_1d(fft, FFTPACK_BACKWARD, &q[0], &q[0]); pocketfft::c2c({(size_t) numCopies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, false, q.data(), q.data(), 1.0, 0);
fftpack_exec_1d(fft, FFTPACK_BACKWARD, &v[0], &v[0]); pocketfft::c2c({(size_t) numCopies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, false, v.data(), v.data(), 1.0, 0);
for (int k = 0; k < numCopies; k++) { for (int k = 0; k < numCopies; k++) {
positions[k][particle][component] = scale*q[k].re; positions[k][particle][component] = scale*q[k].real();
velocities[k][particle][component] = scale*v[k].re; velocities[k][particle][component] = scale*v[k].real();
} }
} }
} }
...@@ -230,12 +224,12 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI ...@@ -230,12 +224,12 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI
const double c3_0 = c2_0*sqrt(nkT/system.getParticleMass(particle)); const double c3_0 = c2_0*sqrt(nkT/system.getParticleMass(particle));
for (int component = 0; component < 3; component++) { for (int component = 0; component < 3; component++) {
for (int k = 0; k < numCopies; k++) for (int k = 0; k < numCopies; k++)
v[k] = t_complex(scale*velocities[k][particle][component], 0.0); v[k] = complex<double>(scale*velocities[k][particle][component], 0.0);
fftpack_exec_1d(fft, FFTPACK_FORWARD, &v[0], &v[0]); pocketfft::c2c({(size_t) numCopies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, true, v.data(), v.data(), 1.0, 0);
// Apply a local Langevin thermostat to the centroid mode. // Apply a local Langevin thermostat to the centroid mode.
v[0].re = v[0].re*c1_0 + c3_0*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber(); v[0].real(v[0].real()*c1_0 + c3_0*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
// Use critical damping white noise for the remaining modes. // Use critical damping white noise for the remaining modes.
...@@ -247,13 +241,13 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI ...@@ -247,13 +241,13 @@ void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDI
const double c3 = c2*sqrt(nkT/system.getParticleMass(particle)); const double c3 = c2*sqrt(nkT/system.getParticleMass(particle));
double rand1 = c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber(); double rand1 = c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber();
double rand2 = (isCenter ? 0.0 : c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber()); double rand2 = (isCenter ? 0.0 : c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
v[k] = v[k]*c1 + t_complex(rand1, rand2); v[k] = v[k]*c1 + complex<double>(rand1, rand2);
if (k < numCopies-k) if (k < numCopies-k)
v[numCopies-k] = v[numCopies-k]*c1 + t_complex(rand1, -rand2); v[numCopies-k] = v[numCopies-k]*c1 + complex<double>(rand1, -rand2);
} }
fftpack_exec_1d(fft, FFTPACK_BACKWARD, &v[0], &v[0]); pocketfft::c2c({(size_t) numCopies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, false, v.data(), v.data(), 1.0, 0);
for (int k = 0; k < numCopies; k++) for (int k = 0; k < numCopies; k++)
velocities[k][particle][component] = scale*v[k].re; velocities[k][particle][component] = scale*v[k].real();
} }
} }
} }
...@@ -294,28 +288,27 @@ void ReferenceIntegrateRPMDStepKernel::computeForces(ContextImpl& context, const ...@@ -294,28 +288,27 @@ void ReferenceIntegrateRPMDStepKernel::computeForces(ContextImpl& context, const
for (auto& g : groupsByCopies) { for (auto& g : groupsByCopies) {
int copies = g.first; int copies = g.first;
int groupFlags = g.second; int groupFlags = g.second;
fftpack* shortFFT = contractionFFT[copies];
// Find the contracted positions. // Find the contracted positions.
vector<t_complex> q(totalCopies); vector<complex<double>> q(totalCopies);
const double scale1 = 1.0/totalCopies; const double scale1 = 1.0/totalCopies;
for (int particle = 0; particle < numParticles; particle++) { for (int particle = 0; particle < numParticles; particle++) {
for (int component = 0; component < 3; component++) { for (int component = 0; component < 3; component++) {
// Transform to the frequency domain, set high frequency components to zero, and transform back. // Transform to the frequency domain, set high frequency components to zero, and transform back.
for (int k = 0; k < totalCopies; k++) for (int k = 0; k < totalCopies; k++)
q[k] = t_complex(positions[k][particle][component], 0.0); q[k] = complex<double>(positions[k][particle][component], 0.0);
fftpack_exec_1d(fft, FFTPACK_FORWARD, &q[0], &q[0]); pocketfft::c2c({(size_t) totalCopies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, true, q.data(), q.data(), 1.0, 0);
if (copies > 1) { if (copies > 1) {
int start = (copies+1)/2; int start = (copies+1)/2;
int end = totalCopies-copies+start; int end = totalCopies-copies+start;
for (int k = end; k < totalCopies; k++) for (int k = end; k < totalCopies; k++)
q[k-(totalCopies-copies)] = q[k]; q[k-(totalCopies-copies)] = q[k];
fftpack_exec_1d(shortFFT, FFTPACK_BACKWARD, &q[0], &q[0]); pocketfft::c2c({(size_t) copies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, false, q.data(), q.data(), 1.0, 0);
} }
for (int k = 0; k < copies; k++) for (int k = 0; k < copies; k++)
contractedPositions[k][particle][component] = scale1*q[k].re; contractedPositions[k][particle][component] = scale1*q[k].real();
} }
} }
...@@ -336,18 +329,18 @@ void ReferenceIntegrateRPMDStepKernel::computeForces(ContextImpl& context, const ...@@ -336,18 +329,18 @@ void ReferenceIntegrateRPMDStepKernel::computeForces(ContextImpl& context, const
// Transform to the frequency domain, pad with zeros, and transform back. // Transform to the frequency domain, pad with zeros, and transform back.
for (int k = 0; k < copies; k++) for (int k = 0; k < copies; k++)
q[k] = t_complex(contractedForces[k][particle][component], 0.0); q[k] = complex<double>(contractedForces[k][particle][component], 0.0);
if (copies > 1) if (copies > 1)
fftpack_exec_1d(shortFFT, FFTPACK_FORWARD, &q[0], &q[0]); pocketfft::c2c({(size_t) copies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, true, q.data(), q.data(), 1.0, 0);
int start = (copies+1)/2; int start = (copies+1)/2;
int end = totalCopies-copies+start; int end = totalCopies-copies+start;
for (int k = end; k < totalCopies; k++) for (int k = end; k < totalCopies; k++)
q[k] = q[k-(totalCopies-copies)]; q[k] = q[k-(totalCopies-copies)];
for (int k = start; k < end; k++) for (int k = start; k < end; k++)
q[k] = t_complex(0, 0); q[k] = complex<double>(0, 0);
fftpack_exec_1d(fft, FFTPACK_BACKWARD, &q[0], &q[0]); pocketfft::c2c({(size_t) totalCopies}, {sizeof(complex<double>)}, {sizeof(complex<double>)}, {0}, false, q.data(), q.data(), 1.0, 0);
for (int k = 0; k < totalCopies; k++) for (int k = 0; k < totalCopies; k++)
forces[k][particle][component] += scale2*q[k].re; forces[k][particle][component] += scale2*q[k].real();
} }
} }
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. * * Portions copyright (c) 2011-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -35,7 +35,6 @@ ...@@ -35,7 +35,6 @@
#include "ReferencePlatform.h" #include "ReferencePlatform.h"
#include "openmm/RpmdKernels.h" #include "openmm/RpmdKernels.h"
#include "openmm/Vec3.h" #include "openmm/Vec3.h"
#include "fftpack.h"
namespace OpenMM { namespace OpenMM {
...@@ -46,9 +45,8 @@ namespace OpenMM { ...@@ -46,9 +45,8 @@ namespace OpenMM {
class ReferenceIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel { class ReferenceIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public: public:
ReferenceIntegrateRPMDStepKernel(const std::string& name, const Platform& platform) : ReferenceIntegrateRPMDStepKernel(const std::string& name, const Platform& platform) :
IntegrateRPMDStepKernel(name, platform), fft(NULL) { IntegrateRPMDStepKernel(name, platform) {
} }
~ReferenceIntegrateRPMDStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -93,8 +91,6 @@ private: ...@@ -93,8 +91,6 @@ private:
std::vector<std::vector<Vec3> > contractedForces; std::vector<std::vector<Vec3> > contractedForces;
std::map<int, int> groupsByCopies; std::map<int, int> groupsByCopies;
int groupsNotContracted; int groupsNotContracted;
fftpack* fft;
std::map<int, fftpack*> contractionFFT;
}; };
} // namespace OpenMM } // namespace OpenMM
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment