Use PocketFFT (#3667)

* Use PocketFFT instead of FFTW
* Minor cleanup
* Use PocketFFT instead of fftpack for reference platform
* Remove FFTW as a dependency
* Converted a test case to use PocketFFT
* Fixed an incorrect comment

Use PocketFFT (#3667)
* Use PocketFFT instead of FFTW
* Minor cleanup
* Use PocketFFT instead of fftpack for reference platform
* Remove FFTW as a dependency
* Converted a test case to use PocketFFT
* Fixed an incorrect comment
1dac981a · Peter Eastman · GitHub · 583471a6 · 1dac981a · 1dac981a
Unverified Commit 1dac981a authored Jun 30, 2022 by Peter Eastman Committed by GitHub Jun 30, 2022
20 changed files
--- a/.azure-pipelines/azure-pipelines-windows.yml
+++ b/.azure-pipelines/azure-pipelines-windows.yml
@@ -42,7 +42,6 @@ jobs:
      - script: |
          conda install cmake ^
                        cython ^
-                        fftw ^
                        ninja ^
                        numpy ^
                        swig ^

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,7 +88,7 @@ ENDIF(${CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT})

 # The source is organized into subdirectories, but we handle them all from
 # this CMakeLists file rather than letting CMake visit them as SUBDIRS.
-SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 platforms/reference serialization libraries/irrxml)
+SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 libraries/pocketfft platforms/reference serialization libraries/irrxml)
 IF(X86 OR ARM)
    SET(OPENMM_SOURCE_SUBDIRS ${OPENMM_SOURCE_SUBDIRS} libraries/vecmath)
 ENDIF()
@@ -392,12 +392,7 @@ ENDIF(OPENMM_BUILD_DRUDE_PLUGIN)

 # CPU PME plugin

-FIND_PACKAGE(FFTW QUIET)
-IF(FFTW_FOUND)
-    SET(OPENMM_BUILD_PME_PLUGIN ON CACHE BOOL "Build CPU PME plugin")
-ELSE(FFTW_FOUND)
-    SET(OPENMM_BUILD_PME_PLUGIN OFF CACHE BOOL "Build CPU PME plugin")
-ENDIF(FFTW_FOUND)
+SET(OPENMM_BUILD_PME_PLUGIN ON CACHE BOOL "Build CPU PME plugin")
 SET(OPENMM_BUILD_PME_PATH)
 IF(OPENMM_BUILD_PME_PLUGIN)
   SET(OPENMM_BUILD_PME_PATH ${CMAKE_CURRENT_SOURCE_DIR}/plugins/cpupme)

--- a/appveyor.yml
+++ b/appveyor.yml
@@ -19,14 +19,6 @@ install:
  - "set PATH=%APPVEYOR_BUILD_FOLDER%\\cclash-0.3.14;%PATH%"
  - "set CCLASH_DIR=C:\\ProgramData\\cclash"

-# Download FFTW3 for PME plugin
-  - ps: Invoke-WebRequest ftp://ftp.fftw.org/pub/fftw/fftw-3.3.4-dll64.zip -OutFile fftw-3.3.4-dll64.zip
-  - 7z x fftw-3.3.4-dll64.zip -oC:\fftw > null
-  - cd C:\fftw
-  - lib /def:libfftw3f-3.def
-  - cd %APPVEYOR_BUILD_FOLDER%
-  - "set PATH=C:\\fftw;%PATH%"
-
 # Download and install some OpenMM build dependencies (doxygen, swig)
  - choco install -y doxygen.install swig > null

@@ -53,8 +45,6 @@ build_script:
      "-DOPENMM_BUILD_PME_PLUGIN=ON
       -DOPENCL_INCLUDE_DIR=C:/opencl/inc
       -DOPENCL_LIBRARY=C:/opencl/lib/OpenCL.lib
-       -DFFTW_LIBRARY=C:/fftw/libfftw3f-3.lib
-       -DFFTW_INCLUDES=C:/fftw
       -DOPENMM_BUILD_EXAMPLES=OFF
       -DOPENMM_BUILD_OPENCL_TESTS=OFF
       -DCMAKE_BUILD_TYPE=Release

--- a/cmake_modules/FindFFTW.cmake
+++ b/cmake_modules/FindFFTW.cmake
-# - Find FFTW
-# Find the native FFTW includes and library
-#
-#  FFTW_INCLUDES        - where to find fftw3.h
-#  FFTW_LIBRARY         - the main FFTW library.
-#  FFTW_THREADS_LIBRARY - the FFTW multithreading support library.
-#  FFTW_FOUND           - True if FFTW found.
-
-if (FFTW_INCLUDES)
-  # Already in cache, be silent
-  set (FFTW_FIND_QUIETLY TRUE)
-endif (FFTW_INCLUDES)
-
-find_path (FFTW_INCLUDES fftw3.h)
-
-find_library (FFTW_LIBRARY NAMES fftw3f)
-find_library (FFTW_THREADS_LIBRARY NAMES fftw3f_threads)
-
-# handle the QUIETLY and REQUIRED arguments and set FFTW_FOUND to TRUE if
-# all listed variables are TRUE
-include (FindPackageHandleStandardArgs)
-find_package_handle_standard_args (FFTW DEFAULT_MSG FFTW_LIBRARY FFTW_INCLUDES)
-
-mark_as_advanced (FFTW_LIBRARY FFTW_THREADS_LIBRARY FFTW_INCLUDES)
--- a/devtools/ci/gh-actions/conda-envs/build-M1-arm64.yml
+++ b/devtools/ci/gh-actions/conda-envs/build-M1-arm64.yml
@@ -10,7 +10,6 @@ dependencies:
 - python
 - cython
 - swig
- fftw
 - numpy
 - doxygen 1.9.1
 # test

--- a/devtools/ci/gh-actions/conda-envs/build-macos-latest.yml
+++ b/devtools/ci/gh-actions/conda-envs/build-macos-latest.yml
@@ -10,7 +10,6 @@ dependencies:
 - python
 - cython
 - swig
- fftw
 - numpy
 - doxygen 1.8.14
 # test

--- a/devtools/ci/gh-actions/conda-envs/build-ubuntu-latest.yml
+++ b/devtools/ci/gh-actions/conda-envs/build-ubuntu-latest.yml
@@ -12,7 +12,6 @@ dependencies:
 - python
 - cython
 - swig
- fftw
 - numpy
 - ocl-icd-system
 - doxygen 1.8.14

--- a/devtools/ci/gh-actions/conda-envs/build-windows-latest.yml
+++ b/devtools/ci/gh-actions/conda-envs/build-windows-latest.yml
@@ -12,7 +12,6 @@ dependencies:
 - python
 - cython
 - swig
- fftw
 - numpy
 - doxygen 1.8.14
 - khronos-opencl-icd-loader

--- a/devtools/packaging/scripts/linux/build.sh
+++ b/devtools/packaging/scripts/linux/build.sh
@@ -71,8 +71,6 @@ CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=$CLANG_PREFIX/bin/clang -DCMAKE_CXX_COMPILER=$
 # Ensure we build a release
 CMAKE_FLAGS+=" -DCMAKE_BUILD_TYPE=Release"

-# setting the rpath so that libOpenMMPME.so finds the right libfftw3
-#CMAKE_FLAGS+=" -DCMAKE_INSTALL_RPATH=.."
 # Use NVIDIA CUDA 8.0
 CMAKE_FLAGS+=" -DCUDA_CUDART_LIBRARY=/usr/local/cuda-8.0/lib64/libcudart.so"
 CMAKE_FLAGS+=" -DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-8.0/bin/nvcc"
@@ -84,11 +82,6 @@ CMAKE_FLAGS+=" -DOPENCL_INCLUDE_DIR=/opt/AMDAPPSDK-3.0/include/"
 CMAKE_FLAGS+=" -DOPENCL_LIBRARY=/opt/AMDAPPSDK-3.0/lib/x86_64/libOpenCL.so"
 # Generate API docs
 CMAKE_FLAGS+=" -DOPENMM_GENERATE_API_DOCS=ON"
-# Set location for FFTW3
-PREFIX="$WORKSPACE/miniconda"
-CMAKE_FLAGS+=" -DFFTW_INCLUDES=$PREFIX/include"
-CMAKE_FLAGS+=" -DFFTW_LIBRARY=$PREFIX/lib/libfftw3f.so"
-CMAKE_FLAGS+=" -DFFTW_THREADS_LIBRARY=$PREFIX/lib/libfftw3f_threads.so"
 # Necessary to find GL headers
 CMAKE_FLAGS+=" -DCMAKE_CXX_FLAGS_RELEASE=-I/usr/include/nvidia/"


--- a/devtools/packaging/scripts/linux/prepare.sh
+++ b/devtools/packaging/scripts/linux/prepare.sh
@@ -29,4 +29,4 @@ yum install -y libgcrypt

 # Ensure configuration is up to date.
 conda config --add channels omnia
-conda install --yes --quiet swig fftw3f pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
+conda install --yes --quiet swig pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
--- a/devtools/packaging/scripts/osx/build.sh
+++ b/devtools/packaging/scripts/osx/build.sh
@@ -17,20 +17,11 @@ fi

 CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=$INSTALL"

-# setting the rpath so that libOpenMMPME.so finds the right libfftw3
-#CMAKE_FLAGS+=" -DCMAKE_INSTALL_RPATH=.."
 CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++"
 CMAKE_FLAGS+=" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.9"
 CMAKE_FLAGS+=" -DCMAKE_OSX_SYSROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk"
 CMAKE_FLAGS+=" -DOPENMM_GENERATE_API_DOCS=ON"

-# Build in subdirectory.
-# Set location for FFTW3
-PREFIX="$WORKSPACE/miniconda"
-CMAKE_FLAGS+=" -DFFTW_INCLUDES=$PREFIX/include"
-CMAKE_FLAGS+=" -DFFTW_LIBRARY=$PREFIX/lib/libfftw3f.dylib"
-CMAKE_FLAGS+=" -DFFTW_THREADS_LIBRARY=$PREFIX/lib/libfftw3f_threads.dylib"
-
 # Build in subdirectory.
 if [ -e build ]; then
    rm -rf build

--- a/devtools/packaging/scripts/osx/prepare.sh
+++ b/devtools/packaging/scripts/osx/prepare.sh
@@ -26,5 +26,5 @@ export PATH=$WORKSPACE/miniconda/bin:$PATH

 # Ensure configuration is up to date.
 conda config --add channels http://conda.binstar.org/omnia
-conda install --yes --quiet swig fftw3f pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
+conda install --yes --quiet swig pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
 pip install sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen
--- a/devtools/packaging/scripts/source/build.sh
+++ b/devtools/packaging/scripts/source/build.sh
@@ -71,8 +71,6 @@ CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=$CLANG_PREFIX/bin/clang -DCMAKE_CXX_COMPILER=$
 # Ensure we build a release
 CMAKE_FLAGS+=" -DCMAKE_BUILD_TYPE=Release"

-# setting the rpath so that libOpenMMPME.so finds the right libfftw3
-#CMAKE_FLAGS+=" -DCMAKE_INSTALL_RPATH=.."
 # Use NVIDIA CUDA 8.0
 CMAKE_FLAGS+=" -DCUDA_CUDART_LIBRARY=/usr/local/cuda-8.0/lib64/libcudart.so"
 CMAKE_FLAGS+=" -DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-8.0/bin/nvcc"
@@ -84,11 +82,6 @@ CMAKE_FLAGS+=" -DOPENCL_INCLUDE_DIR=/opt/AMDAPPSDK-3.0/include/"
 CMAKE_FLAGS+=" -DOPENCL_LIBRARY=/opt/AMDAPPSDK-3.0/lib/x86_64/libOpenCL.so"
 # Generate API docs
 CMAKE_FLAGS+=" -DOPENMM_GENERATE_API_DOCS=ON"
-# Set location for FFTW3
-PREFIX="$WORKSPACE/miniconda"
-CMAKE_FLAGS+=" -DFFTW_INCLUDES=$PREFIX/include"
-CMAKE_FLAGS+=" -DFFTW_LIBRARY=$PREFIX/lib/libfftw3f.so"
-CMAKE_FLAGS+=" -DFFTW_THREADS_LIBRARY=$PREFIX/lib/libfftw3f_threads.so"
 # Necessary to find GL headers
 CMAKE_FLAGS+=" -DCMAKE_CXX_FLAGS_RELEASE=-I/usr/include/nvidia/"


--- a/devtools/packaging/scripts/source/prepare.sh
+++ b/devtools/packaging/scripts/source/prepare.sh
@@ -29,4 +29,4 @@ yum install -y libgcrypt

 # Ensure configuration is up to date.
 conda config --add channels omnia
-conda install --yes --quiet swig fftw3f pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
+conda install --yes --quiet swig pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
--- a/devtools/packaging/scripts/windows/build.bat
+++ b/devtools/packaging/scripts/windows/build.bat
 mkdir build
 cd build

-set FFTW=C:\Miniconda3\pkgs\fftw3f-3.3.4-vc14_2\Library
 set APPSDK=C:\Program Files (x86)\AMD APP SDK\2.9-1
 "C:\Program Files\CMake\bin\cmake.exe" .. -G "NMake Makefiles JOM" -DCMAKE_BUILD_TYPE=Release -DOPENMM_GENERATE_API_DOCS=ON ^
-    -DOPENCL_INCLUDE_DIR="%APPSDK%\include" -DOPENCL_LIBRARY="%APPSDK%\lib\x86_64\OpenCL.lib" ^
-    -DFFTW_INCLUDES="%FFTW%/include" -DFFTW_LIBRARY="%FFTW%/lib/libfftw3f-3.lib"
+    -DOPENCL_INCLUDE_DIR="%APPSDK%\include" -DOPENCL_LIBRARY="%APPSDK%\lib\x86_64\OpenCL.lib"

 jom
 jom PythonInstall

--- a/devtools/packaging/scripts/windows/prepare.ps1
+++ b/devtools/packaging/scripts/windows/prepare.ps1
@@ -19,7 +19,7 @@ wget https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -U
 # Install software with conda.

 & "C:\Miniconda3\Scripts\conda.exe" config --add channels omnia --add channels conda-forge
-& "C:\Miniconda3\Scripts\conda.exe" install -y fftw3f==3.3.4=vc14_2 jinja2 lxml sphinx sphinxcontrib-autodoc_doxygen sphinxcontrib-lunrsearch conda-build anaconda-client
+& "C:\Miniconda3\Scripts\conda.exe" install -y jinja2 lxml sphinx sphinxcontrib-autodoc_doxygen sphinxcontrib-lunrsearch conda-build anaconda-client
 & "C:\Miniconda3\Scripts\pip.exe" install sphinxcontrib.bibtex

 # Install software with choco.

--- a/docs-source/usersguide/library/02_compiling.rst
+++ b/docs-source/usersguide/library/02_compiling.rst
@@ -78,7 +78,7 @@ them is with conda.  The following command will install everything needed to
 build OpenMM.
 ::

-    conda install -c conda-forge cmake make cython swig fftw doxygen numpy
+    conda install -c conda-forge cmake make cython swig doxygen numpy

 Step 1: Configure with CMake
 ============================
@@ -213,7 +213,7 @@ Before building OpenMM from source, you will need certain tools.
 C++ compiler
 ------------

-On Windows systems, use the C++ compiler in Visual Studio 2015 or later.  You
+On Windows systems, use the C++ compiler in Visual Studio 2017 or later.  You
 can download a free version of Visual Studio from https://visualstudio.microsoft.com.

 Python
@@ -267,7 +267,7 @@ It will open a command window that is preconfigured for conda.  Enter the
 following command to install everything needed to build OpenMM.
 ::

-    conda install -c conda-forge cython swig fftw doxygen numpy
+    conda install -c conda-forge cython swig doxygen numpy

 Step 1: Configure with CMake
 ============================

--- a/docs-source/usersguide/library/04_platform_specifics.rst
+++ b/docs-source/usersguide/library/04_platform_specifics.rst
@@ -35,9 +35,7 @@ The OpenCL Platform recognizes the following Platform-specific properties:
  is the most accurate option, but is usually much slower than the others.
 * UseCpuPme: This selects whether to use the CPU-based PME
  implementation.  The allowed values are “true” or “false”.  Depending on your
-  hardware, this might (or might not) improve performance.  To use this option,
-  you must have FFTW (single precision, multithreaded) installed, and your CPU
-  must support SSE 4.1.
+  hardware, this might (or might not) improve performance.
 * OpenCLPlatformIndex: When multiple OpenCL implementations are installed on
  your computer, this is used to select which one to use.  The value is the
  zero-based index of the platform (in the OpenCL sense, not the OpenMM sense) to use,
@@ -74,9 +72,7 @@ The CUDA Platform recognizes the following Platform-specific properties:
  is the most accurate option, but is usually much slower than the others.
 * UseCpuPme: This selects whether to use the CPU-based PME implementation.
  The allowed values are “true” or “false”.  Depending on your hardware, this
-  might (or might not) improve performance.  To use this option, you must have
-  FFTW (single precision, multithreaded) installed, and your CPU must support SSE
-  4.1.
+  might (or might not) improve performance.
 * CudaCompiler: This specifies the path to the CUDA kernel compiler.  Versions
  of CUDA before 7.0 require a separate compiler executable.  If you do
  not specify this, OpenMM will try to locate the compiler itself.  Specify this

--- a/libraries/pocketfft/include/pocketfft_hdronly.h
+++ b/libraries/pocketfft/include/pocketfft_hdronly.h
--- a/platforms/cuda/tests/TestCudaFFT3D.cpp
+++ b/platforms/cuda/tests/TestCudaFFT3D.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2016 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2022 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -30,7 +30,7 @@
 * -------------------------------------------------------------------------- */

 /**
- * This tests the CUDA implementation of sorting.
+ * This tests the CUDA implementation of FFT.
 */

 #include "openmm/internal/AssertionUtilities.h"
@@ -38,12 +38,16 @@
 #include "CudaContext.h"
 #include "CudaFFT3D.h"
 #include "CudaSort.h"
-#include "fftpack.h"
 #include "sfmt/SFMT.h"
 #include "openmm/System.h"
+#include <complex>
 #include <iostream>
 #include <cmath>
 #include <set>
+#ifdef _MSC_VER
+  #define POCKETFFT_NO_VECTORS
+#endif
+#include "pocketfft_hdronly.h"

 using namespace OpenMM;
 using namespace std;
@@ -63,19 +67,19 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
    OpenMM_SFMT::SFMT sfmt;
    init_gen_rand(0, sfmt);
    vector<Real2> original(xsize*ysize*zsize);
-    vector<t_complex> reference(original.size());
+    vector<complex<double>> reference(original.size());
    for (int i = 0; i < (int) original.size(); i++) {
        Real2 value;
        value.x = (float) genrand_real2(sfmt);
        value.y = (float) genrand_real2(sfmt);
        original[i] = value;
-        reference[i] = t_complex(value.x, value.y);
+        reference[i] = complex<double>(value.x, value.y);
    }
    for (int i = 0; i < (int) reference.size(); i++) {
        if (realToComplex)
-            reference[i] = t_complex(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
+            reference[i] = complex<double>(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
        else
-            reference[i] = t_complex(original[i].x, original[i].y);
+            reference[i] = complex<double>(original[i].x, original[i].y);
    }
    CudaArray grid1(context, original.size(), sizeof(Real2), "grid1");
    CudaArray grid2(context, original.size(), sizeof(Real2), "grid2");
@@ -87,19 +91,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
    fft.execFFT(grid1, grid2, true);
    vector<Real2> result;
    grid2.download(result);
-    fftpack_t plan;
-    fftpack_init_3d(&plan, xsize, ysize, zsize);
-    fftpack_exec_3d(plan, FFTPACK_FORWARD, &reference[0], &reference[0]);
+    vector<size_t> shape = {(size_t) xsize, (size_t) ysize, (size_t) zsize};
+    vector<size_t> axes = {0, 1, 2};
+    vector<ptrdiff_t> stride = {(ptrdiff_t) (ysize*zsize*sizeof(complex<double>)),
+                                (ptrdiff_t) (zsize*sizeof(complex<double>)),
+                                (ptrdiff_t) sizeof(complex<double>)};
+    pocketfft::c2c(shape, stride, stride, axes, true, reference.data(), reference.data(), 1.0);
    int outputZSize = (realToComplex ? zsize/2+1 : zsize);
    for (int x = 0; x < xsize; x++)
        for (int y = 0; y < ysize; y++)
            for (int z = 0; z < outputZSize; z++) {
                int index1 = x*ysize*zsize + y*zsize + z;
                int index2 = x*ysize*outputZSize + y*outputZSize + z;
-                ASSERT_EQUAL_TOL(reference[index1].re, result[index2].x, 1e-3);
-                ASSERT_EQUAL_TOL(reference[index1].im, result[index2].y, 1e-3);
+                ASSERT_EQUAL_TOL(reference[index1].real(), result[index2].x, 1e-3);
+                ASSERT_EQUAL_TOL(reference[index1].imag(), result[index2].y, 1e-3);
            }
-    fftpack_destroy(plan);

    // Perform a backward transform and see if we get the original values.