Unverified commit 1dac981a, authored by Peter Eastman, committed by GitHub
Browse files

Use PocketFFT (#3667)

* Use PocketFFT instead of FFTW

* Minor cleanup

* Use PocketFFT instead of fftpack for reference platform

* Remove FFTW as a dependency

* Converted a test case to use PocketFFT

* Fixed an incorrect comment
parent 583471a6
......@@ -42,7 +42,6 @@ jobs:
- script: |
conda install cmake ^
cython ^
fftw ^
ninja ^
numpy ^
swig ^
......
......@@ -88,7 +88,7 @@ ENDIF(${CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT})
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 platforms/reference serialization libraries/irrxml)
SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 libraries/pocketfft platforms/reference serialization libraries/irrxml)
IF(X86 OR ARM)
SET(OPENMM_SOURCE_SUBDIRS ${OPENMM_SOURCE_SUBDIRS} libraries/vecmath)
ENDIF()
......@@ -392,12 +392,7 @@ ENDIF(OPENMM_BUILD_DRUDE_PLUGIN)
# CPU PME plugin
FIND_PACKAGE(FFTW QUIET)
IF(FFTW_FOUND)
SET(OPENMM_BUILD_PME_PLUGIN ON CACHE BOOL "Build CPU PME plugin")
ELSE(FFTW_FOUND)
SET(OPENMM_BUILD_PME_PLUGIN OFF CACHE BOOL "Build CPU PME plugin")
ENDIF(FFTW_FOUND)
SET(OPENMM_BUILD_PME_PLUGIN ON CACHE BOOL "Build CPU PME plugin")
SET(OPENMM_BUILD_PME_PATH)
IF(OPENMM_BUILD_PME_PLUGIN)
SET(OPENMM_BUILD_PME_PATH ${CMAKE_CURRENT_SOURCE_DIR}/plugins/cpupme)
......
......@@ -19,14 +19,6 @@ install:
- "set PATH=%APPVEYOR_BUILD_FOLDER%\\cclash-0.3.14;%PATH%"
- "set CCLASH_DIR=C:\\ProgramData\\cclash"
# Download FFTW3 for PME plugin
- ps: Invoke-WebRequest ftp://ftp.fftw.org/pub/fftw/fftw-3.3.4-dll64.zip -OutFile fftw-3.3.4-dll64.zip
- 7z x fftw-3.3.4-dll64.zip -oC:\fftw > null
- cd C:\fftw
- lib /def:libfftw3f-3.def
- cd %APPVEYOR_BUILD_FOLDER%
- "set PATH=C:\\fftw;%PATH%"
# Download and install some OpenMM build dependencies (doxygen, swig)
- choco install -y doxygen.install swig > null
......@@ -53,8 +45,6 @@ build_script:
"-DOPENMM_BUILD_PME_PLUGIN=ON
-DOPENCL_INCLUDE_DIR=C:/opencl/inc
-DOPENCL_LIBRARY=C:/opencl/lib/OpenCL.lib
-DFFTW_LIBRARY=C:/fftw/libfftw3f-3.lib
-DFFTW_INCLUDES=C:/fftw
-DOPENMM_BUILD_EXAMPLES=OFF
-DOPENMM_BUILD_OPENCL_TESTS=OFF
-DCMAKE_BUILD_TYPE=Release
......
# - Find FFTW
# Find the native FFTW includes and library
#
# FFTW_INCLUDES - where to find fftw3.h
# FFTW_LIBRARY - the main FFTW library.
# FFTW_THREADS_LIBRARY - the FFTW multithreading support library.
# FFTW_FOUND - True if FFTW found.
if (FFTW_INCLUDES)
# Already in cache, be silent
set (FFTW_FIND_QUIETLY TRUE)
endif (FFTW_INCLUDES)
find_path (FFTW_INCLUDES fftw3.h)
find_library (FFTW_LIBRARY NAMES fftw3f)
find_library (FFTW_THREADS_LIBRARY NAMES fftw3f_threads)
# handle the QUIETLY and REQUIRED arguments and set FFTW_FOUND to TRUE if
# all listed variables are TRUE
include (FindPackageHandleStandardArgs)
find_package_handle_standard_args (FFTW DEFAULT_MSG FFTW_LIBRARY FFTW_INCLUDES)
mark_as_advanced (FFTW_LIBRARY FFTW_THREADS_LIBRARY FFTW_INCLUDES)
......@@ -10,7 +10,6 @@ dependencies:
- python
- cython
- swig
- fftw
- numpy
- doxygen 1.9.1
# test
......
......@@ -10,7 +10,6 @@ dependencies:
- python
- cython
- swig
- fftw
- numpy
- doxygen 1.8.14
# test
......
......@@ -12,7 +12,6 @@ dependencies:
- python
- cython
- swig
- fftw
- numpy
- ocl-icd-system
- doxygen 1.8.14
......
......@@ -12,7 +12,6 @@ dependencies:
- python
- cython
- swig
- fftw
- numpy
- doxygen 1.8.14
- khronos-opencl-icd-loader
......
......@@ -71,8 +71,6 @@ CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=$CLANG_PREFIX/bin/clang -DCMAKE_CXX_COMPILER=$
# Ensure we build a release
CMAKE_FLAGS+=" -DCMAKE_BUILD_TYPE=Release"
# setting the rpath so that libOpenMMPME.so finds the right libfftw3
#CMAKE_FLAGS+=" -DCMAKE_INSTALL_RPATH=.."
# Use NVIDIA CUDA 8.0
CMAKE_FLAGS+=" -DCUDA_CUDART_LIBRARY=/usr/local/cuda-8.0/lib64/libcudart.so"
CMAKE_FLAGS+=" -DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-8.0/bin/nvcc"
......@@ -84,11 +82,6 @@ CMAKE_FLAGS+=" -DOPENCL_INCLUDE_DIR=/opt/AMDAPPSDK-3.0/include/"
CMAKE_FLAGS+=" -DOPENCL_LIBRARY=/opt/AMDAPPSDK-3.0/lib/x86_64/libOpenCL.so"
# Generate API docs
CMAKE_FLAGS+=" -DOPENMM_GENERATE_API_DOCS=ON"
# Set location for FFTW3
PREFIX="$WORKSPACE/miniconda"
CMAKE_FLAGS+=" -DFFTW_INCLUDES=$PREFIX/include"
CMAKE_FLAGS+=" -DFFTW_LIBRARY=$PREFIX/lib/libfftw3f.so"
CMAKE_FLAGS+=" -DFFTW_THREADS_LIBRARY=$PREFIX/lib/libfftw3f_threads.so"
# Necessary to find GL headers
CMAKE_FLAGS+=" -DCMAKE_CXX_FLAGS_RELEASE=-I/usr/include/nvidia/"
......
......@@ -29,4 +29,4 @@ yum install -y libgcrypt
# Ensure configuration is up to date.
conda config --add channels omnia
conda install --yes --quiet swig fftw3f pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
conda install --yes --quiet swig pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
......@@ -17,20 +17,11 @@ fi
CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=$INSTALL"
# setting the rpath so that libOpenMMPME.so finds the right libfftw3
#CMAKE_FLAGS+=" -DCMAKE_INSTALL_RPATH=.."
CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++"
CMAKE_FLAGS+=" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.9"
CMAKE_FLAGS+=" -DCMAKE_OSX_SYSROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk"
CMAKE_FLAGS+=" -DOPENMM_GENERATE_API_DOCS=ON"
# Build in subdirectory.
# Set location for FFTW3
PREFIX="$WORKSPACE/miniconda"
CMAKE_FLAGS+=" -DFFTW_INCLUDES=$PREFIX/include"
CMAKE_FLAGS+=" -DFFTW_LIBRARY=$PREFIX/lib/libfftw3f.dylib"
CMAKE_FLAGS+=" -DFFTW_THREADS_LIBRARY=$PREFIX/lib/libfftw3f_threads.dylib"
# Build in subdirectory.
if [ -e build ]; then
rm -rf build
......
......@@ -26,5 +26,5 @@ export PATH=$WORKSPACE/miniconda/bin:$PATH
# Ensure configuration is up to date.
conda config --add channels http://conda.binstar.org/omnia
conda install --yes --quiet swig fftw3f pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
conda install --yes --quiet swig pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
pip install sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen
......@@ -71,8 +71,6 @@ CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=$CLANG_PREFIX/bin/clang -DCMAKE_CXX_COMPILER=$
# Ensure we build a release
CMAKE_FLAGS+=" -DCMAKE_BUILD_TYPE=Release"
# setting the rpath so that libOpenMMPME.so finds the right libfftw3
#CMAKE_FLAGS+=" -DCMAKE_INSTALL_RPATH=.."
# Use NVIDIA CUDA 8.0
CMAKE_FLAGS+=" -DCUDA_CUDART_LIBRARY=/usr/local/cuda-8.0/lib64/libcudart.so"
CMAKE_FLAGS+=" -DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-8.0/bin/nvcc"
......@@ -84,11 +82,6 @@ CMAKE_FLAGS+=" -DOPENCL_INCLUDE_DIR=/opt/AMDAPPSDK-3.0/include/"
CMAKE_FLAGS+=" -DOPENCL_LIBRARY=/opt/AMDAPPSDK-3.0/lib/x86_64/libOpenCL.so"
# Generate API docs
CMAKE_FLAGS+=" -DOPENMM_GENERATE_API_DOCS=ON"
# Set location for FFTW3
PREFIX="$WORKSPACE/miniconda"
CMAKE_FLAGS+=" -DFFTW_INCLUDES=$PREFIX/include"
CMAKE_FLAGS+=" -DFFTW_LIBRARY=$PREFIX/lib/libfftw3f.so"
CMAKE_FLAGS+=" -DFFTW_THREADS_LIBRARY=$PREFIX/lib/libfftw3f_threads.so"
# Necessary to find GL headers
CMAKE_FLAGS+=" -DCMAKE_CXX_FLAGS_RELEASE=-I/usr/include/nvidia/"
......
......@@ -29,4 +29,4 @@ yum install -y libgcrypt
# Ensure configuration is up to date.
conda config --add channels omnia
conda install --yes --quiet swig fftw3f pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
conda install --yes --quiet swig pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
mkdir build
cd build
set FFTW=C:\Miniconda3\pkgs\fftw3f-3.3.4-vc14_2\Library
set APPSDK=C:\Program Files (x86)\AMD APP SDK\2.9-1
"C:\Program Files\CMake\bin\cmake.exe" .. -G "NMake Makefiles JOM" -DCMAKE_BUILD_TYPE=Release -DOPENMM_GENERATE_API_DOCS=ON ^
-DOPENCL_INCLUDE_DIR="%APPSDK%\include" -DOPENCL_LIBRARY="%APPSDK%\lib\x86_64\OpenCL.lib" ^
-DFFTW_INCLUDES="%FFTW%/include" -DFFTW_LIBRARY="%FFTW%/lib/libfftw3f-3.lib"
-DOPENCL_INCLUDE_DIR="%APPSDK%\include" -DOPENCL_LIBRARY="%APPSDK%\lib\x86_64\OpenCL.lib"
jom
jom PythonInstall
......
......@@ -19,7 +19,7 @@ wget https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -U
# Install software with conda.
& "C:\Miniconda3\Scripts\conda.exe" config --add channels omnia --add channels conda-forge
& "C:\Miniconda3\Scripts\conda.exe" install -y fftw3f==3.3.4=vc14_2 jinja2 lxml sphinx sphinxcontrib-autodoc_doxygen sphinxcontrib-lunrsearch conda-build anaconda-client
& "C:\Miniconda3\Scripts\conda.exe" install -y jinja2 lxml sphinx sphinxcontrib-autodoc_doxygen sphinxcontrib-lunrsearch conda-build anaconda-client
& "C:\Miniconda3\Scripts\pip.exe" install sphinxcontrib.bibtex
# Install software with choco.
......
......@@ -78,7 +78,7 @@ them is with conda. The following command will install everything needed to
build OpenMM.
::
conda install -c conda-forge cmake make cython swig fftw doxygen numpy
conda install -c conda-forge cmake make cython swig doxygen numpy
Step 1: Configure with CMake
============================
......@@ -213,7 +213,7 @@ Before building OpenMM from source, you will need certain tools.
C++ compiler
------------
On Windows systems, use the C++ compiler in Visual Studio 2015 or later. You
On Windows systems, use the C++ compiler in Visual Studio 2017 or later. You
can download a free version of Visual Studio from https://visualstudio.microsoft.com.
Python
......@@ -267,7 +267,7 @@ It will open a command window that is preconfigured for conda. Enter the
following command to install everything needed to build OpenMM.
::
conda install -c conda-forge cython swig fftw doxygen numpy
conda install -c conda-forge cython swig doxygen numpy
Step 1: Configure with CMake
============================
......
......@@ -35,9 +35,7 @@ The OpenCL Platform recognizes the following Platform-specific properties:
is the most accurate option, but is usually much slower than the others.
* UseCpuPme: This selects whether to use the CPU-based PME
implementation. The allowed values are “true” or “false”. Depending on your
hardware, this might (or might not) improve performance. To use this option,
you must have FFTW (single precision, multithreaded) installed, and your CPU
must support SSE 4.1.
hardware, this might (or might not) improve performance.
* OpenCLPlatformIndex: When multiple OpenCL implementations are installed on
your computer, this is used to select which one to use. The value is the
zero-based index of the platform (in the OpenCL sense, not the OpenMM sense) to use,
......@@ -74,9 +72,7 @@ The CUDA Platform recognizes the following Platform-specific properties:
is the most accurate option, but is usually much slower than the others.
* UseCpuPme: This selects whether to use the CPU-based PME implementation.
The allowed values are “true” or “false”. Depending on your hardware, this
might (or might not) improve performance. To use this option, you must have
FFTW (single precision, multithreaded) installed, and your CPU must support SSE
4.1.
might (or might not) improve performance.
* CudaCompiler: This specifies the path to the CUDA kernel compiler. Versions
of CUDA before 7.0 require a separate compiler executable. If you do
not specify this, OpenMM will try to locate the compiler itself. Specify this
......
This diff is collapsed.
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Portions copyright (c) 2011-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -30,7 +30,7 @@
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of sorting.
* This tests the CUDA implementation of FFT.
*/
#include "openmm/internal/AssertionUtilities.h"
......@@ -38,12 +38,16 @@
#include "CudaContext.h"
#include "CudaFFT3D.h"
#include "CudaSort.h"
#include "fftpack.h"
#include "sfmt/SFMT.h"
#include "openmm/System.h"
#include <complex>
#include <iostream>
#include <cmath>
#include <set>
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
using namespace OpenMM;
using namespace std;
......@@ -63,19 +67,19 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<Real2> original(xsize*ysize*zsize);
vector<t_complex> reference(original.size());
vector<complex<double>> reference(original.size());
for (int i = 0; i < (int) original.size(); i++) {
Real2 value;
value.x = (float) genrand_real2(sfmt);
value.y = (float) genrand_real2(sfmt);
original[i] = value;
reference[i] = t_complex(value.x, value.y);
reference[i] = complex<double>(value.x, value.y);
}
for (int i = 0; i < (int) reference.size(); i++) {
if (realToComplex)
reference[i] = t_complex(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
reference[i] = complex<double>(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
else
reference[i] = t_complex(original[i].x, original[i].y);
reference[i] = complex<double>(original[i].x, original[i].y);
}
CudaArray grid1(context, original.size(), sizeof(Real2), "grid1");
CudaArray grid2(context, original.size(), sizeof(Real2), "grid2");
......@@ -87,19 +91,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
fft.execFFT(grid1, grid2, true);
vector<Real2> result;
grid2.download(result);
fftpack_t plan;
fftpack_init_3d(&plan, xsize, ysize, zsize);
fftpack_exec_3d(plan, FFTPACK_FORWARD, &reference[0], &reference[0]);
vector<size_t> shape = {(size_t) xsize, (size_t) ysize, (size_t) zsize};
vector<size_t> axes = {0, 1, 2};
vector<ptrdiff_t> stride = {(ptrdiff_t) (ysize*zsize*sizeof(complex<double>)),
(ptrdiff_t) (zsize*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, reference.data(), reference.data(), 1.0);
int outputZSize = (realToComplex ? zsize/2+1 : zsize);
for (int x = 0; x < xsize; x++)
for (int y = 0; y < ysize; y++)
for (int z = 0; z < outputZSize; z++) {
int index1 = x*ysize*zsize + y*zsize + z;
int index2 = x*ysize*outputZSize + y*outputZSize + z;
ASSERT_EQUAL_TOL(reference[index1].re, result[index2].x, 1e-3);
ASSERT_EQUAL_TOL(reference[index1].im, result[index2].y, 1e-3);
ASSERT_EQUAL_TOL(reference[index1].real(), result[index2].x, 1e-3);
ASSERT_EQUAL_TOL(reference[index1].imag(), result[index2].y, 1e-3);
}
fftpack_destroy(plan);
// Perform a backward transform and see if we get the original values.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment