/* -------------------------------------------------------------------------- *
 *                               OpenMMAmoeba                                 *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2008-2020 Stanford University and the Authors.      *
 * Authors: Peter Eastman, Mark Friedrichs                                    *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program. If not, see <http://www.gnu.org/licenses/>.       *
* * -------------------------------------------------------------------------- */ #ifdef WIN32 #define _USE_MATH_DEFINES // Needed to get M_PI #endif #include "AmoebaCudaKernels.h" #include "CudaAmoebaKernelSources.h" #include "openmm/internal/ContextImpl.h" #include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h" #include "openmm/internal/AmoebaMultipoleForceImpl.h" #include "openmm/internal/AmoebaWcaDispersionForceImpl.h" #include "openmm/internal/AmoebaTorsionTorsionForceImpl.h" #include "openmm/internal/AmoebaVdwForceImpl.h" #include "openmm/internal/NonbondedForceImpl.h" #include "CudaBondedUtilities.h" #include "CudaFFT3D.h" #include "CudaForceInfo.h" #include "CudaKernelSources.h" #include "SimTKOpenMMRealType.h" #include "jama_lu.h" #include #include #ifdef _MSC_VER #include #endif using namespace OpenMM; using namespace std; #define CHECK_RESULT(result, prefix) \ if (result != CUDA_SUCCESS) { \ std::stringstream m; \ m<setArg(index++, mm_double4(a[0], b[1], c[2], 0.0)); kernel->setArg(index++, mm_double4(1.0/a[0], 1.0/b[1], 1.0/c[2], 0.0)); kernel->setArg(index++, mm_double4(a[0], a[1], a[2], 0.0)); kernel->setArg(index++, mm_double4(b[0], b[1], b[2], 0.0)); kernel->setArg(index, mm_double4(c[0], c[1], c[2], 0.0)); } else { kernel->setArg(index++, mm_float4((float) a[0], (float) b[1], (float) c[2], 0.0f)); kernel->setArg(index++, mm_float4(1.0f/(float) a[0], 1.0f/(float) b[1], 1.0f/(float) c[2], 0.0f)); kernel->setArg(index++, mm_float4((float) a[0], (float) a[1], (float) a[2], 0.0f)); kernel->setArg(index++, mm_float4((float) b[0], (float) b[1], (float) b[2], 0.0f)); kernel->setArg(index, mm_float4((float) c[0], (float) c[1], (float) c[2], 0.0f)); } } /* -------------------------------------------------------------------------- * * AmoebaMultipole * * -------------------------------------------------------------------------- */ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() { cc.setAsCurrent(); if (hasInitializedFFT) 
cufftDestroy(fft); } void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) { CommonCalcAmoebaMultipoleForceKernel::initialize(system, force); if (usePME) { cufftResult result = cufftPlan3d(&fft, gridSizeX, gridSizeY, gridSizeZ, cc.getUseDoublePrecision() ? CUFFT_Z2Z : CUFFT_C2C); if (result != CUFFT_SUCCESS) throw OpenMMException("Error initializing FFT: "+cc.intToString(result)); hasInitializedFFT = true; } } void CudaCalcAmoebaMultipoleForceKernel::computeFFT(bool forward) { CudaArray& grid1 = dynamic_cast(cc).unwrap(pmeGrid1); CudaArray& grid2 = dynamic_cast(cc).unwrap(pmeGrid2); if (forward) { if (cc.getUseDoublePrecision()) cufftExecZ2Z(fft, (double2*) grid1.getDevicePointer(), (double2*) grid2.getDevicePointer(), CUFFT_FORWARD); else cufftExecC2C(fft, (float2*) grid1.getDevicePointer(), (float2*) grid2.getDevicePointer(), CUFFT_FORWARD); } else { if (cc.getUseDoublePrecision()) cufftExecZ2Z(fft, (double2*) grid2.getDevicePointer(), (double2*) grid1.getDevicePointer(), CUFFT_INVERSE); else cufftExecC2C(fft, (float2*) grid2.getDevicePointer(), (float2*) grid1.getDevicePointer(), CUFFT_INVERSE); } } /* -------------------------------------------------------------------------- * * HippoNonbondedForce * * -------------------------------------------------------------------------- */ CudaCalcHippoNonbondedForceKernel::~CudaCalcHippoNonbondedForceKernel() { cc.setAsCurrent(); if (sort != NULL) delete sort; if (hasInitializedFFT) { cufftDestroy(fftForward); cufftDestroy(fftBackward); cufftDestroy(dfftForward); cufftDestroy(dfftBackward); } } void CudaCalcHippoNonbondedForceKernel::initialize(const System& system, const HippoNonbondedForce& force) { CommonCalcHippoNonbondedForceKernel::initialize(system, force); if (usePME) { CudaContext& cu = dynamic_cast(cc); sort = new CudaSort(cu, new SortTrait(), cc.getNumAtoms()); cufftResult result = cufftPlan3d(&fftForward, gridSizeX, gridSizeY, gridSizeZ, 
cc.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C); if (result != CUFFT_SUCCESS) throw OpenMMException("Error initializing FFT: "+cc.intToString(result)); result = cufftPlan3d(&fftBackward, gridSizeX, gridSizeY, gridSizeZ, cc.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R); if (result != CUFFT_SUCCESS) throw OpenMMException("Error initializing FFT: "+cc.intToString(result)); result = cufftPlan3d(&dfftForward, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, cc.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C); if (result != CUFFT_SUCCESS) throw OpenMMException("Error initializing FFT: "+cc.intToString(result)); result = cufftPlan3d(&dfftBackward, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, cc.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R); if (result != CUFFT_SUCCESS) throw OpenMMException("Error initializing FFT: "+cc.intToString(result)); hasInitializedFFT = true; } } void CudaCalcHippoNonbondedForceKernel::computeFFT(bool forward, bool dispersion) { CudaArray& grid1 = dynamic_cast(cc).unwrap(pmeGrid1); CudaArray& grid2 = dynamic_cast(cc).unwrap(pmeGrid2); if (forward) { cufftHandle fft = dispersion ? dfftForward : fftForward; if (cc.getUseDoublePrecision()) cufftExecD2Z(fft, (double*) grid1.getDevicePointer(), (double2*) grid2.getDevicePointer()); else cufftExecR2C(fft, (float*) grid1.getDevicePointer(), (float2*) grid2.getDevicePointer()); } else { cufftHandle fft = dispersion ? dfftBackward : fftBackward; if (cc.getUseDoublePrecision()) cufftExecZ2D(fft, (double2*) grid2.getDevicePointer(), (double*) grid1.getDevicePointer()); else cufftExecC2R(fft, (float2*) grid2.getDevicePointer(), (float*) grid1.getDevicePointer()); } } void CudaCalcHippoNonbondedForceKernel::sortGridIndex() { sort->sort(dynamic_cast(cc).unwrap(pmeAtomGridIndex)); }