/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#include "openmm/OpenMMException.h"
#include
#include
#include
using namespace std;
#define SQRT sqrtf
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*);
void SetCalculateAmoebaMultipolePotentialSim(amoebaGpuContext amoebaGpu)
{
cudaError_t status;
gpuContext gpu = amoebaGpu->gpuContext;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "SetCalculateAmoebaMultipolePotentialSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");
status = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
RTERROR(status, "SetCalculateAmoebaMultipolePotentialSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
void GetCalculateAmoebaMultipolePotentialSim(amoebaGpuContext amoebaGpu)
{
cudaError_t status;
gpuContext gpu = amoebaGpu->gpuContext;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "GetCalculateAmoebaMultipolePotentialSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");
status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
RTERROR(status, "GetCalculateAmoebaMultipolePotentialSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
struct ElectrostaticPotentialParticle {
// coordinates charge
float x;
float y;
float z;
float q;
// lab frame dipole
float labFrameDipole[3];
// lab frame quadrupole
float labFrameQuadrupole[9];
// induced dipole
float inducedDipole[3];
};
/**---------------------------------------------------------------------------------------
Load data for particle w/ index=atomI
@param sa address to store atomI's coordinates and multipole moments
@param atomI index of atom whose data is to be stored
--------------------------------------------------------------------------------------- */
static __device__ void loadElectrostaticPotentialParticle( volatile struct ElectrostaticPotentialParticle* sA, unsigned int atomI ){
// coordinates & charge
sA->x = cSim.pPosq[atomI].x;
sA->y = cSim.pPosq[atomI].y;
sA->z = cSim.pPosq[atomI].z;
sA->q = cSim.pPosq[atomI].w;
// lab dipole
sA->labFrameDipole[0] = cAmoebaSim.pLabFrameDipole[atomI*3];
sA->labFrameDipole[1] = cAmoebaSim.pLabFrameDipole[atomI*3+1];
sA->labFrameDipole[2] = cAmoebaSim.pLabFrameDipole[atomI*3+2];
// lab quadrupole
sA->labFrameQuadrupole[0] = cAmoebaSim.pLabFrameQuadrupole[atomI*9];
sA->labFrameQuadrupole[1] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+1];
sA->labFrameQuadrupole[2] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+2];
sA->labFrameQuadrupole[3] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+3];
sA->labFrameQuadrupole[4] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+4];
sA->labFrameQuadrupole[5] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+5];
sA->labFrameQuadrupole[6] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+6];
sA->labFrameQuadrupole[7] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+7];
sA->labFrameQuadrupole[8] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+8];
// induced dipole
sA->inducedDipole[0] = cAmoebaSim.pInducedDipole[atomI*3];
sA->inducedDipole[1] = cAmoebaSim.pInducedDipole[atomI*3+1];
sA->inducedDipole[2] = cAmoebaSim.pInducedDipole[atomI*3+2];
}
/**---------------------------------------------------------------------------------------
Calculate potential at grid point due atomI
Code adapted from TINKER routine potpoint in potpoint.f
@param atomI atomI's coordinates and multipole moments
@param gridPoint grid coordinates
@param potential output potential
--------------------------------------------------------------------------------------- */
__device__ void calculateElectrostaticPotentialForAtomGridPoint_kernel( volatile ElectrostaticPotentialParticle& atomI, volatile float4& gridPoint, float* potential ){
float xr = atomI.x - gridPoint.x;
float yr = atomI.y - gridPoint.y;
float zr = atomI.z - gridPoint.z;
xr -= floorf(xr*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
yr -= floorf(yr*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
zr -= floorf(zr*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
float r2 = xr*xr + yr*yr + zr*zr;
float r = sqrtf( r2 );
float rr1 = 1.0f/r;
*potential = atomI.q*rr1;
float rr2 = rr1*rr1;
float rr3 = rr1*rr2;
float scd = atomI.labFrameDipole[0]*xr + atomI.labFrameDipole[1]*yr + atomI.labFrameDipole[2]*zr;
float scu = atomI.inducedDipole[0]*xr + atomI.inducedDipole[1]*yr + atomI.inducedDipole[2]*zr;
*potential -= (scd + scu)*rr3;
float rr5 = 3.0f*rr3*rr2;
float scq = xr*(atomI.labFrameQuadrupole[0]*xr + atomI.labFrameQuadrupole[1]*yr + atomI.labFrameQuadrupole[2]*zr);
scq += yr*(atomI.labFrameQuadrupole[1]*xr + atomI.labFrameQuadrupole[4]*yr + atomI.labFrameQuadrupole[5]*zr);
scq += zr*(atomI.labFrameQuadrupole[2]*xr + atomI.labFrameQuadrupole[5]*yr + atomI.labFrameQuadrupole[8]*zr);
*potential += scq*rr5;
return;
}
// Include versions of the kernels for N x PotentialGridSize calculations.
#undef USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##NxG##b
#include "kCalculateAmoebaCudaElectrostaticPotential.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##NxGByWarp##b
#include "kCalculateAmoebaCudaElectrostaticPotential.h"
// Kernel to reduce potential
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReducePotential_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
float conversionFactor = (cAmoebaSim.electric/cAmoebaSim.dielec);
// Reduce potential
while (pos < cAmoebaSim.paddedPotentialGridSize)
{
float totalPotential = 0.0f;
float* pFt = cAmoebaSim.pPotential + pos;
int i = cSim.outputBuffers;
while (i >= 4)
{
float f1 = *pFt;
pFt += cAmoebaSim.paddedPotentialGridSize;
float f2 = *pFt;
pFt += cAmoebaSim.paddedPotentialGridSize;
float f3 = *pFt;
pFt += cAmoebaSim.paddedPotentialGridSize;
float f4 = *pFt;
pFt += cAmoebaSim.paddedPotentialGridSize;
totalPotential += f1 + f2 + f3 + f4;
i -= 4;
}
if (i >= 2)
{
float f1 = *pFt;
pFt += cAmoebaSim.paddedPotentialGridSize;
float f2 = *pFt;
pFt += cAmoebaSim.paddedPotentialGridSize;
totalPotential += f1 + f2;
i -= 2;
}
if (i > 0)
{
totalPotential += *pFt;
}
totalPotential *= conversionFactor;
pFt = cAmoebaSim.pPotential + pos;
*pFt = totalPotential;
pos += gridDim.x*blockDim.x;
}
}
/**---------------------------------------------------------------------------------------
Reduce Amoeba electrostatic potential
@param gpu gpu context
--------------------------------------------------------------------------------------- */
void kReducePotential(gpuContext gpu)
{
kReducePotential_kernel<<sim.blocks, gpu->sim.bsf_reduce_threads_per_block>>>();
LAUNCHERROR("kReducePotential");
}
/**---------------------------------------------------------------------------------------
Compute Amoeba electrostatic potential
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
void cudaComputeAmoebaElectrostaticPotential( amoebaGpuContext amoebaGpu ){
// ---------------------------------------------------------------------------------------
gpuContext gpu = amoebaGpu->gpuContext;
// on first pass, set threads/block
static unsigned int threadsPerBlock = 0;
if( threadsPerBlock == 0 ){
unsigned int maxThreads;
if (gpu->sm_version >= SM_20)
//maxThreads = 384;
maxThreads = 512;
else if (gpu->sm_version >= SM_12)
maxThreads = 128;
else
maxThreads = 64;
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(ElectrostaticPotentialParticle), gpu->sharedMemoryPerBlock), maxThreads);
}
if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaCudaElectrostaticPotentialNxGByWarp_kernel<<sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticPotentialParticle)*threadsPerBlock>>>( );
} else {
kCalculateAmoebaCudaElectrostaticPotentialNxG_kernel<<sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticPotentialParticle)*threadsPerBlock>>>( );
}
LAUNCHERROR("kCalculateAmoebaCudaElectrostaticPotential");
kReducePotential( amoebaGpu->gpuContext );
// ---------------------------------------------------------------------------------------
}
void kCalculateAmoebaMultipolePotential(amoebaGpuContext amoebaGpu )
{
// setup
kSetupAmoebaMultipoleForces(amoebaGpu, false );
// calculate electrostatic potential
cudaComputeAmoebaElectrostaticPotential( amoebaGpu );
}