/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
void SetCalculateAmoebaCudaFixedEAndGKFieldsSim(amoebaGpuContext amoebaGpu)
{
cudaError_t status;
gpuContext gpu = amoebaGpu->gpuContext;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "SetCalculateAmoebaCudaFixedEAndGKFieldSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");
status = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
RTERROR(status, "SetCalculateAmoebaCudaFixedEAndGKFieldSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
void GetCalculateAmoebaCudaFixedEAndGKFieldSim(amoebaGpuContext amoebaGpu)
{
cudaError_t status;
gpuContext gpu = amoebaGpu->gpuContext;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "GetCalculateAmoebaCudaFixedEAndGKFieldSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");
status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
RTERROR(status, "GetCalculateAmoebaCudaFixedEAndGKFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
// reduce psWorkArray_3_1 -> E_Field
// reduce psWorkArray_3_2 -> E_FieldPolar
// reduce psWorkArray_3_3 -> Gk_FieldPolar
static void kReduceEAndGkFields(amoebaGpuContext amoebaGpu )
{
gpuContext gpu = amoebaGpu->gpuContext;
kReduceFields_kernel<<sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psE_Field->_pDevData, 0 );
LAUNCHERROR("kReduceEAndGK_Fields1");
kReduceFields_kernel<<sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_2->_pDevData, amoebaGpu->psE_FieldPolar->_pDevData, 0 );
LAUNCHERROR("kReduceEAndGK_Fields2");
kReduceFields_kernel<<sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_3->_pDevData, amoebaGpu->psGk_Field->_pDevData, 0 );
LAUNCHERROR("kReduceEAndGK_Fields3");
}
// file includes FixedFieldParticle struct definition/load/unload struct and kernel body for fixed E-field
#define GK
#include "kCalculateAmoebaCudaFixedFieldParticle.h"
#undef GK
__device__ void calculateFixedGkFieldPairIxn_kernel( float4 atomCoordinatesI, float4 atomCoordinatesJ,
float* labFrameDipoleI, float* labFrameDipoleJ,
float* labFrameQuadrupoleI, float* labFrameQuadrupoleJ,
float rb2,
float outputField[2][3]
){
float xi,yi,zi;
float xr,yr,zr;
float xr2,yr2,zr2;
float ci,ck;
float uxi,uyi,uzi;
float uxk,uyk,uzk;
float qxxi,qxyi,qxzi;
float qyyi,qyzi,qzzi;
float qxxk,qxyk,qxzk;
float qyyk,qyzk,qzzk;
float r2;
float fc,fd,fq;
float expterm;
float gf,gf2,gf3,gf5;
float gf7;
float expc,dexpc;
float expc1,expcdexpc;
float a[4][4];
float gc[5];
float gux[11],guy[11],guz[11];
float gqxx[5],gqxy[5];
float gqxz[5],gqyy[5];
float gqyz[5],gqzz[5];
float gkc;
gkc = cAmoebaSim.gkc;
fc = cAmoebaSim.fc;
fd = cAmoebaSim.fd;
fq = cAmoebaSim.fq;
xi = atomCoordinatesI.x;
yi = atomCoordinatesI.y;
zi = atomCoordinatesI.z;
ci = atomCoordinatesI.w;
uxi = labFrameDipoleI[0];
uyi = labFrameDipoleI[1];
uzi = labFrameDipoleI[2];
qxxi = labFrameQuadrupoleI[0];
qxyi = labFrameQuadrupoleI[1];
qxzi = labFrameQuadrupoleI[2];
qyyi = labFrameQuadrupoleI[4];
qyzi = labFrameQuadrupoleI[5];
qzzi = labFrameQuadrupoleI[8];
xr = atomCoordinatesJ.x - xi;
yr = atomCoordinatesJ.y - yi;
zr = atomCoordinatesJ.z - zi;
ck = atomCoordinatesJ.w;
xr2 = xr*xr;
yr2 = yr*yr;
zr2 = zr*zr;
r2 = xr2 + yr2 + zr2;
uxk = labFrameDipoleJ[0];
uyk = labFrameDipoleJ[1];
uzk = labFrameDipoleJ[2];
qxxk = labFrameQuadrupoleJ[0];
qxyk = labFrameQuadrupoleJ[1];
qxzk = labFrameQuadrupoleJ[2];
qyyk = labFrameQuadrupoleJ[4];
qyzk = labFrameQuadrupoleJ[5];
qzzk = labFrameQuadrupoleJ[8];
expterm = exp(-r2/(gkc*rb2));
expc = expterm / gkc;
dexpc = -2.0f / (gkc*rb2);
gf2 = 1.0f / (r2+rb2*expterm);
gf = sqrtf(gf2);
gf3 = gf2 * gf;
gf5 = gf3 * gf2;
gf7 = gf5 * gf2;
// reaction potential auxiliary terms
a[0][0] = gf;
a[1][0] = -gf3;
a[2][0] = 3.0f * gf5;
a[3][0] = -15.0f * gf7;
// reaction potential gradient auxiliary terms
expc1 = 1.0f - expc;
a[0][1] = expc1 * a[1][0];
a[1][1] = expc1 * a[2][0];
a[2][1] = expc1 * a[3][0];
// dipole second reaction potential gradient auxiliary term
expcdexpc = -expc * dexpc;
a[1][2] = expc1*a[2][1] + expcdexpc*a[2][0];
// multiply the auxillary terms by dielectric functions;
a[0][1] = fc * a[0][1];
a[1][0] = fd * a[1][0];
a[1][1] = fd * a[1][1];
a[1][2] = fd * a[1][2];
a[2][0] = fq * a[2][0];
a[2][1] = fq * a[2][1];
// unweighted dipole reaction potential tensor
gux[1] = xr * a[1][0];
guy[1] = yr * a[1][0];
guz[1] = zr * a[1][0];
// unweighted reaction potential gradient tensor
gc[2] = xr * a[0][1];
gc[3] = yr * a[0][1];
gc[4] = zr * a[0][1];
gux[2] = a[1][0] + xr2*a[1][1];
gux[3] = xr * yr * a[1][1];
gux[4] = xr * zr * a[1][1];
guy[2] = gux[3];
guy[3] = a[1][0] + yr2*a[1][1];
guy[4] = yr * zr * a[1][1];
guz[2] = gux[4];
guz[3] = guy[4];
guz[4] = a[1][0] + zr2*a[1][1];
gqxx[2] = xr * (2.0f*a[2][0]+xr2*a[2][1]);
gqxx[3] = yr * xr2*a[2][1];
gqxx[4] = zr * xr2*a[2][1];
gqyy[2] = xr * yr2*a[2][1];
gqyy[3] = yr * (2.0f*a[2][0]+yr2*a[2][1]);
gqyy[4] = zr * yr2 * a[2][1];
gqzz[2] = xr * zr2 * a[2][1];
gqzz[3] = yr * zr2 * a[2][1];
gqzz[4] = zr * (2.0f*a[2][0]+zr2*a[2][1]);
gqxy[2] = yr * (a[2][0]+xr2*a[2][1]);
gqxy[3] = xr * (a[2][0]+yr2*a[2][1]);
gqxy[4] = zr * xr * yr * a[2][1];
gqxz[2] = zr * (a[2][0]+xr2*a[2][1]);
gqxz[3] = gqxy[4];
gqxz[4] = xr * (a[2][0]+zr2*a[2][1]);
gqyz[2] = gqxy[4];
gqyz[3] = zr * (a[2][0]+yr2*a[2][1]);
gqyz[4] = yr * (a[2][0]+zr2*a[2][1]);
// unweighted dipole second reaction potential gradient tensor
gux[5] = xr * (3.0f*a[1][1]+xr2*a[1][2]);
gux[6] = yr * (a[1][1]+xr2*a[1][2]);
gux[7] = zr * (a[1][1]+xr2*a[1][2]);
gux[8] = xr * (a[1][1]+yr2*a[1][2]);
gux[9] = zr * xr * yr * a[1][2];
gux[10] = xr * (a[1][1]+zr2*a[1][2]);
guy[5] = yr * (a[1][1]+xr2*a[1][2]);
guy[6] = xr * (a[1][1]+yr2*a[1][2]);
guy[7] = gux[9];
guy[8] = yr * (3.0f*a[1][1]+yr2*a[1][2]);
guy[9] = zr * (a[1][1]+yr2*a[1][2]);
guy[10] = yr * (a[1][1]+zr2*a[1][2]);
guz[5] = zr * (a[1][1]+xr2*a[1][2]);
guz[6] = gux[9];
guz[7] = xr * (a[1][1]+zr2*a[1][2]);
guz[8] = zr * (a[1][1]+yr2*a[1][2]);
guz[9] = yr * (a[1][1]+zr2*a[1][2]);
guz[10] = zr * (3.0f*a[1][1]+zr2*a[1][2]);
// generalized Kirkwood permanent reaction field
outputField[0][0] = uxk*gux[2] + uyk*gux[3] + uzk*gux[4]
+ 0.5f * (ck*gux[1] + qxxk*gux[5]
+ qyyk*gux[8] + qzzk*gux[10]
+ 2.0f*(qxyk*gux[6]+qxzk*gux[7]
+ qyzk*gux[9]))
+ 0.5f * (ck*gc[2] + qxxk*gqxx[2]
+ qyyk*gqyy[2] + qzzk*gqzz[2]
+ 2.0f*(qxyk*gqxy[2]+qxzk*gqxz[2]
+ qyzk*gqyz[2]));
outputField[0][1] = uxk*guy[2] + uyk*guy[3] + uzk*guy[4]
+ 0.5f * (ck*guy[1] + qxxk*guy[5]
+ qyyk*guy[8] + qzzk*guy[10]
+ 2.0f*(qxyk*guy[6]+qxzk*guy[7]
+ qyzk*guy[9]))
+ 0.5f * (ck*gc[3] + qxxk*gqxx[3]
+ qyyk*gqyy[3] + qzzk*gqzz[3]
+ 2.0f*(qxyk*gqxy[3]+qxzk*gqxz[3]
+ qyzk*gqyz[3]));
outputField[0][2] = uxk*guz[2] + uyk*guz[3] + uzk*guz[4]
+ 0.5f * (ck*guz[1] + qxxk*guz[5]
+ qyyk*guz[8] + qzzk*guz[10]
+ 2.0f*(qxyk*guz[6]+qxzk*guz[7]
+ qyzk*guz[9]))
+ 0.5f * (ck*gc[4] + qxxk*gqxx[4]
+ qyyk*gqyy[4] + qzzk*gqzz[4]
+ 2.0f*(qxyk*gqxy[4]+qxzk*gqxz[4]
+ qyzk*gqyz[4]));
outputField[1][0] = uxi*gux[2] + uyi*gux[3] + uzi*gux[4]
- 0.5f * (ci*gux[1] + qxxi*gux[5]
+ qyyi*gux[8] + qzzi*gux[10]
+ 2.0f*(qxyi*gux[6]+qxzi*gux[7]
+ qyzi*gux[9]))
- 0.5f * (ci*gc[2] + qxxi*gqxx[2]
+ qyyi*gqyy[2] + qzzi*gqzz[2]
+ 2.0f*(qxyi*gqxy[2]+qxzi*gqxz[2]
+ qyzi*gqyz[2]));
outputField[1][1] = uxi*guy[2] + uyi*guy[3] + uzi*guy[4]
- 0.5f * (ci*guy[1] + qxxi*guy[5]
+ qyyi*guy[8] + qzzi*guy[10]
+ 2.0f*(qxyi*guy[6]+qxzi*guy[7]
+ qyzi*guy[9]))
- 0.5f * (ci*gc[3] + qxxi*gqxx[3]
+ qyyi*gqyy[3] + qzzi*gqzz[3]
+ 2.0f*(qxyi*gqxy[3]+qxzi*gqxz[3]
+ qyzi*gqyz[3]));
outputField[1][2] = uxi*guz[2] + uyi*guz[3] + uzi*guz[4]
- 0.5f * (ci*guz[1] + qxxi*guz[5]
+ qyyi*guz[8] + qzzi*guz[10]
+ 2.0f*(qxyi*guz[6]+qxzi*guz[7]
+ qyzi*guz[9]))
- 0.5f * (ci*gc[4] + qxxi*gqxx[4]
+ qyyi*gqyy[4] + qzzi*gqzz[4]
+ 2.0f*(qxyi*gqxy[4]+qxzi*gqxz[4]
+ qyzi*gqyz[4]));
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaFixedEAndGkFields.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaFixedEAndGkFields.h"
/**---------------------------------------------------------------------------------------
Compute fixed electric field
@param amoebaGpu amoebaGpu context
@param gpu OpenMM gpu Cuda context
--------------------------------------------------------------------------------------- */
void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
{
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
gpuContext gpu = amoebaGpu->gpuContext;
// on first pass, set threads/block
static unsigned int threadsPerBlock = 0;
if( threadsPerBlock == 0 ){
unsigned int maxThreads;
if (gpu->sm_version >= SM_20)
maxThreads = 256;
else if (gpu->sm_version >= SM_12)
maxThreads = 128;
else
maxThreads = 64;
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(FixedFieldParticle), gpu->sharedMemoryPerBlock ), maxThreads);
}
kClearFields_3( amoebaGpu, 3 );
if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaFixedEAndGkFieldN2ByWarp_kernel<<sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData,
amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psWorkArray_3_2->_pDevData,
amoebaGpu->psWorkArray_3_3->_pDevData );
} else {
kCalculateAmoebaFixedEAndGkFieldN2_kernel<<sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData,
amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psWorkArray_3_2->_pDevData,
amoebaGpu->psWorkArray_3_3->_pDevData );
}
LAUNCHERROR("kCalculateAmoebaFixedEAndGkFieldN2_kernel");
kReduceEAndGkFields( amoebaGpu );
}