Commit 222378c6 authored by Peter Eastman's avatar Peter Eastman
Browse files

Began converting AmoebaGeneralizedKirkwoodForce to new CUDA platform

parent 235a88e5
...@@ -58,6 +58,9 @@ public: ...@@ -58,6 +58,9 @@ public:
return std::map<std::string, double>(); // This force field doesn't define any parameters. return std::map<std::string, double>(); // This force field doesn't define any parameters.
} }
std::vector<std::string> getKernelNames(); std::vector<std::string> getKernelNames();
Kernel& getKernel() {
return kernel;
}
private: private:
AmoebaGeneralizedKirkwoodForce& owner; AmoebaGeneralizedKirkwoodForce& owner;
Kernel kernel; Kernel kernel;
......
...@@ -95,8 +95,8 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl ...@@ -95,8 +95,8 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
if (name == CalcAmoebaMultipoleForceKernel::Name()) if (name == CalcAmoebaMultipoleForceKernel::Name())
return new CudaCalcAmoebaMultipoleForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcAmoebaMultipoleForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcAmoebaGeneralizedKirkwoodForceKernel::Name()) if (name == CalcAmoebaGeneralizedKirkwoodForceKernel::Name())
// return new CudaCalcAmoebaGeneralizedKirkwoodForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcAmoebaGeneralizedKirkwoodForceKernel(name, platform, cu, context.getSystem());
if (name == CalcAmoebaVdwForceKernel::Name()) if (name == CalcAmoebaVdwForceKernel::Name())
return new CudaCalcAmoebaVdwForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcAmoebaVdwForceKernel(name, platform, cu, context.getSystem());
......
...@@ -37,6 +37,8 @@ ...@@ -37,6 +37,8 @@
namespace OpenMM { namespace OpenMM {
class CudaCalcAmoebaGeneralizedKirkwoodForceKernel;
/** /**
* This kernel is invoked by AmoebaHarmonicBondForce to calculate the forces acting on the system and the energy of the system. * This kernel is invoked by AmoebaHarmonicBondForce to calculate the forces acting on the system and the energy of the system.
*/ */
...@@ -427,6 +429,7 @@ private: ...@@ -427,6 +429,7 @@ private:
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel; CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeZIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeInducedPotentialKernel; CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeZIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeInducedPotentialKernel;
CUfunction pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel; CUfunction pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
...@@ -453,10 +456,38 @@ public: ...@@ -453,10 +456,38 @@ public:
* @return the potential energy due to the force * @return the potential energy due to the force
*/ */
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Perform the computation of Born radii.
*/
void computeBornRadii();
/**
* Perform the final parts of the force/energy computation.
*/
void finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles, CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags);
CudaArray* getBornRadii() {
return bornRadii;
}
CudaArray* getField() {
return field;
}
CudaArray* getInducedDipoles() {
return inducedDipoleS;
}
CudaArray* getInducedDipolesPolar() {
return inducedDipolePolarS;
}
private: private:
class ForceInfo; class ForceInfo;
CudaContext& cu; CudaContext& cu;
System& system; System& system;
CudaArray* params;
CudaArray* bornSum;
CudaArray* bornRadii;
CudaArray* bornForce;
CudaArray* field;
CudaArray* inducedDipoleS;
CudaArray* inducedDipolePolarS;
CUfunction computeBornSumKernel, reduceBornSumKernel, gkForceKernel, chainRuleKernel, ediffKernel;
}; };
/** /**
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -135,8 +135,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -135,8 +135,8 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ; localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole; localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar; localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].thole = data.thole; // IS THIS CORRECT? localData[threadIdx.x].thole = data.thole;
localData[threadIdx.x].damp = data.damp; // IS THIS CORRECT? localData[threadIdx.x].damp = data.damp;
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx]; uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx]; unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
...@@ -260,6 +260,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -260,6 +260,8 @@ extern "C" __global__ void computeElectrostatics(
// Compute torques. // Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) { if ((flags&(1<<j)) != 0) {
int atom2 = tbx+j; int atom2 = tbx+j;
...@@ -362,6 +364,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -362,6 +364,8 @@ extern "C" __global__ void computeElectrostatics(
// Compute torques. // Compute torques.
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0)); covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0); polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx)); covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
......
...@@ -7,6 +7,10 @@ typedef struct { ...@@ -7,6 +7,10 @@ typedef struct {
real quadrupoleXX, quadrupoleXY, quadrupoleXZ; real quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ, quadrupoleZZ; real quadrupoleYY, quadrupoleYZ, quadrupoleZZ;
float thole, damp; float thole, damp;
#ifdef USE_GK
real3 gkField;
real bornRadius;
#endif
} AtomData; } AtomData;
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const float2* __restrict__ dampingAndThole) { inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const float2* __restrict__ dampingAndThole) {
...@@ -178,6 +182,205 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -178,6 +182,205 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
} }
#endif #endif
#ifdef USE_GK
__device__ void computeOneGkInteraction(AtomData& atom1, AtomData& atom2, real3 delta, real3* fields) {
real a[4][4];
real gc[5];
real gux[11],guy[11],guz[11];
real gqxx[5],gqxy[5];
real gqxz[5],gqyy[5];
real gqyz[5],gqzz[5];
real ci = atom1.posq.w;
real ck = atom2.posq.w;
real uxi = atom1.dipole.x;
real uyi = atom1.dipole.y;
real uzi = atom1.dipole.z;
real uxk = atom2.dipole.x;
real uyk = atom2.dipole.y;
real uzk = atom2.dipole.z;
real qxxi = atom1.quadrupoleXX;
real qxyi = atom1.quadrupoleXY;
real qxzi = atom1.quadrupoleXZ;
real qyyi = atom1.quadrupoleYY;
real qyzi = atom1.quadrupoleYZ;
real qzzi = atom1.quadrupoleZZ;
real qxxk = atom2.quadrupoleXX;
real qxyk = atom2.quadrupoleXY;
real qxzk = atom2.quadrupoleXZ;
real qyyk = atom2.quadrupoleYY;
real qyzk = atom2.quadrupoleYZ;
real qzzk = atom2.quadrupoleZZ;
real xr2 = delta.x*delta.x;
real yr2 = delta.y*delta.y;
real zr2 = delta.z*delta.z;
real r2 = xr2 + yr2 + zr2;
real rb2 = atom1.bornRadius*atom2.bornRadius;
real expterm = EXP(-r2/(GK_C*rb2));
real expc = expterm / GK_C;
real dexpc = -2/(GK_C*rb2);
real gf2 = RECIP(r2+rb2*expterm);
real gf = SQRT(gf2);
real gf3 = gf2*gf;
real gf5 = gf3*gf2;
real gf7 = gf5*gf2;
// reaction potential auxiliary terms
a[0][0] = gf;
a[1][0] = -gf3;
a[2][0] = 3*gf5;
a[3][0] = -15*gf7;
// reaction potential gradient auxiliary terms
real expc1 = 1 - expc;
a[0][1] = expc1*a[1][0];
a[1][1] = expc1*a[2][0];
a[2][1] = expc1*a[3][0];
// dipole second reaction potential gradient auxiliary term
real expcdexpc = -expc*dexpc;
a[1][2] = expc1*a[2][1] + expcdexpc*a[2][0];
// multiply the auxillary terms by dielectric functions;
a[0][1] = GK_FC*a[0][1];
a[1][0] = GK_FD*a[1][0];
a[1][1] = GK_FD*a[1][1];
a[1][2] = GK_FD*a[1][2];
a[2][0] = GK_FQ*a[2][0];
a[2][1] = GK_FQ*a[2][1];
// unweighted dipole reaction potential tensor
gux[1] = delta.x*a[1][0];
guy[1] = delta.y*a[1][0];
guz[1] = delta.z*a[1][0];
// unweighted reaction potential gradient tensor
gc[2] = delta.x*a[0][1];
gc[3] = delta.y*a[0][1];
gc[4] = delta.z*a[0][1];
gux[2] = a[1][0] + xr2*a[1][1];
gux[3] = delta.x*delta.y*a[1][1];
gux[4] = delta.x*delta.z*a[1][1];
guy[2] = gux[3];
guy[3] = a[1][0] + yr2*a[1][1];
guy[4] = delta.y*delta.z*a[1][1];
guz[2] = gux[4];
guz[3] = guy[4];
guz[4] = a[1][0] + zr2*a[1][1];
gqxx[2] = delta.x*(2*a[2][0]+xr2*a[2][1]);
gqxx[3] = delta.y*xr2*a[2][1];
gqxx[4] = delta.z*xr2*a[2][1];
gqyy[2] = delta.x*yr2*a[2][1];
gqyy[3] = delta.y*(2*a[2][0]+yr2*a[2][1]);
gqyy[4] = delta.z*yr2*a[2][1];
gqzz[2] = delta.x*zr2*a[2][1];
gqzz[3] = delta.y*zr2*a[2][1];
gqzz[4] = delta.z*(2*a[2][0]+zr2*a[2][1]);
gqxy[2] = delta.y*(a[2][0]+xr2*a[2][1]);
gqxy[3] = delta.x*(a[2][0]+yr2*a[2][1]);
gqxy[4] = delta.z*delta.x*delta.y*a[2][1];
gqxz[2] = delta.z*(a[2][0]+xr2*a[2][1]);
gqxz[3] = gqxy[4];
gqxz[4] = delta.x*(a[2][0]+zr2*a[2][1]);
gqyz[2] = gqxy[4];
gqyz[3] = delta.z*(a[2][0]+yr2*a[2][1]);
gqyz[4] = delta.y*(a[2][0]+zr2*a[2][1]);
// unweighted dipole second reaction potential gradient tensor
gux[5] = delta.x*(3*a[1][1]+xr2*a[1][2]);
gux[6] = delta.y*(a[1][1]+xr2*a[1][2]);
gux[7] = delta.z*(a[1][1]+xr2*a[1][2]);
gux[8] = delta.x*(a[1][1]+yr2*a[1][2]);
gux[9] = delta.z*delta.x*delta.y*a[1][2];
gux[10] = delta.x*(a[1][1]+zr2*a[1][2]);
guy[5] = delta.y*(a[1][1]+xr2*a[1][2]);
guy[6] = delta.x*(a[1][1]+yr2*a[1][2]);
guy[7] = gux[9];
guy[8] = delta.y*(3*a[1][1]+yr2*a[1][2]);
guy[9] = delta.z*(a[1][1]+yr2*a[1][2]);
guy[10] = delta.y*(a[1][1]+zr2*a[1][2]);
guz[5] = delta.z*(a[1][1]+xr2*a[1][2]);
guz[6] = gux[9];
guz[7] = delta.x*(a[1][1]+zr2*a[1][2]);
guz[8] = delta.z*(a[1][1]+yr2*a[1][2]);
guz[9] = delta.y*(a[1][1]+zr2*a[1][2]);
guz[10] = delta.z*(3*a[1][1]+zr2*a[1][2]);
// generalized Kirkwood permanent reaction field
fields[0].x = uxk*gux[2] + uyk*gux[3] + uzk*gux[4]
+ 0.5f*(ck*gux[1] + qxxk*gux[5]
+ qyyk*gux[8] + qzzk*gux[10]
+ 2*(qxyk*gux[6]+qxzk*gux[7]
+ qyzk*gux[9]))
+ 0.5f*(ck*gc[2] + qxxk*gqxx[2]
+ qyyk*gqyy[2] + qzzk*gqzz[2]
+ 2*(qxyk*gqxy[2]+qxzk*gqxz[2]
+ qyzk*gqyz[2]));
fields[0].y = uxk*guy[2] + uyk*guy[3] + uzk*guy[4]
+ 0.5f*(ck*guy[1] + qxxk*guy[5]
+ qyyk*guy[8] + qzzk*guy[10]
+ 2*(qxyk*guy[6]+qxzk*guy[7]
+ qyzk*guy[9]))
+ 0.5f*(ck*gc[3] + qxxk*gqxx[3]
+ qyyk*gqyy[3] + qzzk*gqzz[3]
+ 2*(qxyk*gqxy[3]+qxzk*gqxz[3]
+ qyzk*gqyz[3]));
fields[0].z = uxk*guz[2] + uyk*guz[3] + uzk*guz[4]
+ 0.5f*(ck*guz[1] + qxxk*guz[5]
+ qyyk*guz[8] + qzzk*guz[10]
+ 2*(qxyk*guz[6]+qxzk*guz[7]
+ qyzk*guz[9]))
+ 0.5f*(ck*gc[4] + qxxk*gqxx[4]
+ qyyk*gqyy[4] + qzzk*gqzz[4]
+ 2*(qxyk*gqxy[4]+qxzk*gqxz[4]
+ qyzk*gqyz[4]));
fields[1].x = uxi*gux[2] + uyi*gux[3] + uzi*gux[4]
- 0.5f*(ci*gux[1] + qxxi*gux[5]
+ qyyi*gux[8] + qzzi*gux[10]
+ 2*(qxyi*gux[6]+qxzi*gux[7]
+ qyzi*gux[9]))
- 0.5f*(ci*gc[2] + qxxi*gqxx[2]
+ qyyi*gqyy[2] + qzzi*gqzz[2]
+ 2*(qxyi*gqxy[2]+qxzi*gqxz[2]
+ qyzi*gqyz[2]));
fields[1].y = uxi*guy[2] + uyi*guy[3] + uzi*guy[4]
- 0.5f*(ci*guy[1] + qxxi*guy[5]
+ qyyi*guy[8] + qzzi*guy[10]
+ 2*(qxyi*guy[6]+qxzi*guy[7]
+ qyzi*guy[9]))
- 0.5f*(ci*gc[3] + qxxi*gqxx[3]
+ qyyi*gqyy[3] + qzzi*gqzz[3]
+ 2*(qxyi*gqxy[3]+qxzi*gqxz[3]
+ qyzi*gqyz[3]));
fields[1].z = uxi*guz[2] + uyi*guz[3] + uzi*guz[4]
- 0.5f*(ci*guz[1] + qxxi*guz[5]
+ qyyi*guz[8] + qzzi*guz[10]
+ 2*(qxyi*guz[6]+qxzi*guz[7]
+ qyzi*guz[9]))
- 0.5f*(ci*gc[4] + qxxi*gqxx[4]
+ qyyi*gqyy[4] + qzzi*gqzz[4]
+ 2*(qxyi*gqxy[4]+qxzi*gqxz[4]
+ qyzi*gqyz[4]));
}
#endif
__device__ real computeDScaleFactor(unsigned int polarizationGroup) { __device__ real computeDScaleFactor(unsigned int polarizationGroup) {
return (polarizationGroup & 1 ? 0 : 1); return (polarizationGroup & 1 ? 0 : 1);
} }
...@@ -198,6 +401,8 @@ extern "C" __global__ void computeFixedField( ...@@ -198,6 +401,8 @@ extern "C" __global__ void computeFixedField(
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags, const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags,
#elif defined USE_GK
const real* __restrict__ bornRadii, unsigned long long* __restrict__ gkFieldBuffers,
#endif #endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const float2* __restrict__ dampingAndThole) { const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const float2* __restrict__ dampingAndThole) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
...@@ -246,6 +451,9 @@ extern "C" __global__ void computeFixedField( ...@@ -246,6 +451,9 @@ extern "C" __global__ void computeFixedField(
} }
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, dampingAndThole); loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, dampingAndThole);
#ifdef USE_GK
data.bornRadius = bornRadii[atom1];
#endif
// Locate the exclusion data for this tile. // Locate the exclusion data for this tile.
...@@ -271,26 +479,32 @@ extern "C" __global__ void computeFixedField( ...@@ -271,26 +479,32 @@ extern "C" __global__ void computeFixedField(
localData[localAtomIndex].quadrupoleYY = data.quadrupoleYY; localData[localAtomIndex].quadrupoleYY = data.quadrupoleYY;
localData[localAtomIndex].quadrupoleYZ = data.quadrupoleYZ; localData[localAtomIndex].quadrupoleYZ = data.quadrupoleYZ;
localData[localAtomIndex].quadrupoleZZ = data.quadrupoleZZ; localData[localAtomIndex].quadrupoleZZ = data.quadrupoleZZ;
localData[localAtomIndex].thole = data.thole; // IS THIS CORRECT? localData[localAtomIndex].thole = data.thole;
localData[localAtomIndex].damp = data.damp; // IS THIS CORRECT? localData[localAtomIndex].damp = data.damp;
#ifdef USE_GK
localData[localAtomIndex].bornRadius = data.bornRadius;
#endif
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx]; uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx]; unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+j; real3 delta = trimTo3(localData[tbx+j].posq-data.posq);
real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[4]; real3 fields[4];
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup);
computeOneInteraction(data, localData[atom2], delta, d, p, fields); computeOneInteraction(data, localData[tbx+j], delta, d, p, fields);
atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
data.field += fields[0]; data.field += fields[0];
data.fieldPolar += fields[1]; data.fieldPolar += fields[1];
#ifdef USE_GK
computeOneGkInteraction(data, localData[tbx+j], delta, fields);
data.gkField += fields[0];
#endif
} }
covalent.x >>= 1; covalent.x >>= 1;
covalent.y >>= 1; covalent.y >>= 1;
...@@ -305,6 +519,10 @@ extern "C" __global__ void computeFixedField( ...@@ -305,6 +519,10 @@ extern "C" __global__ void computeFixedField(
loadAtomData(localData[localAtomIndex], j, posq, labFrameDipole, labFrameQuadrupole, dampingAndThole); loadAtomData(localData[localAtomIndex], j, posq, labFrameDipole, labFrameQuadrupole, dampingAndThole);
localData[localAtomIndex].field = make_real3(0); localData[localAtomIndex].field = make_real3(0);
localData[localAtomIndex].fieldPolar = make_real3(0); localData[localAtomIndex].fieldPolar = make_real3(0);
#ifdef USE_GK
localData[localAtomIndex].bornRadius = bornRadii[j];
localData[localAtomIndex].gkField = make_real3(0);
#endif
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF); unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (!hasExclusions && flags != 0xFFFFFFFF) { if (!hasExclusions && flags != 0xFFFFFFFF) {
...@@ -385,22 +603,27 @@ extern "C" __global__ void computeFixedField( ...@@ -385,22 +603,27 @@ extern "C" __global__ void computeFixedField(
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx)); polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj; real3 delta = trimTo3(localData[tbx+tj].posq-data.posq);
real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[4]; real3 fields[4];
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup);
computeOneInteraction(data, localData[atom2], delta, d, p, fields); computeOneInteraction(data, localData[tbx+tj], delta, d, p, fields);
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
data.field += fields[0]; data.field += fields[0];
data.fieldPolar += fields[1]; data.fieldPolar += fields[1];
localData[atom2].field += fields[2]; localData[tbx+tj].field += fields[2];
localData[atom2].fieldPolar += fields[3]; localData[tbx+tj].fieldPolar += fields[3];
#ifdef USE_GK
computeOneGkInteraction(data, localData[tbx+tj], delta, fields);
data.gkField += fields[0];
localData[tbx+tj].gkField += fields[1];
#endif
} }
covalent.x >>= 1; covalent.x >>= 1;
covalent.y >>= 1; covalent.y >>= 1;
...@@ -421,6 +644,11 @@ extern "C" __global__ void computeFixedField( ...@@ -421,6 +644,11 @@ extern "C" __global__ void computeFixedField(
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF)));
#ifdef USE_GK
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (data.gkField.x*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.y*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.z*0xFFFFFFFF)));
#endif
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
...@@ -430,6 +658,11 @@ extern "C" __global__ void computeFixedField( ...@@ -430,6 +658,11 @@ extern "C" __global__ void computeFixedField(
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF)));
#ifdef USE_GK
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.x*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.y*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.z*0xFFFFFFFF)));
#endif
} }
pos++; pos++;
} while (pos < end); } while (pos < end);
......
...@@ -207,6 +207,9 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4 ...@@ -207,6 +207,9 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
} }
extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, const long long* __restrict__ fieldPolarBuffers, extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, const long long* __restrict__ fieldPolarBuffers,
#ifdef USE_GK
const long long* __restrict__ gkFieldBuffers, real* __restrict__ inducedDipoleS, real* __restrict__ inducedDipolePolarS,
#endif
real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) { real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) { for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
real scale = polarizability[atom]/(real) 0xFFFFFFFF; real scale = polarizability[atom]/(real) 0xFFFFFFFF;
...@@ -216,16 +219,17 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi ...@@ -216,16 +219,17 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi
inducedDipolePolar[3*atom] = scale*fieldPolarBuffers[atom]; inducedDipolePolar[3*atom] = scale*fieldPolarBuffers[atom];
inducedDipolePolar[3*atom+1] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS]; inducedDipolePolar[3*atom+1] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS];
inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]; inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2];
#ifdef USE_GK
inducedDipoleS[3*atom] = scale*(fieldBuffers[atom]+gkFieldBuffers[atom]);
inducedDipoleS[3*atom+1] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipoleS[3*atom+2] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
inducedDipolePolarS[3*atom] = scale*(fieldPolarBuffers[atom]+gkFieldBuffers[atom]);
inducedDipolePolarS[3*atom+1] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipolePolarS[3*atom+2] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
#endif
} }
} }
/**
* Convert a real4 to a real3 by removing its last element.
*/
inline __device__ real3 trim(real4 v) {
return make_real3(v.x, v.y, v.z);
}
/** /**
* Normalize a vector and return what its magnitude was. * Normalize a vector and return what its magnitude was.
*/ */
...@@ -274,11 +278,11 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for ...@@ -274,11 +278,11 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
// NoAxisType // NoAxisType
if (axisType < 5 && particles.z >= 0) { if (axisType < 5 && particles.z >= 0) {
real3 atomPos = trim(posq[atom]); real3 atomPos = trimTo3(posq[atom]);
vector[U] = atomPos - trim(posq[axisAtom]); vector[U] = atomPos - trimTo3(posq[axisAtom]);
norms[U] = normVector(vector[U]); norms[U] = normVector(vector[U]);
if (axisType != 4 && particles.x >= 0) if (axisType != 4 && particles.x >= 0)
vector[V] = atomPos - trim(posq[particles.x]); vector[V] = atomPos - trimTo3(posq[particles.x]);
else else
vector[V] = make_real3(0.1f); vector[V] = make_real3(0.1f);
norms[V] = normVector(vector[V]); norms[V] = normVector(vector[V]);
...@@ -288,7 +292,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for ...@@ -288,7 +292,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
if (axisType < 2 || axisType > 3) if (axisType < 2 || axisType > 3)
vector[W] = cross(vector[U], vector[V]); vector[W] = cross(vector[U], vector[V]);
else else
vector[W] = atomPos - trim(posq[particles.y]); vector[W] = atomPos - trimTo3(posq[particles.y]);
norms[W] = normVector(vector[W]); norms[W] = normVector(vector[W]);
vector[UV] = cross(vector[V], vector[U]); vector[UV] = cross(vector[V], vector[U]);
......
...@@ -260,8 +260,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -260,8 +260,8 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ; localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole; localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar; localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].thole = data.thole; // IS THIS CORRECT? localData[threadIdx.x].thole = data.thole;
localData[threadIdx.x].damp = data.damp; // IS THIS CORRECT? localData[threadIdx.x].damp = data.damp;
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx]; uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx]; unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment