Commit 5c090de2 authored by peastman's avatar peastman
Browse files

Simplification and cleanup to spherical harmonics code

parent 474f600e
...@@ -1158,20 +1158,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1158,20 +1158,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
buildMatrixKernel = cu.getKernel(module, "computeDIISMatrix"); buildMatrixKernel = cu.getKernel(module, "computeDIISMatrix");
} }
stringstream electrostaticsSource; stringstream electrostaticsSource;
if (usePME) {
electrostaticsSource << CudaKernelSources::vectorOps; electrostaticsSource << CudaKernelSources::vectorOps;
electrostaticsSource << CudaAmoebaKernelSources::sphericalMultipoles; electrostaticsSource << CudaAmoebaKernelSources::sphericalMultipoles;
if (usePME)
electrostaticsSource << CudaAmoebaKernelSources::pmeMultipoleElectrostatics; electrostaticsSource << CudaAmoebaKernelSources::pmeMultipoleElectrostatics;
electrostaticsThreadMemory = 24*elementSize+3*sizeof(float)+3*sizeof(int)/(double) cu.TileSize; else
}
else {
electrostaticsSource << CudaKernelSources::vectorOps;
electrostaticsSource << CudaAmoebaKernelSources::sphericalMultipoles;
electrostaticsSource << CudaAmoebaKernelSources::multipoleElectrostatics; electrostaticsSource << CudaAmoebaKernelSources::multipoleElectrostatics;
electrostaticsThreadMemory = 24*elementSize+2*sizeof(float)+3*sizeof(int)/(double) cu.TileSize; electrostaticsThreadMemory = 24*elementSize+3*sizeof(float)+3*sizeof(int)/(double) cu.TileSize;
if (gk != NULL)
electrostaticsThreadMemory += 4*elementSize;
}
electrostaticsThreads = min(maxThreads, cu.computeThreadBlockSize(electrostaticsThreadMemory)); electrostaticsThreads = min(maxThreads, cu.computeThreadBlockSize(electrostaticsThreadMemory));
defines["THREAD_BLOCK_SIZE"] = cu.intToString(electrostaticsThreads); defines["THREAD_BLOCK_SIZE"] = cu.intToString(electrostaticsThreads);
module = cu.createModule(electrostaticsSource.str(), defines); module = cu.createModule(electrostaticsSource.str(), defines);
...@@ -1492,7 +1485,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1492,7 +1485,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* electrostaticsArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), void* electrostaticsArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &cu.getPosq().getDevicePointer(), &covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices, &nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &sphericalDipoles->getDevicePointer(), &sphericalQuadrupoles->getDevicePointer(), &sphericalDipoles->getDevicePointer(), &sphericalQuadrupoles->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*electrostaticsThreads, electrostaticsThreads); cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*electrostaticsThreads, electrostaticsThreads);
if (gkKernel != NULL) if (gkKernel != NULL)
...@@ -1647,7 +1640,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1647,7 +1640,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), &nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(),
cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(), cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(), &maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &sphericalDipoles->getDevicePointer(), &sphericalQuadrupoles->getDevicePointer(), &sphericalDipoles->getDevicePointer(), &sphericalQuadrupoles->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*electrostaticsThreads, electrostaticsThreads); cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*electrostaticsThreads, electrostaticsThreads);
void* pmeTransformInducedPotentialArgs[] = {&pmePhidp->getDevicePointer(), &pmeCphi->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; void* pmeTransformInducedPotentialArgs[] = {&pmePhidp->getDevicePointer(), &pmeCphi->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
......
...@@ -382,8 +382,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -382,8 +382,7 @@ extern "C" __global__ void computeElectrostatics(
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms, const unsigned int* __restrict__ interactingAtoms,
#endif #endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) { const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
......
...@@ -411,32 +411,23 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has ...@@ -411,32 +411,23 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
/** /**
* Compute the self energy and self torque. * Compute the self energy and self torque.
*/ */
__device__ void computeSelfEnergyAndTorque(AtomData& atom1, int atomIndex, real& energy, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole) { __device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) {
real term = 2*EWALD_ALPHA*EWALD_ALPHA;
real fterm = -EWALD_ALPHA/SQRT_PI;
real cii = atom1.q*atom1.q; real cii = atom1.q*atom1.q;
real3 dipole = make_real3(labFrameDipole[atomIndex*3], labFrameDipole[atomIndex*3+1], labFrameDipole[atomIndex*3+2]); real3 dipole = make_real3(atom1.sphericalDipole.y, atom1.sphericalDipole.z, atom1.sphericalDipole.x);
real dii = dot(dipole, dipole); real dii = dot(dipole, dipole+atom1.inducedDipole);
#ifdef INCLUDE_QUADRUPOLES #ifdef INCLUDE_QUADRUPOLES
real quadrupoleXX = labFrameQuadrupole[atomIndex*5]; real qii = (atom1.sphericalQuadrupole[0]*atom1.sphericalQuadrupole[0] +
real quadrupoleXY = labFrameQuadrupole[atomIndex*5+1]; atom1.sphericalQuadrupole[1]*atom1.sphericalQuadrupole[1] +
real quadrupoleXZ = labFrameQuadrupole[atomIndex*5+2]; atom1.sphericalQuadrupole[2]*atom1.sphericalQuadrupole[2] +
real quadrupoleYY = labFrameQuadrupole[atomIndex*5+3]; atom1.sphericalQuadrupole[3]*atom1.sphericalQuadrupole[3] +
real quadrupoleYZ = labFrameQuadrupole[atomIndex*5+4]; atom1.sphericalQuadrupole[4]*atom1.sphericalQuadrupole[4]);
real qii = 2*(quadrupoleXX*quadrupoleXX +
quadrupoleYY*quadrupoleYY +
quadrupoleXX*quadrupoleYY +
quadrupoleXY*quadrupoleXY +
quadrupoleXZ*quadrupoleXZ +
quadrupoleYZ*quadrupoleYZ);
#else #else
real qii = 0; real qii = 0;
#endif #endif
real uii = dot(dipole, atom1.inducedDipole); real prefac = -EWALD_ALPHA/SQRT_PI;
real selfEnergy = (cii + term*(dii/3 + 2*term*qii/5)); real a2 = EWALD_ALPHA*EWALD_ALPHA;
selfEnergy += term*uii/3; real a4 = a2*a2;
selfEnergy *= fterm; energy += prefac*(cii + ((real)2/3)*a2*dii + ((real) 4/15)*a4*qii);
energy += selfEnergy;
// self-torque for PME // self-torque for PME
...@@ -456,9 +447,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -456,9 +447,8 @@ extern "C" __global__ void computeElectrostatics(
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms, const unsigned int* __restrict__ interactingAtoms,
#endif #endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
...@@ -511,7 +501,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -511,7 +501,7 @@ extern "C" __global__ void computeElectrostatics(
} }
} }
if (atom1 < NUM_ATOMS) if (atom1 < NUM_ATOMS)
computeSelfEnergyAndTorque(data, atom1, energy, labFrameDipole, labFrameQuadrupole); computeSelfEnergyAndTorque(data, energy);
data.force *= -ENERGY_SCALE_FACTOR; data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR; data.torque *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000))); atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment