Commit e5b7f0b3 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to convert AmoebaMultipoleForce

parent 155fe172
...@@ -866,7 +866,7 @@ private: ...@@ -866,7 +866,7 @@ private:
CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) :
CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system), hasInitializedScaleFactors(false), CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system), hasInitializedScaleFactors(false),
multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL),
labFrameDipoles(NULL), labFrameQuadrupoles(NULL), field(NULL), fieldPolar(NULL), dampingAndThole(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL), field(NULL), fieldPolar(NULL), torque(NULL), dampingAndThole(NULL),
inducedDipole(NULL), inducedDipolePolar(NULL), currentEpsilon(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL), inducedDipole(NULL), inducedDipolePolar(NULL), currentEpsilon(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL) { pmeGrid(NULL) {
} }
...@@ -887,6 +887,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() { ...@@ -887,6 +887,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete field; delete field;
if (fieldPolar != NULL) if (fieldPolar != NULL)
delete fieldPolar; delete fieldPolar;
if (torque != NULL)
delete torque;
if (dampingAndThole != NULL) if (dampingAndThole != NULL)
delete dampingAndThole; delete dampingAndThole;
if (inducedDipole != NULL) if (inducedDipole != NULL)
...@@ -966,10 +968,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -966,10 +968,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
labFrameQuadrupoles = new CudaArray(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles"); labFrameQuadrupoles = new CudaArray(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles");
field = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "field"); field = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "field");
fieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "fieldPolar"); fieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "fieldPolar");
torque = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "torque");
inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole"); inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar"); inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar");
cu.addAutoclearBuffer(*field); cu.addAutoclearBuffer(*field);
cu.addAutoclearBuffer(*fieldPolar); cu.addAutoclearBuffer(*fieldPolar);
cu.addAutoclearBuffer(*torque);
// Record which atoms should be flagged as exclusions based on covalent groups, and determine // Record which atoms should be flagged as exclusions based on covalent groups, and determine
// the values for the covalent group flags. // the values for the covalent group flags.
...@@ -1025,6 +1029,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1025,6 +1029,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoles, defines); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoles, defines);
computeMomentsKernel = cu.getKernel(module, "computeLabFrameMoments"); computeMomentsKernel = cu.getKernel(module, "computeLabFrameMoments");
recordInducedDipolesKernel = cu.getKernel(module, "recordInducedDipoles"); recordInducedDipolesKernel = cu.getKernel(module, "recordInducedDipoles");
mapTorqueKernel = cu.getKernel(module, "mapTorqueToForce");
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines); module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines);
computeFixedFieldKernel = cu.getKernel(module, "computeFixedField"); computeFixedFieldKernel = cu.getKernel(module, "computeFixedField");
stringstream electrostaticsSource; stringstream electrostaticsSource;
...@@ -1437,12 +1442,15 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1437,12 +1442,15 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Compute electrostatic force. // Compute electrostatic force.
void* electrostaticsArgs[] = {&cu.getForce().getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), void* electrostaticsArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(), &cu.getPosq().getDevicePointer(), &nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices, &covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
void* mapTorqueArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
&cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer()};
cu.executeKernel(mapTorqueKernel, mapTorqueArgs, cu.getNumAtoms());
} }
return 0.0; return 0.0;
} }
......
...@@ -388,6 +388,7 @@ private: ...@@ -388,6 +388,7 @@ private:
CudaArray* labFrameQuadrupoles; CudaArray* labFrameQuadrupoles;
CudaArray* field; CudaArray* field;
CudaArray* fieldPolar; CudaArray* fieldPolar;
CudaArray* torque;
CudaArray* dampingAndThole; CudaArray* dampingAndThole;
CudaArray* inducedDipole; CudaArray* inducedDipole;
CudaArray* inducedDipolePolar; CudaArray* inducedDipolePolar;
...@@ -412,7 +413,7 @@ private: ...@@ -412,7 +413,7 @@ private:
CudaArray* pmeAtomGridIndex; CudaArray* pmeAtomGridIndex;
CudaSort* sort; CudaSort* sort;
cufftHandle fft; cufftHandle fft;
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, electrostaticsKernel; CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, electrostaticsKernel, mapTorqueKernel;
}; };
/** /**
......
...@@ -92,12 +92,12 @@ __device__ void computeOneInteractionT3(AtomData& atom1, volatile AtomData& atom ...@@ -92,12 +92,12 @@ __device__ void computeOneInteractionT3(AtomData& atom1, volatile AtomData& atom
real dsc7 = rr7*scale7*dScale; real dsc7 = rr7*scale7*dScale;
real psc7 = rr7*scale7*pScale; real psc7 = rr7*scale7*pScale;
real atom2quadrupoleZZ = 1-atom2.quadrupoleXX-atom2.quadrupoleYY; real atom2quadrupoleZZ = -(atom2.quadrupoleXX+atom2.quadrupoleYY);
real qJr_0 = atom2.quadrupoleXX*xr + atom2.quadrupoleXY*yr + atom2.quadrupoleXZ*zr; real qJr_0 = atom2.quadrupoleXX*xr + atom2.quadrupoleXY*yr + atom2.quadrupoleXZ*zr;
real qJr_1 = atom2.quadrupoleXY*xr + atom2.quadrupoleYY*yr + atom2.quadrupoleYZ*zr; real qJr_1 = atom2.quadrupoleXY*xr + atom2.quadrupoleYY*yr + atom2.quadrupoleYZ*zr;
real qJr_2 = atom2.quadrupoleXZ*xr + atom2.quadrupoleYZ*yr + atom2quadrupoleZZ*zr; real qJr_2 = atom2.quadrupoleXZ*xr + atom2.quadrupoleYZ*yr + atom2quadrupoleZZ*zr;
real atom1quadrupoleZZ = 1-atom1.quadrupoleXX-atom1.quadrupoleYY; real atom1quadrupoleZZ = -(atom1.quadrupoleXX+atom1.quadrupoleYY);
real qIr_0 = atom1.quadrupoleXX*xr + atom1.quadrupoleXY*yr + atom1.quadrupoleXZ*zr; real qIr_0 = atom1.quadrupoleXX*xr + atom1.quadrupoleXY*yr + atom1.quadrupoleXZ*zr;
real qIr_1 = atom1.quadrupoleXY*xr + atom1.quadrupoleYY*yr + atom1.quadrupoleYZ*zr; real qIr_1 = atom1.quadrupoleXY*xr + atom1.quadrupoleYY*yr + atom1.quadrupoleYZ*zr;
real qIr_2 = atom1.quadrupoleXZ*xr + atom1.quadrupoleYZ*yr + atom1quadrupoleZZ*zr; real qIr_2 = atom1.quadrupoleXZ*xr + atom1.quadrupoleYZ*yr + atom1quadrupoleZZ*zr;
......
...@@ -56,7 +56,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr ...@@ -56,7 +56,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
* Compute electrostatic interactions. * Compute electrostatic interactions.
*/ */
extern "C" __global__ void computeElectrostatics( extern "C" __global__ void computeElectrostatics(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices, const real4* __restrict__ posq, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices,
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -183,9 +183,9 @@ extern "C" __global__ void computeElectrostatics( ...@@ -183,9 +183,9 @@ extern "C" __global__ void computeElectrostatics(
polarizationGroup >>= 1; polarizationGroup >>= 1;
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -338,18 +338,18 @@ extern "C" __global__ void computeElectrostatics( ...@@ -338,18 +338,18 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR; localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
} }
} }
} }
} }
pos++; pos++;
} while (pos < end); } while (pos < end);
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy*ENERGY_SCALE_FACTOR;
} }
...@@ -19,7 +19,7 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -19,7 +19,7 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2]; data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3]; data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4]; data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = 1-data.quadrupoleXX-data.quadrupoleYY; data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
float2 temp = dampingAndThole[atom]; float2 temp = dampingAndThole[atom];
data.damp = temp.x; data.damp = temp.x;
data.thole = temp.y; data.thole = temp.y;
......
...@@ -170,27 +170,27 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4 ...@@ -170,27 +170,27 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
real mPoleXZ = molecularQuadrupoles[offset+2]; real mPoleXZ = molecularQuadrupoles[offset+2];
real mPoleYY = molecularQuadrupoles[offset+3]; real mPoleYY = molecularQuadrupoles[offset+3];
real mPoleYZ = molecularQuadrupoles[offset+4]; real mPoleYZ = molecularQuadrupoles[offset+4];
real mPoleZZ = 1-mPoleXX-mPoleYY; real mPoleZZ = -(mPoleXX+mPoleYY);
if (reverse) { if (reverse) {
mPoleXY *= -1; mPoleXY *= -1;
mPoleYZ *= -1; mPoleYZ *= -1;
} }
labFrameQuadrupoles[offset] = vectorX.x*(vectorX.x*mPoleXX + vectorY.x*mPoleXY + vectorZ.x*mPoleXZ); labFrameQuadrupoles[offset] = vectorX.x*(vectorX.x*mPoleXX + vectorY.x*mPoleXY + vectorZ.x*mPoleXZ)
+ vectorY.x*(vectorX.x*mPoleXY + vectorY.x*mPoleYY + vectorZ.x*mPoleYZ); + vectorY.x*(vectorX.x*mPoleXY + vectorY.x*mPoleYY + vectorZ.x*mPoleYZ)
+ vectorZ.x*(vectorX.x*mPoleXZ + vectorY.x*mPoleYZ + vectorZ.x*mPoleZZ); + vectorZ.x*(vectorX.x*mPoleXZ + vectorY.x*mPoleYZ + vectorZ.x*mPoleZZ);
labFrameQuadrupoles[offset+1] = vectorX.x*(vectorX.y*mPoleXX + vectorY.y*mPoleXY + vectorZ.y*mPoleXZ); labFrameQuadrupoles[offset+1] = vectorX.x*(vectorX.y*mPoleXX + vectorY.y*mPoleXY + vectorZ.y*mPoleXZ)
+ vectorY.x*(vectorX.y*mPoleXY + vectorY.y*mPoleYY + vectorZ.y*mPoleYZ); + vectorY.x*(vectorX.y*mPoleXY + vectorY.y*mPoleYY + vectorZ.y*mPoleYZ)
+ vectorZ.x*(vectorX.y*mPoleXZ + vectorY.y*mPoleYZ + vectorZ.y*mPoleZZ); + vectorZ.x*(vectorX.y*mPoleXZ + vectorY.y*mPoleYZ + vectorZ.y*mPoleZZ);
labFrameQuadrupoles[offset+2] = vectorX.x*(vectorX.z*mPoleXX + vectorY.z*mPoleXY + vectorZ.z*mPoleXZ); labFrameQuadrupoles[offset+2] = vectorX.x*(vectorX.z*mPoleXX + vectorY.z*mPoleXY + vectorZ.z*mPoleXZ)
+ vectorY.x*(vectorX.z*mPoleXY + vectorY.z*mPoleYY + vectorZ.z*mPoleYZ); + vectorY.x*(vectorX.z*mPoleXY + vectorY.z*mPoleYY + vectorZ.z*mPoleYZ)
+ vectorZ.x*(vectorX.z*mPoleXZ + vectorY.z*mPoleYZ + vectorZ.z*mPoleZZ); + vectorZ.x*(vectorX.z*mPoleXZ + vectorY.z*mPoleYZ + vectorZ.z*mPoleZZ);
labFrameQuadrupoles[offset+3] = vectorX.y*(vectorX.y*mPoleXX + vectorY.y*mPoleXY + vectorZ.y*mPoleXZ); labFrameQuadrupoles[offset+3] = vectorX.y*(vectorX.y*mPoleXX + vectorY.y*mPoleXY + vectorZ.y*mPoleXZ)
+ vectorY.y*(vectorX.y*mPoleXY + vectorY.y*mPoleYY + vectorZ.y*mPoleYZ); + vectorY.y*(vectorX.y*mPoleXY + vectorY.y*mPoleYY + vectorZ.y*mPoleYZ)
+ vectorZ.y*(vectorX.y*mPoleXZ + vectorY.y*mPoleYZ + vectorZ.y*mPoleZZ); + vectorZ.y*(vectorX.y*mPoleXZ + vectorY.y*mPoleYZ + vectorZ.y*mPoleZZ);
labFrameQuadrupoles[offset+4] = vectorX.y*(vectorX.z*mPoleXX + vectorY.z*mPoleXY + vectorZ.z*mPoleXZ); labFrameQuadrupoles[offset+4] = vectorX.y*(vectorX.z*mPoleXX + vectorY.z*mPoleXY + vectorZ.z*mPoleXZ)
+ vectorY.y*(vectorX.z*mPoleXY + vectorY.z*mPoleYY + vectorZ.z*mPoleYZ); + vectorY.y*(vectorX.z*mPoleXY + vectorY.z*mPoleYY + vectorZ.z*mPoleYZ)
+ vectorZ.y*(vectorX.z*mPoleXZ + vectorY.z*mPoleYZ + vectorZ.z*mPoleZZ); + vectorZ.y*(vectorX.z*mPoleXZ + vectorY.z*mPoleYZ + vectorZ.z*mPoleZZ);
} }
} }
...@@ -208,3 +208,220 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi ...@@ -208,3 +208,220 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi
inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]; inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2];
} }
} }
/**
* Convert a real4 to a real3 by removing its last element.
*/
inline __device__ real3 trim(real4 v) {
return make_real3(v.x, v.y, v.z);
}
/**
* Normalize a vector and return what its magnitude was.
*/
inline __device__ real normVector(real3& v) {
real n = SQRT(dot(v, v));
v *= (n > 0 ? RECIP(n) : 0);
return n;
}
extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ forceBuffers, const long long* __restrict__ torqueBuffers,
const real4* __restrict__ posq, const int4* __restrict__ multipoleParticles) {
const int U = 0;
const int V = 1;
const int W = 2;
const int R = 3;
const int S = 4;
const int UV = 5;
const int UW = 6;
const int VW = 7;
const int UR = 8;
const int US = 9;
const int VS = 10;
const int WS = 11;
const int LastVectorIndex = 12;
const int X = 0;
const int Y = 1;
const int Z = 2;
const int I = 3;
const real torqueScale = RECIP((double) 0xFFFFFFFF);
real3 forces[4];
real norms[LastVectorIndex];
real3 vector[LastVectorIndex];
real angles[LastVectorIndex][2];
for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
int4 particles = multipoleParticles[atom];
int axisAtom = particles.z;
int axisType = particles.w;
// NoAxisType
if (axisType < 5 && particles.z >= 0) {
real3 atomPos = trim(posq[atom]);
vector[U] = atomPos - trim(posq[axisAtom]);
norms[U] = normVector(vector[U]);
if (axisType != 4 && particles.x >= 0)
vector[V] = atomPos - trim(posq[particles.x]);
else
vector[V] = make_real3(0.1f);
norms[V] = normVector(vector[V]);
// W = UxV
if (axisType < 2 || axisType > 3)
vector[W] = cross(vector[U], vector[V]);
else
vector[W] = atomPos - trim(posq[particles.y]);
norms[W] = normVector(vector[W]);
vector[UV] = cross(vector[V], vector[U]);
vector[UW] = cross(vector[W], vector[U]);
vector[VW] = cross(vector[W], vector[V]);
norms[UV] = normVector(vector[UV]);
norms[UW] = normVector(vector[UW]);
norms[VW] = normVector(vector[VW]);
angles[UV][0] = dot(vector[U], vector[V]);
angles[UV][1] = SQRT(1 - angles[UV][0]*angles[UV][0]);
angles[UW][0] = dot(vector[U], vector[W]);
angles[UW][1] = SQRT(1 - angles[UW][0]*angles[UW][0]);
angles[VW][0] = dot(vector[V], vector[W]);
angles[VW][1] = SQRT(1 - angles[VW][0]*angles[VW][0]);
real dphi[3];
real3 torque = make_real3(torqueScale*torqueBuffers[atom], torqueScale*torqueBuffers[atom+PADDED_NUM_ATOMS], torqueScale*torqueBuffers[atom+PADDED_NUM_ATOMS*2]);
dphi[U] = -dot(vector[U], torque);
dphi[V] = -dot(vector[V], torque);
dphi[W] = -dot(vector[W], torque);
// z-then-x and bisector
if (axisType == 0 || axisType == 1) {
real factor1 = dphi[V]/(norms[U]*angles[UV][1]);
real factor2 = dphi[W]/(norms[U]);
real factor3 = -dphi[U]/(norms[V]*angles[UV][1]);
real factor4 = 0;
if (axisType == 1) {
factor2 *= 0.5f;
factor4 = 0.5f*dphi[W]/(norms[V]);
}
forces[Z] = vector[UV]*factor1 + factor2*vector[UW];
forces[X] = vector[UV]*factor3 + factor4*vector[VW];
forces[I] = -(forces[X]+forces[Z]);
forces[Y] = make_real3(0);
}
else if (axisType == 2) {
// z-bisect
vector[R] = vector[V] + vector[W];
vector[S] = cross(vector[U], vector[R]);
norms[R] = normVector(vector[R]);
norms[S] = normVector(vector[S]);
vector[UR] = cross(vector[R], vector[U]);
vector[US] = cross(vector[S], vector[U]);
vector[VS] = cross(vector[S], vector[V]);
vector[WS] = cross(vector[S], vector[W]);
norms[UR] = normVector(vector[UR]);
norms[US] = normVector(vector[US]);
norms[VS] = normVector(vector[VS]);
norms[WS] = normVector(vector[WS]);
angles[UR][0] = dot(vector[U], vector[R]);
angles[UR][1] = SQRT(1 - angles[UR][0]*angles[UR][0]);
angles[US][0] = dot(vector[U], vector[S]);
angles[US][1] = SQRT(1 - angles[US][0]*angles[US][0]);
angles[VS][0] = dot(vector[V], vector[S]);
angles[VS][1] = SQRT(1 - angles[VS][0]*angles[VS][0]);
angles[WS][0] = dot(vector[W], vector[S]);
angles[WS][1] = SQRT(1 - angles[WS][0]*angles[WS][0]);
real3 t1 = vector[V] - vector[S]*angles[VS][0];
real3 t2 = vector[W] - vector[S]*angles[WS][0];
normVector(t1);
normVector(t2);
real ut1cos = dot(vector[U], t1);
real ut1sin = SQRT(1 - ut1cos*ut1cos);
real ut2cos = dot(vector[U], t2);
real ut2sin = SQRT(1 - ut2cos*ut2cos);
real dphiR = -dot(vector[R], torque);
real dphiS = -dot(vector[S], torque);
real factor1 = dphiR/(norms[U]*angles[UR][1]);
real factor2 = dphiS/(norms[U]);
real factor3 = dphi[U]/(norms[V]*(ut1sin+ut2sin));
real factor4 = dphi[U]/(norms[W]*(ut1sin+ut2sin));
forces[Z] = vector[UR]*factor1 + factor2*vector[US];
forces[X] = (angles[VS][1]*vector[S] - angles[VS][0]*t1)*factor3;
forces[Y] = (angles[WS][1]*vector[S] - angles[WS][0]*t2)*factor4;
forces[I] = -(forces[X] + forces[Y] + forces[Z]);
}
else if (axisType == 3) {
// 3-fold
forces[Z] = (vector[UW]*dphi[W]/(norms[U]*angles[UW][1]) +
vector[UV]*dphi[V]/(norms[U]*angles[UV][1]) -
vector[UW]*dphi[U]/(norms[U]*angles[UW][1]) -
vector[UV]*dphi[U]/(norms[U]*angles[UV][1]))/3;
forces[X] = (vector[VW]*dphi[W]/(norms[V]*angles[VW][1]) -
vector[UV]*dphi[U]/(norms[V]*angles[UV][1]) -
vector[VW]*dphi[V]/(norms[V]*angles[VW][1]) +
vector[UV]*dphi[V]/(norms[V]*angles[UV][1]))/3;
forces[Y] = (-vector[UW]*dphi[U]/(norms[W]*angles[UW][1]) -
vector[VW]*dphi[V]/(norms[W]*angles[VW][1]) +
vector[UW]*dphi[W]/(norms[W]*angles[UW][1]) +
vector[VW]*dphi[W]/(norms[W]*angles[VW][1]))/3;
forces[I] = -(forces[X] + forces[Y] + forces[Z]);
}
else if (axisType == 4) {
// z-only
forces[Z] = vector[UV]*dphi[V]/(norms[U]*angles[UV][1]);
forces[X] = make_float3(0);
forces[Y] = make_float3(0);
forces[I] = -forces[Z];
}
else {
forces[Z] = make_float3(0);
forces[X] = make_float3(0);
forces[Y] = make_float3(0);
forces[I] = make_float3(0);
}
// Store results
atomicAdd(&forceBuffers[particles.z], static_cast<unsigned long long>((long long) (forces[Z].x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].z*0xFFFFFFFF)));
if (axisType != 4) {
atomicAdd(&forceBuffers[particles.x], static_cast<unsigned long long>((long long) (forces[X].x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].z*0xFFFFFFFF)));
}
if ((axisType == 2 || axisType == 3) && particles.y > -1) {
atomicAdd(&forceBuffers[particles.y], static_cast<unsigned long long>((long long) (forces[Y].x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].z*0xFFFFFFFF)));
}
atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>((long long) (forces[I].x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].z*0xFFFFFFFF)));
}
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment