Commit 07b7d2d3 (unverified), authored by Peter Eastman, committed by GitHub
Browse files

Cleanup to AmoebaMultipoleForce (#3068)

* Cleanup to CUDA AmoebaMultipoleForce

* Deleted obsolete SOR code
parent 8c5479c5
...@@ -161,11 +161,11 @@ private: ...@@ -161,11 +161,11 @@ private:
bool iterateDipolesByDIIS(int iteration); bool iterateDipolesByDIIS(int iteration);
void computeExtrapolatedDipoles(void** recipBoxVectorPointer); void computeExtrapolatedDipoles(void** recipBoxVectorPointer);
void ensureMultipolesValid(ContextImpl& context); void ensureMultipolesValid(ContextImpl& context);
template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments); template <class T, class T3, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
int numMultipoles, maxInducedIterations, maxExtrapolationOrder; int numMultipoles, maxInducedIterations, maxExtrapolationOrder;
int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads; int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads;
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
double alpha, inducedEpsilon; double pmeAlpha, inducedEpsilon;
bool usePME, hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid, hasCreatedEvent; bool usePME, hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid, hasCreatedEvent;
AmoebaMultipoleForce::PolarizationType polarizationType; AmoebaMultipoleForce::PolarizationType polarizationType;
CudaContext& cu; CudaContext& cu;
...@@ -173,10 +173,10 @@ private: ...@@ -173,10 +173,10 @@ private:
std::vector<int3> covalentFlagValues; std::vector<int3> covalentFlagValues;
std::vector<int2> polarizationFlagValues; std::vector<int2> polarizationFlagValues;
CudaArray multipoleParticles; CudaArray multipoleParticles;
CudaArray molecularDipoles; CudaArray localDipoles;
CudaArray molecularQuadrupoles; CudaArray localQuadrupoles;
CudaArray labFrameDipoles; CudaArray labDipoles;
CudaArray labFrameQuadrupoles; CudaArray labQuadrupoles;
CudaArray sphericalDipoles; CudaArray sphericalDipoles;
CudaArray sphericalQuadrupoles; CudaArray sphericalQuadrupoles;
CudaArray fracDipoles; CudaArray fracDipoles;
......
...@@ -189,7 +189,7 @@ __device__ void computeOneInteractionT2(AtomData2& atom1, volatile AtomData2& at ...@@ -189,7 +189,7 @@ __device__ void computeOneInteractionT2(AtomData2& atom1, volatile AtomData2& at
__device__ void computeOneInteractionB1B2(AtomData2& atom1, volatile AtomData2& atom2); __device__ void computeOneInteractionB1B2(AtomData2& atom1, volatile AtomData2& atom2);
inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ bornRadius) { const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar, const real* __restrict__ bornRadius) {
real4 atomPosq = posq[atom]; real4 atomPosq = posq[atom];
data.pos = trimTo3(atomPosq); data.pos = trimTo3(atomPosq);
data.q = atomPosq.w; data.q = atomPosq.w;
...@@ -202,12 +202,8 @@ inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __r ...@@ -202,12 +202,8 @@ inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __r
data.quadrupoleYY = labFrameQuadrupole[atom*5+3]; data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4]; data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY); data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
data.inducedDipole.x = inducedDipole[atom*3]; data.inducedDipole = inducedDipole[atom];
data.inducedDipole.y = inducedDipole[atom*3+1]; data.inducedDipolePolar = inducedDipolePolar[atom];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.bornRadius = bornRadius[atom]; data.bornRadius = bornRadius[atom];
} }
...@@ -222,7 +218,7 @@ inline __device__ void zeroAtomData(AtomData2& data) { ...@@ -222,7 +218,7 @@ inline __device__ void zeroAtomData(AtomData2& data) {
extern "C" __global__ void computeGKForces( extern "C" __global__ void computeGKForces(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ labFrameDipole, const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
const real* __restrict__ bornRadii, unsigned long long* __restrict__ bornForce) { const real* __restrict__ bornRadii, unsigned long long* __restrict__ bornForce) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
...@@ -565,8 +561,8 @@ __device__ void computeOneEDiffInteractionT1(AtomData4& atom1, volatile AtomData ...@@ -565,8 +561,8 @@ __device__ void computeOneEDiffInteractionT1(AtomData4& atom1, volatile AtomData
__device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce); __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce);
inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS, const float2* __restrict__ dampingAndThole) { const real3* __restrict__ inducedDipoleS, const real3* __restrict__ inducedDipolePolarS, const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom]; real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z); data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.q = atomPosq.w; data.q = atomPosq.w;
...@@ -579,18 +575,10 @@ inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __r ...@@ -579,18 +575,10 @@ inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __r
data.quadrupoleYY = labFrameQuadrupole[atom*5+3]; data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4]; data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY); data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
data.inducedDipole.x = inducedDipole[atom*3]; data.inducedDipole = inducedDipole[atom];
data.inducedDipole.y = inducedDipole[atom*3+1]; data.inducedDipolePolar = inducedDipolePolar[atom];
data.inducedDipole.z = inducedDipole[atom*3+2]; data.inducedDipoleS = inducedDipoleS[atom];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3]; data.inducedDipolePolarS = inducedDipolePolarS[atom];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.inducedDipoleS.x = inducedDipoleS[atom*3];
data.inducedDipoleS.y = inducedDipoleS[atom*3+1];
data.inducedDipoleS.z = inducedDipoleS[atom*3+2];
data.inducedDipolePolarS.x = inducedDipolePolarS[atom*3];
data.inducedDipolePolarS.y = inducedDipolePolarS[atom*3+1];
data.inducedDipolePolarS.z = inducedDipolePolarS[atom*3+2];
float2 temp = dampingAndThole[atom]; float2 temp = dampingAndThole[atom];
data.damp = temp.x; data.damp = temp.x;
data.thole = temp.y; data.thole = temp.y;
...@@ -615,8 +603,8 @@ extern "C" __global__ void computeEDiffForce( ...@@ -615,8 +603,8 @@ extern "C" __global__ void computeEDiffForce(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS, const real3* __restrict__ inducedDipolePolar, const real3* __restrict__ inducedDipoleS, const real3* __restrict__ inducedDipolePolarS,
const float2* __restrict__ dampingAndThole) { const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
......
...@@ -10,7 +10,7 @@ typedef struct { ...@@ -10,7 +10,7 @@ typedef struct {
} AtomData; } AtomData;
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole, inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) { const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom]; real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z); data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.q = atomPosq.w; data.q = atomPosq.w;
...@@ -24,12 +24,8 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -24,12 +24,8 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3]; data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3];
data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4]; data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4];
#endif #endif
data.inducedDipole.x = inducedDipole[atom*3]; data.inducedDipole = inducedDipole[atom];
data.inducedDipole.y = inducedDipole[atom*3+1]; data.inducedDipolePolar = inducedDipolePolar[atom];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
float2 temp = dampingAndThole[atom]; float2 temp = dampingAndThole[atom];
data.damp = temp.x; data.damp = temp.x;
data.thole = temp.y; data.thole = temp.y;
...@@ -382,8 +378,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -382,8 +378,8 @@ extern "C" __global__ void computeElectrostatics(
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms, const unsigned int* __restrict__ interactingAtoms,
#endif #endif
const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) { const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
......
...@@ -18,31 +18,23 @@ typedef struct { ...@@ -18,31 +18,23 @@ typedef struct {
} AtomData; } AtomData;
#ifdef USE_GK #ifdef USE_GK
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ inducedDipole, inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real3* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole, const real* __restrict__ inducedDipoleS, const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole, const real3* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii) { const real3* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii) {
#else #else
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ inducedDipole, inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real3* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) { const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
#endif #endif
real4 atomPosq = posq[atom]; real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z); data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.inducedDipole.x = inducedDipole[atom*3]; data.inducedDipole = inducedDipole[atom];
data.inducedDipole.y = inducedDipole[atom*3+1]; data.inducedDipolePolar = inducedDipolePolar[atom];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
float2 temp = dampingAndThole[atom]; float2 temp = dampingAndThole[atom];
data.damp = temp.x; data.damp = temp.x;
data.thole = temp.y; data.thole = temp.y;
#ifdef USE_GK #ifdef USE_GK
data.inducedDipoleS.x = inducedDipoleS[atom*3]; data.inducedDipoleS = inducedDipoleS[atom];
data.inducedDipoleS.y = inducedDipoleS[atom*3+1]; data.inducedDipolePolarS = inducedDipolePolarS[atom];
data.inducedDipoleS.z = inducedDipoleS[atom*3+2];
data.inducedDipolePolarS.x = inducedDipolePolarS[atom*3];
data.inducedDipolePolarS.y = inducedDipolePolarS[atom*3+1];
data.inducedDipolePolarS.z = inducedDipolePolarS[atom*3+2];
data.bornRadius = bornRadii[atom]; data.bornRadius = bornRadii[atom];
#endif #endif
} }
...@@ -358,7 +350,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -358,7 +350,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
*/ */
extern "C" __global__ void computeInducedField( extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const int2* __restrict__ exclusionTiles, unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const int2* __restrict__ exclusionTiles,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef EXTRAPOLATED_POLARIZATION #ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar, unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
#endif #endif
...@@ -366,8 +358,8 @@ extern "C" __global__ void computeInducedField( ...@@ -366,8 +358,8 @@ extern "C" __global__ void computeInducedField(
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK #elif defined USE_GK
unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS, unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real3* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii, const real3* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
#ifdef EXTRAPOLATED_POLARIZATION #ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradientS, unsigned long long* __restrict__ fieldGradientPolarS, unsigned long long* __restrict__ fieldGradientS, unsigned long long* __restrict__ fieldGradientPolarS,
#endif #endif
...@@ -556,53 +548,6 @@ extern "C" __global__ void computeInducedField( ...@@ -556,53 +548,6 @@ extern "C" __global__ void computeInducedField(
} }
} }
/**
 * Apply one successive over-relaxation (SOR) step to the induced dipoles and
 * record how much they changed.
 *
 * For every atom and Cartesian component, a target dipole is computed as
 * polarizability * (total field + ewaldScale * previous dipole), and the stored
 * dipole is moved a fraction polarSOR of the way from its previous value toward
 * that target.  The same is done for the "polar" dipoles using the polar fields.
 *
 * The field arrays hold fixed point values (scaled by 2^32) and are indexed as
 * atom + component*PADDED_NUM_ATOMS; the dipole arrays are indexed as
 * 3*atom + component.  fixedFieldS may be NULL, in which case it contributes 0.
 *
 * On exit, errors[blockIdx.x] holds this block's sums of squared dipole changes
 * (x: inducedDipole, y: inducedDipolePolar).
 *
 * The dynamically sized shared array is indexed by threadIdx.x, so the launch
 * presumably must supply at least blockDim.x*sizeof(real2) bytes of shared
 * memory -- confirm against the host-side launch code.
 */
extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar,
        const long long* __restrict__ fixedFieldS, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar,
        real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors) {
    extern __shared__ real2 buffer[];

    // Relaxation factor: fraction of the way to move toward the newly computed dipole.
    const float polarSOR = 0.55f;

    // Coefficient of the previous dipole in the field expression (zero without Ewald).
#ifdef USE_EWALD
    const real ewaldScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
#else
    const real ewaldScale = 0;
#endif

    // Converts the fixed point field sums back to floating point.
    const real fieldScale = 1/(real) 0x100000000;

    real squaredChange = 0;
    real squaredChangePolar = 0;
    const unsigned int stride = blockDim.x*gridDim.x;
    int atom = blockIdx.x*blockDim.x + threadIdx.x;
    while (atom < NUM_ATOMS) {
        const real alpha = polarizability[atom];
        for (int axis = 0; axis < 3; axis++) {
            const int dipoleSlot = 3*atom+axis;
            const int fieldSlot = atom+axis*PADDED_NUM_ATOMS;
            const real oldDipole = inducedDipole[dipoleSlot];
            const real oldDipolePolar = inducedDipolePolar[dipoleSlot];

            // Accumulate the fixed point field contributions as integers before scaling.
            long long extraFixed = 0;
            if (fixedFieldS != NULL)
                extraFixed = fixedFieldS[fieldSlot];
            const long long totalField = fixedField[fieldSlot]+extraFixed+inducedField[fieldSlot];
            const long long totalFieldPolar = fixedFieldPolar[fieldSlot]+extraFixed+inducedFieldPolar[fieldSlot];

            // Unrelaxed dipoles, then relaxed toward them by polarSOR.
            real updated = alpha*(totalField*fieldScale+ewaldScale*oldDipole);
            real updatedPolar = alpha*(totalFieldPolar*fieldScale+ewaldScale*oldDipolePolar);
            updated = oldDipole + polarSOR*(updated-oldDipole);
            updatedPolar = oldDipolePolar + polarSOR*(updatedPolar-oldDipolePolar);
            inducedDipole[dipoleSlot] = updated;
            inducedDipolePolar[dipoleSlot] = updatedPolar;

            squaredChange += (updated-oldDipole)*(updated-oldDipole);
            squaredChangePolar += (updatedPolar-oldDipolePolar)*(updatedPolar-oldDipolePolar);
        }
        atom += stride;
    }

    // Combine the per-thread totals within the block; thread 0 ends up with the block total.
    buffer[threadIdx.x] = make_real2(squaredChange, squaredChangePolar);
    __syncthreads();
    for (int offset = 1; offset < blockDim.x; offset *= 2) {
        const bool hasPartner = (threadIdx.x+offset < blockDim.x);
        const bool isReceiver = ((threadIdx.x&(2*offset-1)) == 0);
        if (hasPartner && isReceiver) {
            buffer[threadIdx.x].x += buffer[threadIdx.x+offset].x;
            buffer[threadIdx.x].y += buffer[threadIdx.x+offset].y;
        }
        // Every thread reaches this barrier on each pass, whether or not it added.
        __syncthreads();
    }
    if (threadIdx.x == 0)
        errors[blockIdx.x] = make_float2((float) buffer[0].x, (float) buffer[0].y);
}
extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar, extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar,
const long long* __restrict__ fixedFieldS, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar, const long long* __restrict__ fixedFieldS, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors,
......
...@@ -72,11 +72,11 @@ __device__ void computeBSplinePoint(real4* thetai, real w, real* array) { ...@@ -72,11 +72,11 @@ __device__ void computeBSplinePoint(real4* thetai, real w, real* array) {
/** /**
* Convert the fixed multipoles from Cartesian to fractional coordinates. * Convert the fixed multipoles from Cartesian to fractional coordinates.
*/ */
extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real* __restrict__ labFrameDipole, extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real* __restrict__ labDipole,
#ifdef HIPPO #ifdef HIPPO
const real* __restrict__ labQXX, const real* __restrict__ labQXY, const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ, const real* __restrict__ labQXX, const real* __restrict__ labQXY, const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ,
#else #else
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ labQuadrupole,
#endif #endif
real* __restrict__ fracDipole, real* __restrict__ fracQuadrupole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { real* __restrict__ fracDipole, real* __restrict__ fracQuadrupole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// Build matrices for transforming the dipoles and quadrupoles. // Build matrices for transforming the dipoles and quadrupoles.
...@@ -113,7 +113,7 @@ extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real ...@@ -113,7 +113,7 @@ extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real
for (int j = 0; j < 3; j++) { for (int j = 0; j < 3; j++) {
real dipole = 0; real dipole = 0;
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
dipole += a[j][k]*labFrameDipole[3*i+k]; dipole += a[j][k]*labDipole[3*i+k];
fracDipole[3*i+j] = dipole; fracDipole[3*i+j] = dipole;
} }
for (int j = 0; j < 6; j++) { for (int j = 0; j < 6; j++) {
...@@ -127,8 +127,8 @@ extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real ...@@ -127,8 +127,8 @@ extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real
#else #else
real quadrupole = 0; real quadrupole = 0;
for (int k = 0; k < 5; k++) for (int k = 0; k < 5; k++)
quadrupole += quadScale[k]*b[j][k]*labFrameQuadrupole[5*i+k]; quadrupole += quadScale[k]*b[j][k]*labQuadrupole[5*i+k];
quadrupole -= quadScale[5]*b[j][5]*(labFrameQuadrupole[5*i]+labFrameQuadrupole[5*i+3]); quadrupole -= quadScale[5]*b[j][5]*(labQuadrupole[5*i]+labQuadrupole[5*i+3]);
#endif #endif
fracQuadrupole[6*i+j] = quadrupole; fracQuadrupole[6*i+j] = quadrupole;
} }
...@@ -289,11 +289,11 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p ...@@ -289,11 +289,11 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
} }
} }
extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ posq, const real* __restrict__ inducedDipole, extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ posq, const real3* __restrict__ inducedDipole,
#ifdef HIPPO #ifdef HIPPO
real* __restrict__ pmeGrid, real* __restrict__ pmeGrid,
#else #else
const real* __restrict__ inducedDipolePolar, real2* __restrict__ pmeGrid, const real3* __restrict__ inducedDipolePolar, real2* __restrict__ pmeGrid,
#endif #endif
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
#if __CUDA_ARCH__ < 500 #if __CUDA_ARCH__ < 500
...@@ -325,12 +325,12 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po ...@@ -325,12 +325,12 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
pos -= periodicBoxVecZ*floor(pos.z*recipBoxVecZ.z+0.5f); pos -= periodicBoxVecZ*floor(pos.z*recipBoxVecZ.z+0.5f);
pos -= periodicBoxVecY*floor(pos.y*recipBoxVecY.z+0.5f); pos -= periodicBoxVecY*floor(pos.y*recipBoxVecY.z+0.5f);
pos -= periodicBoxVecX*floor(pos.x*recipBoxVecX.z+0.5f); pos -= periodicBoxVecX*floor(pos.x*recipBoxVecX.z+0.5f);
real3 cinducedDipole = ((const real3*) inducedDipole)[m]; real3 cinducedDipole = inducedDipole[m];
real3 finducedDipole = make_real3(cinducedDipole.x*cartToFrac[0][0] + cinducedDipole.y*cartToFrac[0][1] + cinducedDipole.z*cartToFrac[0][2], real3 finducedDipole = make_real3(cinducedDipole.x*cartToFrac[0][0] + cinducedDipole.y*cartToFrac[0][1] + cinducedDipole.z*cartToFrac[0][2],
cinducedDipole.x*cartToFrac[1][0] + cinducedDipole.y*cartToFrac[1][1] + cinducedDipole.z*cartToFrac[1][2], cinducedDipole.x*cartToFrac[1][0] + cinducedDipole.y*cartToFrac[1][1] + cinducedDipole.z*cartToFrac[1][2],
cinducedDipole.x*cartToFrac[2][0] + cinducedDipole.y*cartToFrac[2][1] + cinducedDipole.z*cartToFrac[2][2]); cinducedDipole.x*cartToFrac[2][0] + cinducedDipole.y*cartToFrac[2][1] + cinducedDipole.z*cartToFrac[2][2]);
#ifndef HIPPO #ifndef HIPPO
real3 cinducedDipolePolar = ((const real3*) inducedDipolePolar)[m]; real3 cinducedDipolePolar = inducedDipolePolar[m];
real3 finducedDipolePolar = make_real3(cinducedDipolePolar.x*cartToFrac[0][0] + cinducedDipolePolar.y*cartToFrac[0][1] + cinducedDipolePolar.z*cartToFrac[0][2], real3 finducedDipolePolar = make_real3(cinducedDipolePolar.x*cartToFrac[0][0] + cinducedDipolePolar.y*cartToFrac[0][1] + cinducedDipolePolar.z*cartToFrac[0][2],
cinducedDipolePolar.x*cartToFrac[1][0] + cinducedDipolePolar.y*cartToFrac[1][1] + cinducedDipolePolar.z*cartToFrac[1][2], cinducedDipolePolar.x*cartToFrac[1][0] + cinducedDipolePolar.y*cartToFrac[1][1] + cinducedDipolePolar.z*cartToFrac[1][2],
cinducedDipolePolar.x*cartToFrac[2][0] + cinducedDipolePolar.y*cartToFrac[2][1] + cinducedDipolePolar.z*cartToFrac[2][2]); cinducedDipolePolar.x*cartToFrac[2][0] + cinducedDipolePolar.y*cartToFrac[2][1] + cinducedDipolePolar.z*cartToFrac[2][2]);
...@@ -480,7 +480,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid( ...@@ -480,7 +480,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid(
#ifndef HIPPO #ifndef HIPPO
long long* __restrict__ fieldPolarBuffers, long long* __restrict__ fieldPolarBuffers,
#endif #endif
const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, const real4* __restrict__ posq, const real* __restrict__ labDipole, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
#if __CUDA_ARCH__ < 500 #if __CUDA_ARCH__ < 500
real array[PME_ORDER*PME_ORDER]; real array[PME_ORDER*PME_ORDER];
...@@ -643,9 +643,9 @@ extern "C" __global__ void computeFixedPotentialFromGrid( ...@@ -643,9 +643,9 @@ extern "C" __global__ void computeFixedPotentialFromGrid(
phi[m+NUM_ATOMS*18] = tuv012; phi[m+NUM_ATOMS*18] = tuv012;
phi[m+NUM_ATOMS*19] = tuv111; phi[m+NUM_ATOMS*19] = tuv111;
real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI; real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-tuv100*fracToCart[0][0]-tuv010*fracToCart[0][1]-tuv001*fracToCart[0][2])*0x100000000); long long fieldx = (long long) ((dipoleScale*labDipole[m*3]-tuv100*fracToCart[0][0]-tuv010*fracToCart[0][1]-tuv001*fracToCart[0][2])*0x100000000);
long long fieldy = (long long) ((dipoleScale*labFrameDipole[m*3+1]-tuv100*fracToCart[1][0]-tuv010*fracToCart[1][1]-tuv001*fracToCart[1][2])*0x100000000); long long fieldy = (long long) ((dipoleScale*labDipole[m*3+1]-tuv100*fracToCart[1][0]-tuv010*fracToCart[1][1]-tuv001*fracToCart[1][2])*0x100000000);
long long fieldz = (long long) ((dipoleScale*labFrameDipole[m*3+2]-tuv100*fracToCart[2][0]-tuv010*fracToCart[2][1]-tuv001*fracToCart[2][2])*0x100000000); long long fieldz = (long long) ((dipoleScale*labDipole[m*3+2]-tuv100*fracToCart[2][0]-tuv010*fracToCart[2][1]-tuv001*fracToCart[2][2])*0x100000000);
fieldBuffers[m] = fieldx; fieldBuffers[m] = fieldx;
fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy; fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy;
fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz; fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
...@@ -942,12 +942,12 @@ extern "C" __global__ void computeInducedPotentialFromGrid( ...@@ -942,12 +942,12 @@ extern "C" __global__ void computeInducedPotentialFromGrid(
} }
extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole, long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labDipole,
#ifdef HIPPO #ifdef HIPPO
const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real* __restrict__ labQXX, const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real* __restrict__ labQXX,
const real* __restrict__ labQXY, const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ, const real* __restrict__ labQXY, const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ,
#else #else
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ labQuadrupole,
#endif #endif
const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole, const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole,
const real* __restrict__ phi, const real* __restrict__ cphi_global, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { const real* __restrict__ phi, const real* __restrict__ cphi_global, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
...@@ -972,9 +972,9 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict ...@@ -972,9 +972,9 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// Compute the torque. // Compute the torque.
multipole[1] = labFrameDipole[i*3]; multipole[1] = labDipole[i*3];
multipole[2] = labFrameDipole[i*3+1]; multipole[2] = labDipole[i*3+1];
multipole[3] = labFrameDipole[i*3+2]; multipole[3] = labDipole[i*3+2];
#ifdef HIPPO #ifdef HIPPO
multipole[0] = coreCharge[i]+valenceCharge[i]; multipole[0] = coreCharge[i]+valenceCharge[i];
multipole[4] = labQXX[i]; multipole[4] = labQXX[i];
...@@ -984,11 +984,11 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict ...@@ -984,11 +984,11 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
multipole[9] = 2*labQYZ[i]; multipole[9] = 2*labQYZ[i];
#else #else
multipole[0] = posq[i].w; multipole[0] = posq[i].w;
multipole[4] = labFrameQuadrupole[i*5]; multipole[4] = labQuadrupole[i*5];
multipole[5] = labFrameQuadrupole[i*5+3]; multipole[5] = labQuadrupole[i*5+3];
multipole[7] = 2*labFrameQuadrupole[i*5+1]; multipole[7] = 2*labQuadrupole[i*5+1];
multipole[8] = 2*labFrameQuadrupole[i*5+2]; multipole[8] = 2*labQuadrupole[i*5+2];
multipole[9] = 2*labFrameQuadrupole[i*5+4]; multipole[9] = 2*labQuadrupole[i*5+4];
#endif #endif
multipole[6] = -(multipole[4]+multipole[5]); multipole[6] = -(multipole[4]+multipole[5]);
...@@ -1039,17 +1039,17 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict ...@@ -1039,17 +1039,17 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
} }
extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole, long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labDipole,
#ifdef HIPPO #ifdef HIPPO
const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real* __restrict__ extrapolatedDipole, const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real* __restrict__ extrapolatedDipole,
const real* __restrict__ extrapolatedPhi, const real* __restrict__ labQXX, const real* __restrict__ labQXY, const real* __restrict__ extrapolatedPhi, const real* __restrict__ labQXX, const real* __restrict__ labQXY,
const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ, const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ,
#else #else
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ labQuadrupole,
#endif #endif
const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole, const real* __restrict__ inducedDipole_global, const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole, const real3* __restrict__ inducedDipole_global,
#ifndef HIPPO #ifndef HIPPO
const real* __restrict__ inducedDipolePolar_global, const real3* __restrict__ inducedDipolePolar_global,
#endif #endif
const real* __restrict__ phi, const real* __restrict__ phi,
#ifndef HIPPO #ifndef HIPPO
...@@ -1082,9 +1082,9 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_ ...@@ -1082,9 +1082,9 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// Compute the torque. // Compute the torque.
multipole[1] = labFrameDipole[i*3]; multipole[1] = labDipole[i*3];
multipole[2] = labFrameDipole[i*3+1]; multipole[2] = labDipole[i*3+1];
multipole[3] = labFrameDipole[i*3+2]; multipole[3] = labDipole[i*3+2];
#ifdef HIPPO #ifdef HIPPO
multipole[0] = coreCharge[i]+valenceCharge[i]; multipole[0] = coreCharge[i]+valenceCharge[i];
multipole[4] = labQXX[i]; multipole[4] = labQXX[i];
...@@ -1095,11 +1095,11 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_ ...@@ -1095,11 +1095,11 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
const real scale = EPSILON_FACTOR; const real scale = EPSILON_FACTOR;
#else #else
multipole[0] = posq[i].w; multipole[0] = posq[i].w;
multipole[4] = labFrameQuadrupole[i*5]; multipole[4] = labQuadrupole[i*5];
multipole[5] = labFrameQuadrupole[i*5+3]; multipole[5] = labQuadrupole[i*5+3];
multipole[7] = 2*labFrameQuadrupole[i*5+1]; multipole[7] = 2*labQuadrupole[i*5+1];
multipole[8] = 2*labFrameQuadrupole[i*5+2]; multipole[8] = 2*labQuadrupole[i*5+2];
multipole[9] = 2*labFrameQuadrupole[i*5+4]; multipole[9] = 2*labQuadrupole[i*5+4];
const real scale = EPSILON_FACTOR/2; const real scale = EPSILON_FACTOR/2;
#endif #endif
multipole[6] = -(multipole[4]+multipole[5]); multipole[6] = -(multipole[4]+multipole[5]);
...@@ -1132,13 +1132,13 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_ ...@@ -1132,13 +1132,13 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
multipole[8] = fracQuadrupole[i*6+2]; multipole[8] = fracQuadrupole[i*6+2];
multipole[9] = fracQuadrupole[i*6+4]; multipole[9] = fracQuadrupole[i*6+4];
cinducedDipole[0] = inducedDipole_global[i*3]; cinducedDipole[0] = inducedDipole_global[i].x;
cinducedDipole[1] = inducedDipole_global[i*3+1]; cinducedDipole[1] = inducedDipole_global[i].y;
cinducedDipole[2] = inducedDipole_global[i*3+2]; cinducedDipole[2] = inducedDipole_global[i].z;
#ifndef HIPPO #ifndef HIPPO
cinducedDipolePolar[0] = inducedDipolePolar_global[i*3]; cinducedDipolePolar[0] = inducedDipolePolar_global[i].x;
cinducedDipolePolar[1] = inducedDipolePolar_global[i*3+1]; cinducedDipolePolar[1] = inducedDipolePolar_global[i].y;
cinducedDipolePolar[2] = inducedDipolePolar_global[i*3+2]; cinducedDipolePolar[2] = inducedDipolePolar_global[i].z;
#endif #endif
// Multiply the dipoles by cartToFrac, which is just the transpose of fracToCart. // Multiply the dipoles by cartToFrac, which is just the transpose of fracToCart.
...@@ -1212,7 +1212,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_ ...@@ -1212,7 +1212,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
#ifdef HIPPO #ifdef HIPPO
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phidp, long long* __restrict__ inducedField, extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phidp, long long* __restrict__ inducedField,
const real* __restrict__ inducedDipole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { const real3* __restrict__ inducedDipole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
__shared__ real fracToCart[3][3]; __shared__ real fracToCart[3][3];
if (threadIdx.x == 0) { if (threadIdx.x == 0) {
fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x; fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x;
...@@ -1228,15 +1228,15 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph ...@@ -1228,15 +1228,15 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph
__syncthreads(); __syncthreads();
real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI; real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
inducedField[i] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[0][0] + phidp[i+NUM_ATOMS*2]*fracToCart[0][1] + phidp[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i])); inducedField[i] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[0][0] + phidp[i+NUM_ATOMS*2]*fracToCart[0][1] + phidp[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[i].x));
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[1][0] + phidp[i+NUM_ATOMS*2]*fracToCart[1][1] + phidp[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1])); inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[1][0] + phidp[i+NUM_ATOMS*2]*fracToCart[1][1] + phidp[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[i].y));
inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[2][0] + phidp[i+NUM_ATOMS*2]*fracToCart[2][1] + phidp[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2])); inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[2][0] + phidp[i+NUM_ATOMS*2]*fracToCart[2][1] + phidp[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[i].z));
} }
} }
extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real* __restrict__ labFrameDipole, const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real3* __restrict__ labDipole, const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge,
const real* __restrict__ c6, const real* __restrict__ inducedDipole, const real* __restrict__ labQXX, const real* __restrict__ labQXY, const real* __restrict__ c6, const real3* __restrict__ inducedDipole, const real* __restrict__ labQXX, const real* __restrict__ labQXY,
const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ) { const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ) {
const real torqueScale = 4*EPSILON_FACTOR*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/(3*SQRT_PI); const real torqueScale = 4*EPSILON_FACTOR*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/(3*SQRT_PI);
real cii = 0; real cii = 0;
...@@ -1245,8 +1245,8 @@ extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__ ...@@ -1245,8 +1245,8 @@ extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__
real c6ii = 0; real c6ii = 0;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
real charge = coreCharge[i]+valenceCharge[i]; real charge = coreCharge[i]+valenceCharge[i];
real3 dipole = make_real3(labFrameDipole[i*3], labFrameDipole[i*3+1], labFrameDipole[i*3+2]); real3 dipole = labDipole[i];
real3 induced = make_real3(inducedDipole[i*3], inducedDipole[i*3+1], inducedDipole[i*3+2]); real3 induced = inducedDipole[i];
real qXX = labQXX[i]; real qXX = labQXX[i];
real qXY = labQXY[i]; real qXY = labQXY[i];
real qXZ = labQXZ[i]; real qXZ = labQXZ[i];
...@@ -1270,7 +1270,7 @@ extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__ ...@@ -1270,7 +1270,7 @@ extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__
} }
#else #else
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip, long long* __restrict__ inducedField, extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip, long long* __restrict__ inducedField,
long long* __restrict__ inducedFieldPolar, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, long long* __restrict__ inducedFieldPolar, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
#ifdef EXTRAPOLATED_POLARIZATION #ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar, unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
#endif #endif
...@@ -1290,12 +1290,12 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph ...@@ -1290,12 +1290,12 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph
__syncthreads(); __syncthreads();
real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI; real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
inducedField[i] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i])); inducedField[i] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[i].x));
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1])); inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[i].y));
inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2])); inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[i].z));
inducedFieldPolar[i] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipolePolar[3*i])); inducedFieldPolar[i] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipolePolar[i].x));
inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipolePolar[3*i+1])); inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipolePolar[i].y));
inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipolePolar[3*i+2])); inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipolePolar[i].z));
#ifdef EXTRAPOLATED_POLARIZATION #ifdef EXTRAPOLATED_POLARIZATION
// Compute and store the field gradients for later use. // Compute and store the field gradients for later use.
......
extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4* __restrict__ multipoleParticles, float* __restrict__ molecularDipoles, extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4* __restrict__ multipoleParticles, float* __restrict__ molecularDipoles,
float* __restrict__ molecularQuadrupoles, real* __restrict__ labFrameDipoles, real* __restrict__ labFrameQuadrupoles, float* __restrict__ molecularQuadrupoles, real3* __restrict__ labFrameDipoles, real* __restrict__ labFrameQuadrupoles,
real* __restrict__ sphericalDipoles, real* __restrict__ sphericalQuadrupoles) { real* __restrict__ sphericalDipoles, real* __restrict__ sphericalQuadrupoles) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) { for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
// Load the spherical multipoles. // Load the spherical multipoles.
...@@ -176,9 +176,9 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4 ...@@ -176,9 +176,9 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
molDipole[2] = molecularDipoles[offset+2]; molDipole[2] = molecularDipoles[offset+2];
if (reverse) if (reverse)
molDipole[1] *= -1; molDipole[1] *= -1;
labFrameDipoles[offset] = molDipole[0]*vectorX.x + molDipole[1]*vectorY.x + molDipole[2]*vectorZ.x; labFrameDipoles[atom] = make_real3(molDipole[0]*vectorX.x + molDipole[1]*vectorY.x + molDipole[2]*vectorZ.x,
labFrameDipoles[offset+1] = molDipole[0]*vectorX.y + molDipole[1]*vectorY.y + molDipole[2]*vectorZ.y; molDipole[0]*vectorX.y + molDipole[1]*vectorY.y + molDipole[2]*vectorZ.y,
labFrameDipoles[offset+2] = molDipole[0]*vectorX.z + molDipole[1]*vectorY.z + molDipole[2]*vectorZ.z; molDipole[0]*vectorX.z + molDipole[1]*vectorY.z + molDipole[2]*vectorZ.z);
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -275,9 +275,7 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4 ...@@ -275,9 +275,7 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
sphericalQuadrupoles[offset+4] = rotatedQuadrupole[4]; sphericalQuadrupoles[offset+4] = rotatedQuadrupole[4];
} }
else { else {
labFrameDipoles[3*atom] = molecularDipoles[3*atom]; labFrameDipoles[atom] = make_real3(molecularDipoles[3*atom], molecularDipoles[3*atom+1], molecularDipoles[3*atom+2]);
labFrameDipoles[3*atom+1] = molecularDipoles[3*atom+1];
labFrameDipoles[3*atom+2] = molecularDipoles[3*atom+2];
labFrameQuadrupoles[5*atom] = molecularQuadrupoles[5*atom]; labFrameQuadrupoles[5*atom] = molecularQuadrupoles[5*atom];
labFrameQuadrupoles[5*atom+1] = molecularQuadrupoles[5*atom+1]; labFrameQuadrupoles[5*atom+1] = molecularQuadrupoles[5*atom+1];
labFrameQuadrupoles[5*atom+2] = molecularQuadrupoles[5*atom+2]; labFrameQuadrupoles[5*atom+2] = molecularQuadrupoles[5*atom+2];
...@@ -289,24 +287,24 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4 ...@@ -289,24 +287,24 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, const long long* __restrict__ fieldPolarBuffers, extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, const long long* __restrict__ fieldPolarBuffers,
#ifdef USE_GK #ifdef USE_GK
const long long* __restrict__ gkFieldBuffers, real* __restrict__ inducedDipoleS, real* __restrict__ inducedDipolePolarS, const long long* __restrict__ gkFieldBuffers, real3* __restrict__ inducedDipoleS, real3* __restrict__ inducedDipolePolarS,
#endif #endif
real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) { real3* __restrict__ inducedDipole, real3* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) { for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
real scale = polarizability[atom]/(real) 0x100000000; real scale = polarizability[atom]/(real) 0x100000000;
inducedDipole[3*atom] = scale*fieldBuffers[atom]; inducedDipole[atom].x = scale*fieldBuffers[atom];
inducedDipole[3*atom+1] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS]; inducedDipole[atom].y = scale*fieldBuffers[atom+PADDED_NUM_ATOMS];
inducedDipole[3*atom+2] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2]; inducedDipole[atom].z = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2];
inducedDipolePolar[3*atom] = scale*fieldPolarBuffers[atom]; inducedDipolePolar[atom].x = scale*fieldPolarBuffers[atom];
inducedDipolePolar[3*atom+1] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS]; inducedDipolePolar[atom].y = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS];
inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]; inducedDipolePolar[atom].z = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2];
#ifdef USE_GK #ifdef USE_GK
inducedDipoleS[3*atom] = scale*(fieldBuffers[atom]+gkFieldBuffers[atom]); inducedDipoleS[atom].x = scale*(fieldBuffers[atom]+gkFieldBuffers[atom]);
inducedDipoleS[3*atom+1] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]); inducedDipoleS[atom].y = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipoleS[3*atom+2] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]); inducedDipoleS[atom].z = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
inducedDipolePolarS[3*atom] = scale*(fieldPolarBuffers[atom]+gkFieldBuffers[atom]); inducedDipolePolarS[atom].x = scale*(fieldPolarBuffers[atom]+gkFieldBuffers[atom]);
inducedDipolePolarS[3*atom+1] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]); inducedDipolePolarS[atom].y = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipolePolarS[3*atom+2] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]); inducedDipolePolarS[atom].z = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
#endif #endif
} }
} }
...@@ -532,7 +530,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for ...@@ -532,7 +530,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
* Compute the electrostatic potential at each of a set of points. * Compute the electrostatic potential at each of a set of points.
*/ */
extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real4* __restrict__ points, const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real4* __restrict__ points,
real* __restrict__ potential, int numPoints, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) { real* __restrict__ potential, int numPoints, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
extern __shared__ real4 localPosq[]; extern __shared__ real4 localPosq[];
real3* localDipole = (real3*) &localPosq[blockDim.x]; real3* localDipole = (real3*) &localPosq[blockDim.x];
...@@ -550,7 +548,7 @@ extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ po ...@@ -550,7 +548,7 @@ extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ po
if (atom < NUM_ATOMS) { if (atom < NUM_ATOMS) {
localPosq[threadIdx.x] = posq[atom]; localPosq[threadIdx.x] = posq[atom];
localDipole[threadIdx.x] = make_real3(labFrameDipole[3*atom], labFrameDipole[3*atom+1], labFrameDipole[3*atom+2]); localDipole[threadIdx.x] = make_real3(labFrameDipole[3*atom], labFrameDipole[3*atom+1], labFrameDipole[3*atom+2]);
localInducedDipole[threadIdx.x] = make_real3(inducedDipole[3*atom], inducedDipole[3*atom+1], inducedDipole[3*atom+2]); localInducedDipole[threadIdx.x] = inducedDipole[atom];
localQuadrupole[5*threadIdx.x] = labFrameQuadrupole[5*atom]; localQuadrupole[5*threadIdx.x] = labFrameQuadrupole[5*atom];
localQuadrupole[5*threadIdx.x+1] = labFrameQuadrupole[5*atom+1]; localQuadrupole[5*threadIdx.x+1] = labFrameQuadrupole[5*atom+1];
localQuadrupole[5*threadIdx.x+2] = labFrameQuadrupole[5*atom+2]; localQuadrupole[5*threadIdx.x+2] = labFrameQuadrupole[5*atom+2];
......
...@@ -10,7 +10,7 @@ typedef struct { ...@@ -10,7 +10,7 @@ typedef struct {
} AtomData; } AtomData;
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole, inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
const float2* __restrict__ dampingAndThole) { const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom]; real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z); data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
...@@ -25,12 +25,8 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -25,12 +25,8 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3]; data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3];
data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4]; data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4];
#endif #endif
data.inducedDipole.x = inducedDipole[atom*3]; data.inducedDipole = inducedDipole[atom];
data.inducedDipole.y = inducedDipole[atom*3+1]; data.inducedDipolePolar = inducedDipolePolar[atom];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
float2 temp = dampingAndThole[atom]; float2 temp = dampingAndThole[atom];
data.damp = temp.x; data.damp = temp.x;
data.thole = temp.y; data.thole = temp.y;
...@@ -447,8 +443,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -447,8 +443,8 @@ extern "C" __global__ void computeElectrostatics(
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms, const unsigned int* __restrict__ interactingAtoms,
#endif #endif
const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) { const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
......
...@@ -44,7 +44,6 @@ AmoebaReferenceMultipoleForce::AmoebaReferenceMultipoleForce() : ...@@ -44,7 +44,6 @@ AmoebaReferenceMultipoleForce::AmoebaReferenceMultipoleForce() :
_maximumMutualInducedDipoleIterations(100), _maximumMutualInducedDipoleIterations(100),
_mutualInducedDipoleEpsilon(1.0e+50), _mutualInducedDipoleEpsilon(1.0e+50),
_mutualInducedDipoleTargetEpsilon(1.0e-04), _mutualInducedDipoleTargetEpsilon(1.0e-04),
_polarSOR(0.55),
_debye(48.033324) _debye(48.033324)
{ {
initialize(); initialize();
...@@ -60,7 +59,6 @@ AmoebaReferenceMultipoleForce::AmoebaReferenceMultipoleForce(NonbondedMethod non ...@@ -60,7 +59,6 @@ AmoebaReferenceMultipoleForce::AmoebaReferenceMultipoleForce(NonbondedMethod non
_maximumMutualInducedDipoleIterations(100), _maximumMutualInducedDipoleIterations(100),
_mutualInducedDipoleEpsilon(1.0e+50), _mutualInducedDipoleEpsilon(1.0e+50),
_mutualInducedDipoleTargetEpsilon(1.0e-04), _mutualInducedDipoleTargetEpsilon(1.0e-04),
_polarSOR(0.55),
_debye(48.033324) _debye(48.033324)
{ {
initialize(); initialize();
...@@ -890,78 +888,6 @@ void AmoebaReferenceMultipoleForce::calculateInducedDipoleFields(const vector<Mu ...@@ -890,78 +888,6 @@ void AmoebaReferenceMultipoleForce::calculateInducedDipoleFields(const vector<Mu
calculateInducedDipolePairIxns(particleData[ii], particleData[jj], updateInducedDipoleFields); calculateInducedDipolePairIxns(particleData[ii], particleData[jj], updateInducedDipoleFields);
} }
double AmoebaReferenceMultipoleForce::updateInducedDipoleFields(const vector<MultipoleParticleData>& particleData,
vector<UpdateInducedDipoleFieldStruct>& updateInducedDipoleFields)
{
// Calculate the fields coming from induced dipoles.
calculateInducedDipoleFields(particleData, updateInducedDipoleFields);
// Update the induced dipoles and calculate the convergence factor, maxEpsilon
double maxEpsilon = 0.0;
for (auto& field : updateInducedDipoleFields) {
double epsilon = updateInducedDipole(particleData,
*field.fixedMultipoleField,
field.inducedDipoleField,
*field.inducedDipoles);
maxEpsilon = epsilon > maxEpsilon ? epsilon : maxEpsilon;
}
return maxEpsilon;
}
double AmoebaReferenceMultipoleForce::updateInducedDipole(const vector<MultipoleParticleData>& particleData,
const vector<Vec3>& fixedMultipoleField,
const vector<Vec3>& inducedDipoleField,
vector<Vec3>& inducedDipole)
{
double epsilon = 0.0;
for (unsigned int ii = 0; ii < particleData.size(); ii++) {
Vec3 oldValue = inducedDipole[ii];
Vec3 newValue = fixedMultipoleField[ii] + inducedDipoleField[ii]*particleData[ii].polarity;
Vec3 delta = newValue - oldValue;
inducedDipole[ii] = oldValue + delta*_polarSOR;
epsilon += delta.dot(delta);
}
return epsilon;
}
/**
 * Iterate the induced dipoles with successive over-relaxation until one of
 * the stopping conditions is met, then record the final epsilon and the
 * number of iterations performed.
 *
 * @param particleData              per-particle data passed to each update sweep
 * @param updateInducedDipoleField  dipole/field sets updated by each sweep
 */
void AmoebaReferenceMultipoleForce::convergeInduceDipolesBySOR(const vector<MultipoleParticleData>& particleData,
                                                               vector<UpdateInducedDipoleFieldStruct>& updateInducedDipoleField)
{
    setMutualInducedDipoleConverged(false);

    int sweepCount = 0;
    double previousEpsilon = 1.0e+50;
    bool finished = false;

    // Keep sweeping until
    //   (1) the induced dipoles are converged, or
    //   (2) the convergence factor (epsilon) increases, or
    //   (3) the maximum number of iterations has been reached.
    do {
        const double rawEpsilon = updateInducedDipoleFields(particleData, updateInducedDipoleField);
        const double scaledEpsilon = _polarSOR*_debye*sqrt(rawEpsilon/_numParticles);

        const bool converged = scaledEpsilon < getMutualInducedDipoleTargetEpsilon();
        if (converged)
            setMutualInducedDipoleConverged(true);

        // The sweep counter used in the limit check does not yet include this sweep.
        finished = converged ||
                   previousEpsilon < scaledEpsilon ||
                   sweepCount >= getMaximumMutualInducedDipoleIterations();

        previousEpsilon = scaledEpsilon;
        sweepCount++;
    } while (!finished);

    setMutualInducedDipoleEpsilon(previousEpsilon);
    setMutualInducedDipoleIterations(sweepCount);
}
void AmoebaReferenceMultipoleForce::convergeInduceDipolesByExtrapolation(const vector<MultipoleParticleData>& particleData, vector<UpdateInducedDipoleFieldStruct>& updateInducedDipoleField) { void AmoebaReferenceMultipoleForce::convergeInduceDipolesByExtrapolation(const vector<MultipoleParticleData>& particleData, vector<UpdateInducedDipoleFieldStruct>& updateInducedDipoleField) {
// Start by storing the direct dipoles as PT0 // Start by storing the direct dipoles as PT0
......
...@@ -750,7 +750,6 @@ protected: ...@@ -750,7 +750,6 @@ protected:
std::vector<double> _extPartCoefficients; std::vector<double> _extPartCoefficients;
double _mutualInducedDipoleEpsilon; double _mutualInducedDipoleEpsilon;
double _mutualInducedDipoleTargetEpsilon; double _mutualInducedDipoleTargetEpsilon;
double _polarSOR;
double _debye; double _debye;
/** /**
...@@ -1027,14 +1026,6 @@ protected: ...@@ -1027,14 +1026,6 @@ protected:
* @param particleData vector of particle positions and parameters (charge, labFrame dipoles, quadrupoles, ...) * @param particleData vector of particle positions and parameters (charge, labFrame dipoles, quadrupoles, ...)
* @param updateInducedDipoleFields vector of UpdateInducedDipoleFieldStruct containing input induced dipoles and output fields * @param updateInducedDipoleFields vector of UpdateInducedDipoleFieldStruct containing input induced dipoles and output fields
*/ */
void convergeInduceDipolesBySOR(const std::vector<MultipoleParticleData>& particleData,
std::vector<UpdateInducedDipoleFieldStruct>& calculateInducedDipoleField);
/**
* Converge induced dipoles.
*
* @param particleData vector of particle positions and parameters (charge, labFrame dipoles, quadrupoles, ...)
* @param updateInducedDipoleFields vector of UpdateInducedDipoleFieldStruct containing input induced dipoles and output fields
*/
void convergeInduceDipolesByDIIS(const std::vector<MultipoleParticleData>& particleData, void convergeInduceDipolesByDIIS(const std::vector<MultipoleParticleData>& particleData,
std::vector<UpdateInducedDipoleFieldStruct>& calculateInducedDipoleField); std::vector<UpdateInducedDipoleFieldStruct>& calculateInducedDipoleField);
...@@ -1046,28 +1037,6 @@ protected: ...@@ -1046,28 +1037,6 @@ protected:
*/ */
void computeDIISCoefficients(const std::vector<std::vector<Vec3> >& prevErrors, std::vector<double>& coefficients) const; void computeDIISCoefficients(const std::vector<std::vector<Vec3> >& prevErrors, std::vector<double>& coefficients) const;
/**
* Recalculate the fields due to the induced dipoles, then update each set of induced dipoles
* from those fields.
*
* @param particleData vector of particle positions and parameters (charge, labFrame dipoles, quadrupoles, ...)
* @param calculateInducedDipoleField vector of UpdateInducedDipoleFieldStruct containing input induced dipoles and output fields;
*                                    the induced dipoles referenced by each entry are updated in place
* @return the largest convergence measure returned by updateInducedDipole() over all entries
*/
double updateInducedDipoleFields(const std::vector<MultipoleParticleData>& particleData,
std::vector<UpdateInducedDipoleFieldStruct>& calculateInducedDipoleField);
/**
* Update the induced dipoles of all particles given the updated induced dipole field at each site.
*
* @param particleI vector with one entry per particle (positions and parameters: charge, labFrame dipoles,
*                  quadrupoles, ...); despite the name, this is the full particle list, not a single particle
* @param fixedMultipoleField fields due fixed multipoles at each site
* @param inducedDipoleField fields due induced dipoles at each site
* @param inducedDipoles induced dipoles at each site; read as the previous values and overwritten with the updated ones
* @return sum over all sites of the squared magnitude of the difference between the newly computed and previous dipole
*/
double updateInducedDipole(const std::vector<MultipoleParticleData>& particleI,
const std::vector<Vec3>& fixedMultipoleField,
const std::vector<Vec3>& inducedDipoleField,
std::vector<Vec3>& inducedDipoles);
/** /**
* Calculate induced dipoles. * Calculate induced dipoles.
* *
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment