Commit c83f2a12 authored by Peter Eastman's avatar Peter Eastman
Browse files

Began CUDA implementation of triclinic boxes for AmoebaMultipoleForce

parent 82e118fb
......@@ -796,8 +796,8 @@ private:
CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) :
CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system), hasInitializedScaleFactors(false), hasInitializedFFT(false), multipolesAreValid(false),
multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL),
field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), inducedDipole(NULL),
multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL), fracDipoles(NULL),
fracQuadrupoles(NULL), field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), inducedDipole(NULL),
diisCoefficients(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), prevDipoles(NULL), prevDipolesPolar(NULL), prevDipolesGk(NULL),
prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeIgrid(NULL), pmePhi(NULL),
......@@ -816,6 +816,10 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete labFrameDipoles;
if (labFrameQuadrupoles != NULL)
delete labFrameQuadrupoles;
if (fracDipoles != NULL)
delete fracDipoles;
if (fracQuadrupoles != NULL)
delete fracQuadrupoles;
if (field != NULL)
delete field;
if (fieldPolar != NULL)
......@@ -968,7 +972,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
labFrameDipoles = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles");
labFrameQuadrupoles = new CudaArray(cu, 9*paddedNumAtoms, elementSize, "labFrameQuadrupoles");
labFrameQuadrupoles = new CudaArray(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles");
fracDipoles = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "fracDipoles");
fracQuadrupoles = new CudaArray(cu, 6*paddedNumAtoms, elementSize, "fracQuadrupoles");
field = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "field");
fieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "fieldPolar");
torque = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "torque");
......@@ -1185,6 +1191,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeDefines["DIRECT_POLARIZATION"] = "";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipolePme, pmeDefines);
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeTransformMultipolesKernel = cu.getKernel(module, "transformMultipolesToFractionalCoordinates");
pmeSpreadFixedMultipolesKernel = cu.getKernel(module, "gridSpreadFixedMultipoles");
pmeSpreadInducedDipolesKernel = cu.getKernel(module, "gridSpreadInducedDipoles");
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
......@@ -1483,15 +1490,44 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
gkKernel->finishComputation(*torque, *labFrameDipoles, *labFrameQuadrupoles, *inducedDipole, *inducedDipolePolar, *dampingAndThole, *covalentFlags, *polarizationGroupFlags);
}
else {
// Compute reciprocal box vectors.
Vec3 boxVectors[3];
cu.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
double scale = 1.0/determinant;
double3 recipBoxVectors[3];
recipBoxVectors[0] = make_double3(boxVectors[1][1]*boxVectors[2][2]*scale, 0, 0);
recipBoxVectors[1] = make_double3(-boxVectors[1][0]*boxVectors[2][2]*scale, boxVectors[0][0]*boxVectors[2][2]*scale, 0);
recipBoxVectors[2] = make_double3((boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0])*scale, -boxVectors[0][0]*boxVectors[2][1]*scale, boxVectors[0][0]*boxVectors[1][1]*scale);
float3 recipBoxVectorsFloat[3];
void* recipBoxVectorPointer[3];
if (cu.getUseDoublePrecision()) {
recipBoxVectorPointer[0] = &recipBoxVectors[0];
recipBoxVectorPointer[1] = &recipBoxVectors[1];
recipBoxVectorPointer[2] = &recipBoxVectors[2];
}
else {
recipBoxVectorsFloat[0] = make_float3((float) recipBoxVectors[0].x, 0, 0);
recipBoxVectorsFloat[1] = make_float3((float) recipBoxVectors[1].x, (float) recipBoxVectors[1].y, 0);
recipBoxVectorsFloat[2] = make_float3((float) recipBoxVectors[2].x, (float) recipBoxVectors[2].y, (float) recipBoxVectors[2].z);
recipBoxVectorPointer[0] = &recipBoxVectorsFloat[0];
recipBoxVectorPointer[1] = &recipBoxVectorsFloat[1];
recipBoxVectorPointer[2] = &recipBoxVectorsFloat[2];
}
// Reciprocal space calculation.
unsigned int maxTiles = nb.getInteractingTiles().getSize();
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms(), cu.ThreadBlockSize, cu.ThreadBlockSize*PmeOrder*PmeOrder*elementSize);
sort->sort(*pmeAtomGridIndex);
void* pmeSpreadFixedMultipolesArgs[] = {&cu.getPosq().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
void* pmeTransformMultipolesArgs[] = {&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
&fracDipoles->getDevicePointer(), &fracQuadrupoles->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeTransformMultipolesKernel, pmeTransformMultipolesArgs, cu.getNumAtoms());
void* pmeSpreadFixedMultipolesArgs[] = {&cu.getPosq().getDevicePointer(), &fracDipoles->getDevicePointer(), &fracQuadrupoles->getDevicePointer(),
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeSpreadFixedMultipolesKernel, pmeSpreadFixedMultipolesArgs, cu.getNumAtoms());
void* finishSpreadArgs[] = {&pmeGrid->getDevicePointer()};
if (cu.getUseDoublePrecision())
......@@ -1501,7 +1537,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
void* pmeConvolutionArgs[] = {&pmeGrid->getDevicePointer(), &pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(),
&pmeBsplineModuliZ->getDevicePointer(), cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
&pmeBsplineModuliZ->getDevicePointer(), cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
......@@ -1509,11 +1545,12 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
void* pmeFixedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhi->getDevicePointer(), &field->getDevicePointer(),
&fieldPolar ->getDevicePointer(), &cu.getPosq().getDevicePointer(), &labFrameDipoles->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(), &pmeAtomGridIndex->getDevicePointer()};
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeFixedPotentialKernel, pmeFixedPotentialArgs, cu.getNumAtoms());
void* pmeFixedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
&cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
&pmePhi->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
&fracDipoles->getDevicePointer(), &fracQuadrupoles->getDevicePointer(), &pmePhi->getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeFixedForceKernel, pmeFixedForceArgs, cu.getNumAtoms());
// Direct space calculation.
......@@ -1521,7 +1558,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices,
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), &maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*fixedFieldThreads, fixedFieldThreads);
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
......@@ -1532,7 +1570,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cu.clearBuffer(*pmeGrid);
void* pmeSpreadInducedDipolesArgs[] = {&cu.getPosq().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, pmeGrid->getSize());
......@@ -1546,7 +1584,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
void* pmeInducedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&pmePhidp->getDevicePointer(), &cu.getPosq().getDevicePointer(), cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(),
&pmePhidp->getDevicePointer(), &cu.getPosq().getDevicePointer(), cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2],
&pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
......@@ -1559,7 +1597,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), &maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
cu.clearBuffer(*pmeGrid);
......@@ -1577,7 +1616,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
bool converged = iterateDipolesByDIIS(i);
if (converged)
......@@ -1590,14 +1629,16 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&cu.getPosq().getDevicePointer(), &covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices,
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(),
cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(), &maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*electrostaticsThreads, electrostaticsThreads);
void* pmeInducedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
&cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
&fracDipoles->getDevicePointer(), &fracQuadrupoles->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &pmePhi->getDevicePointer(), &pmePhid->getDevicePointer(),
&pmePhip->getDevicePointer(), &pmePhidp->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
&pmePhip->getDevicePointer(), &pmePhidp->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeInducedForceKernel, pmeInducedForceArgs, cu.getNumAtoms());
}
......@@ -1785,7 +1826,8 @@ void CudaCalcAmoebaMultipoleForceKernel::getElectrostaticPotential(ContextImpl&
void* computePotentialArgs[] = {&cu.getPosq().getDevicePointer(), &labFrameDipoles->getDevicePointer(),
&labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(), &points.getDevicePointer(),
&potential.getDevicePointer(), &numPoints, cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
&potential.getDevicePointer(), &numPoints, cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(),
cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer()};
int blockSize = 128;
cu.executeKernel(computePotentialKernel, computePotentialArgs, numPoints, blockSize, blockSize*15*elementSize);
outputElectrostaticPotential.resize(numPoints);
......
......@@ -391,6 +391,8 @@ private:
CudaArray* molecularQuadrupoles;
CudaArray* labFrameDipoles;
CudaArray* labFrameQuadrupoles;
CudaArray* fracDipoles;
CudaArray* fracQuadrupoles;
CudaArray* field;
CudaArray* fieldPolar;
CudaArray* inducedField;
......@@ -428,6 +430,7 @@ private:
CUfunction pmeGridIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel;
CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
CUfunction recordDIISDipolesKernel, buildMatrixKernel;
CUfunction pmeTransformMultipolesKernel;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel;
static const int PmeOrder = 5;
static const int MaxPrevDIISDipoles = 20;
......
......@@ -65,7 +65,9 @@ extern "C" __global__ void computeElectrostatics(
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms,
#endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
......
......@@ -440,7 +440,9 @@ extern "C" __global__ void computeFixedField(
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const ushort2* __restrict__ exclusionTiles,
unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK
const real* __restrict__ bornRadii, unsigned long long* __restrict__ gkFieldBuffers,
#endif
......@@ -494,9 +496,7 @@ extern "C" __global__ void computeFixedField(
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real3 delta = trimTo3(localData[tbx+j].posq-data.posq);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
......@@ -532,9 +532,7 @@ extern "C" __global__ void computeFixedField(
for (j = 0; j < TILE_SIZE; j++) {
real3 delta = trimTo3(localData[tbx+tj].posq-data.posq);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
......@@ -675,9 +673,7 @@ extern "C" __global__ void computeFixedField(
for (j = 0; j < TILE_SIZE; j++) {
real3 delta = trimTo3(localData[tbx+tj].posq-data.posq);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
......
......@@ -207,7 +207,8 @@ extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const ushort2* __restrict__ exclusionTiles,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK
unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
......@@ -251,9 +252,7 @@ extern "C" __global__ void computeInducedField(
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real3 delta = localData[tbx+j].pos-data.pos;
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
int atom2 = y*TILE_SIZE+j;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
......@@ -273,9 +272,7 @@ extern "C" __global__ void computeInducedField(
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real3 delta = localData[tbx+tj].pos-data.pos;
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
int atom2 = y*TILE_SIZE+j;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
......@@ -404,9 +401,7 @@ extern "C" __global__ void computeInducedField(
for (j = 0; j < TILE_SIZE; j++) {
real3 delta = localData[tbx+tj].pos-data.pos;
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
......
......@@ -73,30 +73,30 @@ __device__ void computeBSplinePoint(real4* thetai, real w, real* array) {
* Compute the index of the grid point each atom is associated with.
*/
extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int2* __restrict__ pmeAtomGridIndex,
real4 periodicBoxSize, real4 invPeriodicBoxSize) {
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
real4 pos = posq[i];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
// First axis.
real w = pos.x*invPeriodicBoxSize.x;
real w = pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x;
real fr = GRID_SIZE_X*(w-(int)(w+0.5f)+0.5f);
int ifr = (int) fr;
int igrid1 = ifr-PME_ORDER+1;
// Second axis.
w = pos.y*invPeriodicBoxSize.y;
w = pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y;
fr = GRID_SIZE_Y*(w-(int)(w+0.5f)+0.5f);
ifr = (int) fr;
int igrid2 = ifr-PME_ORDER+1;
// Third axis.
w = pos.z*invPeriodicBoxSize.z;
w = pos.z*recipBoxVecZ.z;
fr = GRID_SIZE_Z*(w-(int)(w+0.5f)+0.5f);
ifr = (int) fr;
int igrid3 = ifr-PME_ORDER+1;
......@@ -109,13 +109,61 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
pmeAtomGridIndex[i] = make_int2(i, igrid1*GRID_SIZE_Y*GRID_SIZE_Z+igrid2*GRID_SIZE_Z+igrid3);
}
}
/**
* Convert the fixed multipoles from Cartesian to fractional coordinates.
*/
extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole,
real* __restrict__ fracDipole, real* __restrict__ fracQuadrupole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// Build matrices for transforming the dipoles and quadrupoles.
__shared__ real a[3][3];
if (threadIdx.x == 0) {
a[0][0] = GRID_SIZE_X*recipBoxVecX.x;
a[0][1] = GRID_SIZE_X*recipBoxVecY.x;
a[0][2] = GRID_SIZE_X*recipBoxVecZ.x;
a[1][0] = GRID_SIZE_Y*recipBoxVecX.y;
a[1][1] = GRID_SIZE_Y*recipBoxVecY.y;
a[1][2] = GRID_SIZE_Y*recipBoxVecZ.y;
a[2][0] = GRID_SIZE_Z*recipBoxVecX.z;
a[2][1] = GRID_SIZE_Z*recipBoxVecY.z;
a[2][2] = GRID_SIZE_Z*recipBoxVecZ.z;
}
__syncthreads();
int index1[] = {0, 0, 0, 1, 1, 2};
int index2[] = {0, 1, 2, 1, 2, 2};
__shared__ real b[6][6];
if (threadIdx.x < 36) {
int i = threadIdx.x/6;
int j = threadIdx.x-6*i;
b[i][j] = a[index1[i]][index1[j]]*a[index2[i]][index2[j]];
if (index1[i] != index2[i])
b[i][j] += a[index1[i]][index2[j]]*a[index2[i]][index1[j]];
}
__syncthreads();
extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, real2* __restrict__ pmeGrid, int2* __restrict__ pmeAtomGridIndex,
real4 periodicBoxSize, real4 invPeriodicBoxSize) {
const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
// Transform the multipoles.
real quadScale[] = {1, 2, 2, 1, 2, 1};
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
for (int j = 0; j < 3; j++) {
real dipole = 0;
for (int k = 0; k < 3; k++)
dipole += a[j][k]*labFrameDipole[3*i+k];
fracDipole[3*i+j] = dipole;
}
for (int j = 0; j < 6; j++) {
real quadrupole = 0;
for (int k = 0; k < 5; k++)
quadrupole += quadScale[k]*b[j][k]*labFrameQuadrupole[5*i+k];
quadrupole -= quadScale[5]*b[j][5]*(labFrameQuadrupole[5*i]+labFrameQuadrupole[5*i+3]);
fracQuadrupole[6*i+j] = quadrupole;
}
}
}
extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ posq, const real* __restrict__ fracDipole,
const real* __restrict__ fracQuadrupole, real2* __restrict__ pmeGrid, int2* __restrict__ pmeAtomGridIndex,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
real array[PME_ORDER*PME_ORDER];
real4 theta1[PME_ORDER];
real4 theta2[PME_ORDER];
......@@ -127,26 +175,26 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int m = pmeAtomGridIndex[i].x;
real4 pos = posq[m];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real w = pos.x*invPeriodicBoxSize.x;
real w = pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x;
real fr = GRID_SIZE_X*(w-(int)(w+0.5f)+0.5f);
int ifr = (int) fr;
w = fr - ifr;
int igrid1 = ifr-PME_ORDER+1;
computeBSplinePoint(theta1, w, array);
w = pos.y*invPeriodicBoxSize.y;
w = pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y;
fr = GRID_SIZE_Y*(w-(int)(w+0.5f)+0.5f);
ifr = (int) fr;
w = fr - ifr;
int igrid2 = ifr-PME_ORDER+1;
computeBSplinePoint(theta2, w, array);
w = pos.z*invPeriodicBoxSize.z;
w = pos.z*recipBoxVecZ.z;
fr = GRID_SIZE_Z*(w-(int)(w+0.5f)+0.5f);
ifr = (int) fr;
w = fr - ifr;
......@@ -177,15 +225,15 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
real4 v = theta3[iz];
real atomCharge = pos.w;
real atomDipoleX = xscale*labFrameDipole[m*3];
real atomDipoleY = yscale*labFrameDipole[m*3+1];
real atomDipoleZ = zscale*labFrameDipole[m*3+2];
real atomQuadrupoleXX = xscale*xscale*labFrameQuadrupole[m*5];
real atomQuadrupoleXY = 2*xscale*yscale*labFrameQuadrupole[m*5+1];
real atomQuadrupoleXZ = 2*xscale*zscale*labFrameQuadrupole[m*5+2];
real atomQuadrupoleYY = yscale*yscale*labFrameQuadrupole[m*5+3];
real atomQuadrupoleYZ = 2*yscale*zscale*labFrameQuadrupole[m*5+4];
real atomQuadrupoleZZ = -zscale*zscale*(labFrameQuadrupole[m*5]+labFrameQuadrupole[m*5+3]);
real atomDipoleX = fracDipole[m*3];
real atomDipoleY = fracDipole[m*3+1];
real atomDipoleZ = fracDipole[m*3+2];
real atomQuadrupoleXX = fracQuadrupole[m*6];
real atomQuadrupoleXY = fracQuadrupole[m*6+1];
real atomQuadrupoleXZ = fracQuadrupole[m*6+2];
real atomQuadrupoleYY = fracQuadrupole[m*6+3];
real atomQuadrupoleYZ = fracQuadrupole[m*6+4];
real atomQuadrupoleZZ = fracQuadrupole[m*6+5];
real term0 = atomCharge*u.x*v.x + atomDipoleY*u.y*v.x + atomDipoleZ*u.x*v.y + atomQuadrupoleYY*u.z*v.x + atomQuadrupoleZZ*u.x*v.z + atomQuadrupoleYZ*u.y*v.y;
real term1 = atomDipoleX*u.x*v.x + atomQuadrupoleXY*u.y*v.x + atomQuadrupoleXZ*u.x*v.y;
real term2 = atomQuadrupoleXX * u.x * v.x;
......@@ -204,10 +252,10 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ posq, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, real2* __restrict__ pmeGrid, int2* __restrict__ pmeAtomGridIndex,
real4 periodicBoxSize, real4 invPeriodicBoxSize) {
const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
const real xscale = GRID_SIZE_X*recipBoxVecX.x;
const real yscale = GRID_SIZE_Y*recipBoxVecY.y;
const real zscale = GRID_SIZE_Z*recipBoxVecZ.z;
real array[PME_ORDER*PME_ORDER];
real4 theta1[PME_ORDER];
real4 theta2[PME_ORDER];
......@@ -219,26 +267,26 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int m = pmeAtomGridIndex[i].x;
real4 pos = posq[m];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real w = pos.x*invPeriodicBoxSize.x;
real w = pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x;
real fr = GRID_SIZE_X*(w-(int)(w+0.5f)+0.5f);
int ifr = (int) fr;
w = fr - ifr;
int igrid1 = ifr-PME_ORDER+1;
computeBSplinePoint(theta1, w, array);
w = pos.y*invPeriodicBoxSize.y;
w = pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y;
fr = GRID_SIZE_Y*(w-(int)(w+0.5f)+0.5f);
ifr = (int) fr;
w = fr - ifr;
int igrid2 = ifr-PME_ORDER+1;
computeBSplinePoint(theta2, w, array);
w = pos.z*invPeriodicBoxSize.z;
w = pos.z*recipBoxVecZ.z;
fr = GRID_SIZE_Z*(w-(int)(w+0.5f)+0.5f);
ifr = (int) fr;
w = fr - ifr;
......@@ -306,7 +354,8 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ pmeGrid) {
}
extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, const real* __restrict__ pmeBsplineModuliX,
const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ, real4 periodicBoxSize,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
real expFactor = M_PI*M_PI/(EWALD_ALPHA*EWALD_ALPHA);
real scaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
......@@ -322,9 +371,9 @@ extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, co
int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
real mhx = mx*invPeriodicBoxSize.x;
real mhy = my*invPeriodicBoxSize.y;
real mhz = mz*invPeriodicBoxSize.z;
real mhx = mx*recipBoxVecX.x;
real mhy = mx*recipBoxVecY.x+my*recipBoxVecY.y;
real mhz = mx*recipBoxVecZ.x+my*recipBoxVecZ.y+mz*recipBoxVecZ.z;
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
......@@ -338,7 +387,7 @@ extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, co
extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict__ pmeGrid, real* __restrict__ phi,
long long* __restrict__ fieldBuffers, long long* __restrict__ fieldPolarBuffers, const real4* __restrict__ posq,
const real* __restrict__ labFrameDipole, real4 periodicBoxSize, real4 invPeriodicBoxSize, int2* __restrict__ pmeAtomGridIndex) {
const real* __restrict__ labFrameDipole, real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, int2* __restrict__ pmeAtomGridIndex) {
real array[PME_ORDER*PME_ORDER];
real4 theta1[PME_ORDER];
real4 theta2[PME_ORDER];
......@@ -350,26 +399,26 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int m = pmeAtomGridIndex[i].x;
real4 pos = posq[m];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real w = pos.x*invPeriodicBoxSize.x;
real w = pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x;
real fr = GRID_SIZE_X*(w-(int)(w+0.5f)+0.5f);
int ifr = (int) fr;
w = fr - ifr;
int igrid1 = ifr-PME_ORDER+1;
computeBSplinePoint(theta1, w, array);
w = pos.y*invPeriodicBoxSize.y;
w = pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y;
fr = GRID_SIZE_Y*(w-(int)(w+0.5f)+0.5f);
ifr = (int) fr;
w = fr - ifr;
int igrid2 = ifr-PME_ORDER+1;
computeBSplinePoint(theta2, w, array);
w = pos.z*invPeriodicBoxSize.z;
w = pos.z*recipBoxVecZ.z;
fr = GRID_SIZE_Z*(w-(int)(w+0.5f)+0.5f);
ifr = (int) fr;
w = fr - ifr;
......@@ -481,13 +530,13 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
phi[20*m+18] = tuv012;
phi[20*m+19] = tuv111;
real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-GRID_SIZE_X*invPeriodicBoxSize.x*tuv100)*0x100000000);
long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-GRID_SIZE_X*recipBoxVecX.x*tuv100)*0x100000000);
fieldBuffers[m] = fieldx;
fieldPolarBuffers[m] = fieldx;
long long fieldy = (long long) ((dipoleScale*labFrameDipole[m*3+1]-GRID_SIZE_Y*invPeriodicBoxSize.y*tuv010)*0x100000000);
long long fieldy = (long long) ((dipoleScale*labFrameDipole[m*3+1]-GRID_SIZE_Y*recipBoxVecY.y*tuv010)*0x100000000);
fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy;
fieldPolarBuffers[m+PADDED_NUM_ATOMS] = fieldy;
long long fieldz = (long long) ((dipoleScale*labFrameDipole[m*3+2]-GRID_SIZE_Z*invPeriodicBoxSize.z*tuv001)*0x100000000);
long long fieldz = (long long) ((dipoleScale*labFrameDipole[m*3+2]-GRID_SIZE_Z*recipBoxVecZ.z*tuv001)*0x100000000);
fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
fieldPolarBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
}
......@@ -495,7 +544,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restrict__ pmeGrid, real* __restrict__ phid,
real* __restrict__ phip, real* __restrict__ phidp, const real4* __restrict__ posq,
real4 periodicBoxSize, real4 invPeriodicBoxSize, int2* __restrict__ pmeAtomGridIndex) {
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, int2* __restrict__ pmeAtomGridIndex) {
real array[PME_ORDER*PME_ORDER];
real4 theta1[PME_ORDER];
real4 theta2[PME_ORDER];
......@@ -507,26 +556,26 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int m = pmeAtomGridIndex[i].x;
real4 pos = posq[m];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real w = pos.x*invPeriodicBoxSize.x;
real w = pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x;
real fr = GRID_SIZE_X*(w-(int)(w+0.5f)+0.5f);
int ifr = (int) fr;
w = fr - ifr;
int igrid1 = ifr-PME_ORDER+1;
computeBSplinePoint(theta1, w, array);
w = pos.y*invPeriodicBoxSize.y;
w = pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y;
fr = GRID_SIZE_Y*(w-(int)(w+0.5f)+0.5f);
ifr = (int) fr;
w = fr - ifr;
int igrid2 = ifr-PME_ORDER+1;
computeBSplinePoint(theta2, w, array);
w = pos.z*invPeriodicBoxSize.z;
w = pos.z*recipBoxVecZ.z;
fr = GRID_SIZE_Z*(w-(int)(w+0.5f)+0.5f);
ifr = (int) fr;
w = fr - ifr;
......@@ -736,15 +785,29 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ phi_global, real4 invPeriodicBoxSize) {
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole,
const real* __restrict__ phi_global, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
real multipole[10];
const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18};
const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
const real xscale = GRID_SIZE_X*recipBoxVecX.x;
const real yscale = GRID_SIZE_Y*recipBoxVecY.y;
const real zscale = GRID_SIZE_Z*recipBoxVecZ.z;
real energy = 0;
__shared__ real fracToCart[3][3];
if (threadIdx.x == 0) {
fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x;
fracToCart[1][0] = GRID_SIZE_X*recipBoxVecY.x;
fracToCart[2][0] = GRID_SIZE_X*recipBoxVecZ.x;
fracToCart[0][1] = GRID_SIZE_Y*recipBoxVecX.y;
fracToCart[1][1] = GRID_SIZE_Y*recipBoxVecY.y;
fracToCart[2][1] = GRID_SIZE_Y*recipBoxVecZ.y;
fracToCart[0][2] = GRID_SIZE_Z*recipBoxVecX.z;
fracToCart[1][2] = GRID_SIZE_Z*recipBoxVecY.z;
fracToCart[2][2] = GRID_SIZE_Z*recipBoxVecZ.z;
}
__syncthreads();
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// Compute the torque.
......@@ -778,15 +841,15 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
// Compute the force and energy.
multipole[1] *= xscale;
multipole[2] *= yscale;
multipole[3] *= zscale;
multipole[4] *= xscale*xscale;
multipole[5] *= yscale*yscale;
multipole[6] *= zscale*zscale;
multipole[7] *= xscale*yscale;
multipole[8] *= xscale*zscale;
multipole[9] *= yscale*zscale;
multipole[1] = fracDipole[i*3];
multipole[2] = fracDipole[i*3+1];
multipole[3] = fracDipole[i*3+2];
multipole[4] = fracQuadrupole[i*6];
multipole[5] = fracQuadrupole[i*6+3];
multipole[6] = fracQuadrupole[i*6+5];
multipole[7] = fracQuadrupole[i*6+1];
multipole[8] = fracQuadrupole[i*6+2];
multipole[9] = fracQuadrupole[i*6+4];
real4 f = make_real4(0, 0, 0, 0);
for (int k = 0; k < 10; k++) {
......@@ -795,9 +858,9 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
f.y += multipole[k]*phi[deriv2[k]];
f.z += multipole[k]*phi[deriv3[k]];
}
f.x *= EPSILON_FACTOR*xscale;
f.y *= EPSILON_FACTOR*yscale;
f.z *= EPSILON_FACTOR*zscale;
f = make_real4(EPSILON_FACTOR*(f.x*fracToCart[0][0] + f.y*fracToCart[0][1] + f.z*fracToCart[0][2]),
EPSILON_FACTOR*(f.x*fracToCart[1][0] + f.y*fracToCart[1][1] + f.z*fracToCart[1][2]),
EPSILON_FACTOR*(f.x*fracToCart[2][0] + f.y*fracToCart[2][1] + f.z*fracToCart[2][2]), 0);
forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0x100000000));
......@@ -807,23 +870,37 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole_global, const real* __restrict__ inducedDipolePolar_global,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole,
const real* __restrict__ inducedDipole_global, const real* __restrict__ inducedDipolePolar_global,
const real* __restrict__ phi_global, const real* __restrict__ phid_global, const real* __restrict__ phip_global,
const real* __restrict__ phidp_global, real4 invPeriodicBoxSize) {
const real* __restrict__ phidp_global, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
real multipole[10];
real inducedDipole[3];
real inducedDipolePolar[3];
real cinducedDipole[3], inducedDipole[3];
real cinducedDipolePolar[3], inducedDipolePolar[3];
real scales[3];
const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18};
const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
const real xscale = GRID_SIZE_X*recipBoxVecX.x;
const real yscale = GRID_SIZE_Y*recipBoxVecY.y;
const real zscale = GRID_SIZE_Z*recipBoxVecZ.z;
scales[0] = xscale;
scales[1] = yscale;
scales[2] = zscale;
real energy = 0;
__shared__ real fracToCart[3][3];
if (threadIdx.x == 0) {
fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x;
fracToCart[1][0] = GRID_SIZE_X*recipBoxVecY.x;
fracToCart[2][0] = GRID_SIZE_X*recipBoxVecZ.x;
fracToCart[0][1] = GRID_SIZE_Y*recipBoxVecX.y;
fracToCart[1][1] = GRID_SIZE_Y*recipBoxVecY.y;
fracToCart[2][1] = GRID_SIZE_Y*recipBoxVecZ.y;
fracToCart[0][2] = GRID_SIZE_Z*recipBoxVecX.z;
fracToCart[1][2] = GRID_SIZE_Z*recipBoxVecY.z;
fracToCart[2][2] = GRID_SIZE_Z*recipBoxVecZ.z;
}
__syncthreads();
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// Compute the torque.
......@@ -856,56 +933,62 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
// Compute the force and energy.
multipole[1] *= xscale;
multipole[2] *= yscale;
multipole[3] *= zscale;
multipole[4] *= xscale*xscale;
multipole[5] *= yscale*yscale;
multipole[6] *= zscale*zscale;
multipole[7] *= xscale*yscale;
multipole[8] *= xscale*zscale;
multipole[9] *= yscale*zscale;
inducedDipole[0] = inducedDipole_global[i*3];
inducedDipole[1] = inducedDipole_global[i*3+1];
inducedDipole[2] = inducedDipole_global[i*3+2];
inducedDipolePolar[0] = inducedDipolePolar_global[i*3];
inducedDipolePolar[1] = inducedDipolePolar_global[i*3+1];
inducedDipolePolar[2] = inducedDipolePolar_global[i*3+2];
multipole[1] = fracDipole[i*3];
multipole[2] = fracDipole[i*3+1];
multipole[3] = fracDipole[i*3+2];
multipole[4] = fracQuadrupole[i*6];
multipole[5] = fracQuadrupole[i*6+3];
multipole[6] = fracQuadrupole[i*6+5];
multipole[7] = fracQuadrupole[i*6+1];
multipole[8] = fracQuadrupole[i*6+2];
multipole[9] = fracQuadrupole[i*6+4];
cinducedDipole[0] = inducedDipole_global[i*3];
cinducedDipole[1] = inducedDipole_global[i*3+1];
cinducedDipole[2] = inducedDipole_global[i*3+2];
cinducedDipolePolar[0] = inducedDipolePolar_global[i*3];
cinducedDipolePolar[1] = inducedDipolePolar_global[i*3+1];
cinducedDipolePolar[2] = inducedDipolePolar_global[i*3+2];
// Multiply the dipoles by cartToFrac, which is just the transpose of fracToCart.
inducedDipole[0] = cinducedDipole[0]*fracToCart[0][0] + cinducedDipole[1]*fracToCart[1][0] + cinducedDipole[2]*fracToCart[2][0];
inducedDipole[1] = cinducedDipole[0]*fracToCart[0][1] + cinducedDipole[1]*fracToCart[1][1] + cinducedDipole[2]*fracToCart[2][1];
inducedDipole[2] = cinducedDipole[0]*fracToCart[0][2] + cinducedDipole[1]*fracToCart[1][2] + cinducedDipole[2]*fracToCart[2][2];
inducedDipolePolar[0] = cinducedDipolePolar[0]*fracToCart[0][0] + cinducedDipolePolar[1]*fracToCart[1][0] + cinducedDipolePolar[2]*fracToCart[2][0];
inducedDipolePolar[1] = cinducedDipolePolar[0]*fracToCart[0][1] + cinducedDipolePolar[1]*fracToCart[1][1] + cinducedDipolePolar[2]*fracToCart[2][1];
inducedDipolePolar[2] = cinducedDipolePolar[0]*fracToCart[0][2] + cinducedDipolePolar[1]*fracToCart[1][2] + cinducedDipolePolar[2]*fracToCart[2][2];
const real* phi = &phi_global[20*i];
const real* phip = &phip_global[10*i];
const real* phid = &phid_global[10*i];
real4 f = make_real4(0, 0, 0, 0);
energy += GRID_SIZE_X*invPeriodicBoxSize.x*inducedDipole[0]*phi[1];
energy += GRID_SIZE_Y*invPeriodicBoxSize.y*inducedDipole[1]*phi[2];
energy += GRID_SIZE_Z*invPeriodicBoxSize.z*inducedDipole[2]*phi[3];
energy += inducedDipole[0]*phi[1];
energy += inducedDipole[1]*phi[2];
energy += inducedDipole[2]*phi[3];
for (int k = 0; k < 3; k++) {
int j1 = deriv1[k+1];
int j2 = deriv2[k+1];
int j3 = deriv3[k+1];
f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[j1]*(scales[k]/xscale);
f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[j2]*(scales[k]/yscale);
f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[j3]*(scales[k]/zscale);
f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[j1];
f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[j2];
f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[j3];
#ifndef DIRECT_POLARIZATION
f.x += (inducedDipole[k]*phip[j1] + inducedDipolePolar[k]*phid[j1])*(scales[k]/xscale);
f.y += (inducedDipole[k]*phip[j2] + inducedDipolePolar[k]*phid[j2])*(scales[k]/yscale);
f.z += (inducedDipole[k]*phip[j3] + inducedDipolePolar[k]*phid[j3])*(scales[k]/zscale);
f.x += (inducedDipole[k]*phip[j1] + inducedDipolePolar[k]*phid[j1]);
f.y += (inducedDipole[k]*phip[j2] + inducedDipolePolar[k]*phid[j2]);
f.z += (inducedDipole[k]*phip[j3] + inducedDipolePolar[k]*phid[j3]);
#endif
}
f.x *= GRID_SIZE_X*invPeriodicBoxSize.x;
f.y *= GRID_SIZE_Y*invPeriodicBoxSize.y;
f.z *= GRID_SIZE_Z*invPeriodicBoxSize.z;
for (int k = 0; k < 10; k++) {
f.x += multipole[k]*phidp[deriv1[k]];
f.y += multipole[k]*phidp[deriv2[k]];
f.z += multipole[k]*phidp[deriv3[k]];
}
f.x *= 0.5f*EPSILON_FACTOR*xscale;
f.y *= 0.5f*EPSILON_FACTOR*yscale;
f.z *= 0.5f*EPSILON_FACTOR*zscale;
f = make_real4(0.5f*EPSILON_FACTOR*(f.x*fracToCart[0][0] + f.y*fracToCart[0][1] + f.z*fracToCart[0][2]),
0.5f*EPSILON_FACTOR*(f.x*fracToCart[1][0] + f.y*fracToCart[1][1] + f.z*fracToCart[1][2]),
0.5f*EPSILON_FACTOR*(f.x*fracToCart[2][0] + f.y*fracToCart[2][1] + f.z*fracToCart[2][2]), 0);
forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0x100000000));
......@@ -914,10 +997,10 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
}
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip,
long long* __restrict__ inducedField, long long* __restrict__ inducedFieldPolar, real4 invPeriodicBoxSize) {
real xscale = GRID_SIZE_X*invPeriodicBoxSize.x*0x100000000;
real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y*0x100000000;
real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z*0x100000000;
long long* __restrict__ inducedField, long long* __restrict__ inducedFieldPolar, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
real xscale = GRID_SIZE_X*recipBoxVecX.x*0x100000000;
real yscale = GRID_SIZE_Y*recipBoxVecY.y*0x100000000;
real zscale = GRID_SIZE_Z*recipBoxVecZ.z*0x100000000;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
inducedField[i] -= (long long) (xscale*phid[10*i+1]);
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (yscale*phid[10*i+2]);
......
......@@ -448,7 +448,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
*/
extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real4* __restrict__ points,
real* __restrict__ potential, int numPoints, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
real* __restrict__ potential, int numPoints, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
extern __shared__ real4 localPosq[];
real3* localDipole = (real3*) &localPosq[blockDim.x];
real3* localInducedDipole = (real3*) &localDipole[blockDim.x];
......@@ -481,9 +481,7 @@ extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ po
for (int i = 0; i < end; i++) {
real3 delta = trimTo3(localPosq[i]-pointPos);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = dot(delta, delta);
real rInv = RSQRT(r2);
......
......@@ -65,7 +65,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
}
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool hasExclusions, float dScale, float pScale, float mScale, float forceFactor,
real& energy, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
real& energy, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 delta;
delta.x = atom2.pos.x - atom1.pos.x;
delta.y = atom2.pos.y - atom1.pos.y;
......@@ -73,10 +73,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
// periodic box
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
delta.w = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (delta.w > CUTOFF_SQUARED)
return;
......@@ -203,7 +200,9 @@ extern "C" __global__ void computeElectrostatics(
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms,
#endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
......@@ -255,7 +254,7 @@ extern "C" __global__ void computeElectrostatics(
float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup, j);
float m = computeMScaleFactor(covalent, j);
computeOneInteraction(data, localData[tbx+j], true, d, p, m, 0.5f, energy, periodicBoxSize, invPeriodicBoxSize);
computeOneInteraction(data, localData[tbx+j], true, d, p, m, 0.5f, energy, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
}
}
if (atom1 < NUM_ATOMS)
......@@ -283,7 +282,7 @@ extern "C" __global__ void computeElectrostatics(
float d = computeDScaleFactor(polarizationGroup, tj);
float p = computePScaleFactor(covalent, polarizationGroup, tj);
float m = computeMScaleFactor(covalent, tj);
computeOneInteraction(data, localData[tbx+tj], true, d, p, m, 1, energy, periodicBoxSize, invPeriodicBoxSize);
computeOneInteraction(data, localData[tbx+tj], true, d, p, m, 1, energy, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
......@@ -386,7 +385,7 @@ extern "C" __global__ void computeElectrostatics(
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
computeOneInteraction(data, localData[tbx+tj], false, 1, 1, 1, 1, energy, periodicBoxSize, invPeriodicBoxSize);
computeOneInteraction(data, localData[tbx+tj], false, 1, 1, 1, 1, energy, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment