"platforms/vscode:/vscode.git/clone" did not exist on "6c6bc628bf95bb9ec90344400a671c55a4a6d5b1"
Commit 699fce6e authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to convert AmoebaMultipoleForce: mutual induced dipoles now work

parent 29654b80
...@@ -865,9 +865,9 @@ private: ...@@ -865,9 +865,9 @@ private:
CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) :
CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system), hasInitializedScaleFactors(false), CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system), hasInitializedScaleFactors(false),
multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL),
labFrameDipoles(NULL), labFrameQuadrupoles(NULL), field(NULL), fieldPolar(NULL), torque(NULL), dampingAndThole(NULL), field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL),
inducedDipole(NULL), inducedDipolePolar(NULL), currentEpsilon(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL), inducedDipole(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL) { pmeGrid(NULL) {
} }
...@@ -887,6 +887,10 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() { ...@@ -887,6 +887,10 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete field; delete field;
if (fieldPolar != NULL) if (fieldPolar != NULL)
delete fieldPolar; delete fieldPolar;
if (inducedField != NULL)
delete inducedField;
if (inducedFieldPolar != NULL)
delete inducedFieldPolar;
if (torque != NULL) if (torque != NULL)
delete torque; delete torque;
if (dampingAndThole != NULL) if (dampingAndThole != NULL)
...@@ -895,8 +899,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() { ...@@ -895,8 +899,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete inducedDipole; delete inducedDipole;
if (inducedDipolePolar != NULL) if (inducedDipolePolar != NULL)
delete inducedDipolePolar; delete inducedDipolePolar;
if (currentEpsilon != NULL) if (inducedDipoleErrors != NULL)
delete currentEpsilon; delete inducedDipoleErrors;
if (polarizability != NULL) if (polarizability != NULL)
delete polarizability; delete polarizability;
if (covalentFlags != NULL) if (covalentFlags != NULL)
...@@ -971,6 +975,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -971,6 +975,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
torque = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "torque"); torque = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "torque");
inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole"); inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar"); inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar");
inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors");
cu.addAutoclearBuffer(*field); cu.addAutoclearBuffer(*field);
cu.addAutoclearBuffer(*fieldPolar); cu.addAutoclearBuffer(*fieldPolar);
cu.addAutoclearBuffer(*torque); cu.addAutoclearBuffer(*torque);
...@@ -1009,9 +1014,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1009,9 +1014,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
if (force.getPolarizationType() == AmoebaMultipoleForce::Mutual) { if (force.getPolarizationType() == AmoebaMultipoleForce::Mutual) {
maxInducedIterations = force.getMutualInducedMaxIterations(); maxInducedIterations = force.getMutualInducedMaxIterations();
inducedEpsilon = force.getMutualInducedTargetEpsilon(); inducedEpsilon = force.getMutualInducedTargetEpsilon();
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedFieldPolar");
} }
else else
maxInducedIterations == 0; maxInducedIterations = 0;
// Create the kernels. // Create the kernels.
...@@ -1032,6 +1039,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1032,6 +1039,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
mapTorqueKernel = cu.getKernel(module, "mapTorqueToForce"); mapTorqueKernel = cu.getKernel(module, "mapTorqueToForce");
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines); module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines);
computeFixedFieldKernel = cu.getKernel(module, "computeFixedField"); computeFixedFieldKernel = cu.getKernel(module, "computeFixedField");
if (maxInducedIterations > 0) {
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleInducedField, defines);
computeInducedFieldKernel = cu.getKernel(module, "computeInducedField");
updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldBySOR");
}
stringstream electrostaticsSource; stringstream electrostaticsSource;
electrostaticsSource << CudaKernelSources::vectorOps; electrostaticsSource << CudaKernelSources::vectorOps;
electrostaticsSource << CudaAmoebaKernelSources::multipoleElectrostatics; electrostaticsSource << CudaAmoebaKernelSources::multipoleElectrostatics;
...@@ -1436,8 +1448,26 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1436,8 +1448,26 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()}; &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms()); cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
vector<float2> errors;
for (int i = 0; i < maxInducedIterations; i++) { for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &inducedField->getDevicePointer(),
&inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize);
inducedDipoleErrors->download(errors);
double total1 = 0.0, total2 = 0.0;
for (int j = 0; j < (int) errors.size(); j++) {
total1 += errors[j].x;
total2 += errors[j].y;
}
if (48.033324*sqrt(max(total1, total2)/cu.getNumAtoms()) < inducedEpsilon)
break;
} }
// Compute electrostatic force. // Compute electrostatic force.
......
...@@ -388,11 +388,13 @@ private: ...@@ -388,11 +388,13 @@ private:
CudaArray* labFrameQuadrupoles; CudaArray* labFrameQuadrupoles;
CudaArray* field; CudaArray* field;
CudaArray* fieldPolar; CudaArray* fieldPolar;
CudaArray* inducedField;
CudaArray* inducedFieldPolar;
CudaArray* torque; CudaArray* torque;
CudaArray* dampingAndThole; CudaArray* dampingAndThole;
CudaArray* inducedDipole; CudaArray* inducedDipole;
CudaArray* inducedDipolePolar; CudaArray* inducedDipolePolar;
CudaArray* currentEpsilon; CudaArray* inducedDipoleErrors;
CudaArray* polarizability; CudaArray* polarizability;
CudaArray* covalentFlags; CudaArray* covalentFlags;
CudaArray* polarizationGroupFlags; CudaArray* polarizationGroupFlags;
...@@ -413,7 +415,7 @@ private: ...@@ -413,7 +415,7 @@ private:
CudaArray* pmeAtomGridIndex; CudaArray* pmeAtomGridIndex;
CudaSort* sort; CudaSort* sort;
cufftHandle fft; cufftHandle fft;
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, electrostaticsKernel, mapTorqueKernel; CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
}; };
/** /**
......
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct {
real3 pos;
real3 field, fieldPolar, inducedDipole, inducedDipolePolar;
float thole, damp;
} AtomData;
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
float2 temp = dampingAndThole[atom];
data.damp = temp.x;
data.thole = temp.y;
}
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, real3* fields) {
real rI = RSQRT(dot(deltaR, deltaR));
real r = RECIP(rI);
real r2I = rI*rI;
real rr3 = -rI*r2I;
real rr5 = -3*rr3*r2I;
real dampProd = atom1.damp*atom2.damp;
real ratio = (dampProd != 0 ? r/dampProd : 1);
float pGamma = (atom2.thole > atom1.thole ? atom1.thole: atom2.thole);
real damp = ratio*ratio*ratio*pGamma;
real dampExp = (dampProd != 0 ? EXP(-damp) : 0);
rr3 *= 1 - dampExp;
rr5 *= 1 - (1+damp)*dampExp;
real dDotDelta = rr5*dot(deltaR, atom2.inducedDipole);
fields[0] = rr3*atom2.inducedDipole + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom2.inducedDipolePolar);
fields[1] = rr3*atom2.inducedDipolePolar + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom1.inducedDipole);
fields[2] = rr3*atom1.inducedDipole + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom1.inducedDipolePolar);
fields[3] = rr3*atom1.inducedDipolePolar + dDotDelta*deltaR;
}
/**
* Compute the mutual induced field.
*/
extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ fieldBuffers, unsigned long long* __restrict__ fieldPolarBuffers, const real4* __restrict__ posq,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags,
#endif
const float2* __restrict__ dampingAndThole) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
unsigned int pos = (numTiles > maxTiles ? startTileIndex+warp*numTileIndices/totalWarps : warp*numTiles/totalWarps);
unsigned int end = (numTiles > maxTiles ? startTileIndex+(warp+1)*numTileIndices/totalWarps : (warp+1)*numTiles/totalWarps);
#else
const unsigned int numTiles = numTileIndices;
unsigned int pos = startTileIndex+warp*numTiles/totalWarps;
unsigned int end = startTileIndex+(warp+1)*numTiles/totalWarps;
#endif
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
#ifndef ENABLE_SHUFFLE
__shared__ real tempBuffer[3*THREAD_BLOCK_SIZE];
#endif
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y;
AtomData data;
data.field = make_real3(0);
data.fieldPolar = make_real3(0);
if (pos < end) {
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
y = tileIndices.y;
}
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData(data, atom1, posq, inducedDipole, inducedDipolePolar, dampingAndThole);
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].thole = data.thole;
localData[threadIdx.x].damp = data.damp;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+j;
real3 delta = localData[atom2].pos-data.pos;
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real3 fields[4];
computeOneInteraction(data, localData[atom2], delta, fields);
atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
data.field += fields[0];
data.fieldPolar += fields[1];
}
}
}
else {
// This is an off-diagonal tile.
loadAtomData(localData[threadIdx.x], y*TILE_SIZE+tgx, posq, inducedDipole, inducedDipolePolar, dampingAndThole);
localData[threadIdx.x].field = make_real3(0);
localData[threadIdx.x].fieldPolar = make_real3(0);
#ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) {
int atom2 = tbx+j;
int bufferIndex = 3*threadIdx.x;
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
real3 delta = localData[atom2].pos-data.pos;
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
COMPUTE_INTERACTION
#ifdef USE_CUTOFF
}
#endif
#ifdef ENABLE_SHUFFLE
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
for (int i = 16; i >= 1; i /= 2) {
dEdR2.x += __shfl_xor(dEdR2.x, i, 32);
dEdR2.y += __shfl_xor(dEdR2.y, i, 32);
dEdR2.z += __shfl_xor(dEdR2.z, i, 32);
}
if (tgx == 0) {
localData[tbx+j].fx += dEdR2.x;
localData[tbx+j].fy += dEdR2.y;
localData[tbx+j].fz += dEdR2.z;
}
#else
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
tempBuffer[bufferIndex] = dEdR2.x;
tempBuffer[bufferIndex+1] = dEdR2.y;
tempBuffer[bufferIndex+2] = dEdR2.z;
// Sum the forces on atom2.
if (tgx % 4 == 0) {
tempBuffer[bufferIndex] += tempBuffer[bufferIndex+3]+tempBuffer[bufferIndex+6]+tempBuffer[bufferIndex+9];
tempBuffer[bufferIndex+1] += tempBuffer[bufferIndex+4]+tempBuffer[bufferIndex+7]+tempBuffer[bufferIndex+10];
tempBuffer[bufferIndex+2] += tempBuffer[bufferIndex+5]+tempBuffer[bufferIndex+8]+tempBuffer[bufferIndex+11];
}
if (tgx == 0) {
localData[tbx+j].fx += tempBuffer[bufferIndex]+tempBuffer[bufferIndex+12]+tempBuffer[bufferIndex+24]+tempBuffer[bufferIndex+36]+tempBuffer[bufferIndex+48]+tempBuffer[bufferIndex+60]+tempBuffer[bufferIndex+72]+tempBuffer[bufferIndex+84];
localData[tbx+j].fy += tempBuffer[bufferIndex+1]+tempBuffer[bufferIndex+13]+tempBuffer[bufferIndex+25]+tempBuffer[bufferIndex+37]+tempBuffer[bufferIndex+49]+tempBuffer[bufferIndex+61]+tempBuffer[bufferIndex+73]+tempBuffer[bufferIndex+85];
localData[tbx+j].fz += tempBuffer[bufferIndex+2]+tempBuffer[bufferIndex+14]+tempBuffer[bufferIndex+26]+tempBuffer[bufferIndex+38]+tempBuffer[bufferIndex+50]+tempBuffer[bufferIndex+62]+tempBuffer[bufferIndex+74]+tempBuffer[bufferIndex+86];
}
#endif
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj;
real3 delta = localData[atom2].pos-data.pos;
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real3 fields[4];
computeOneInteraction(data, localData[atom2], delta, fields);
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
data.field += fields[0];
data.fieldPolar += fields[1];
localData[atom2].field += fields[2];
localData[atom2].fieldPolar += fields[3];
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
}
}
// Write results.
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (data.field.x*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF)));
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF)));
}
pos++;
} while (pos < end);
}
extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar,
const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar, real* __restrict__ inducedDipole,
real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors) {
extern __shared__ real2 buffer[];
float polarSOR = 0.55f;
real sumErrors = 0;
real sumPolarErrors = 0;
for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
real scale = polarizability[atom]/(real) 0xFFFFFFFF;
for (int component = 0; component < 3; component++) {
int dipoleIndex = 3*atom+component;
int fieldIndex = atom+component*PADDED_NUM_ATOMS;
real previousDipole = inducedDipole[dipoleIndex];
real previousDipolePolar = inducedDipolePolar[dipoleIndex];
real newDipole = scale*(fixedField[fieldIndex]+inducedField[fieldIndex]);
real newDipolePolar = scale*(fixedFieldPolar[fieldIndex]+inducedFieldPolar[fieldIndex]);
newDipole = previousDipole + polarSOR*(newDipole-previousDipole);
newDipolePolar = previousDipolePolar + polarSOR*(newDipolePolar-previousDipolePolar);
inducedDipole[dipoleIndex] = newDipole;
inducedDipolePolar[dipoleIndex] = newDipolePolar;
sumErrors += (newDipole-previousDipole)*(newDipole-previousDipole);
sumPolarErrors += (newDipolePolar-previousDipolePolar)*(newDipolePolar-previousDipolePolar);
}
}
// Sum the errors over threads and store the total for this block.
buffer[threadIdx.x] = make_real2(sumErrors, sumPolarErrors);
__syncthreads();
for (int offset = 1; offset < blockDim.x; offset *= 2) {
if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0) {
buffer[threadIdx.x].x += buffer[threadIdx.x+offset].x;
buffer[threadIdx.x].y += buffer[threadIdx.x+offset].y;
}
__syncthreads();
}
if (threadIdx.x == 0)
errors[blockIdx.x] = make_float2((float) buffer[0].x, (float) buffer[0].y);
}
\ No newline at end of file
...@@ -1384,8 +1384,8 @@ int main( int numberOfArguments, char* argv[] ) { ...@@ -1384,8 +1384,8 @@ int main( int numberOfArguments, char* argv[] ) {
// test mutual polarization, no cutoff // test mutual polarization, no cutoff
// testMultipoleAmmoniaMutualPolarization( log ); testMultipoleAmmoniaMutualPolarization( log );
//
// // test multipole direct & mutual polarization using PME // // test multipole direct & mutual polarization using PME
// //
// testMultipoleWaterPMEDirectPolarization( log ); // testMultipoleWaterPMEDirectPolarization( log );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment