"vscode:/vscode.git/clone" did not exist on "14b2cfc528cfd123aff42c1383e6e6ae1ad7a9eb"
Commit cf112a25 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed bugs when using double precision

parent dac874af
...@@ -766,25 +766,43 @@ void CudaContext::validateMolecules() { ...@@ -766,25 +766,43 @@ void CudaContext::validateMolecules() {
// atoms to their original order, rebuild the list of identical molecules, and sort them // atoms to their original order, rebuild the list of identical molecules, and sort them
// again. // again.
vector<float4> oldPosq(paddedNumAtoms);
vector<float4> newPosq(paddedNumAtoms);
vector<float4> oldVelm(paddedNumAtoms);
vector<float4> newVelm(paddedNumAtoms);
vector<int4> newCellOffsets(numAtoms); vector<int4> newCellOffsets(numAtoms);
posq->download(oldPosq); if (useDoublePrecision) {
velm->download(oldVelm); vector<double4> oldPosq(paddedNumAtoms);
for (int i = 0; i < numAtoms; i++) { vector<double4> newPosq(paddedNumAtoms);
int index = atomIndex[i]; vector<double4> oldVelm(paddedNumAtoms);
newPosq[index] = oldPosq[i]; vector<double4> newVelm(paddedNumAtoms);
newVelm[index] = oldVelm[i]; posq->download(oldPosq);
newCellOffsets[index] = posCellOffsets[i]; velm->download(oldVelm);
for (int i = 0; i < numAtoms; i++) {
int index = atomIndex[i];
newPosq[index] = oldPosq[i];
newVelm[index] = oldVelm[i];
newCellOffsets[index] = posCellOffsets[i];
}
posq->upload(newPosq);
velm->upload(newVelm);
}
else {
vector<float4> oldPosq(paddedNumAtoms);
vector<float4> newPosq(paddedNumAtoms);
vector<float4> oldVelm(paddedNumAtoms);
vector<float4> newVelm(paddedNumAtoms);
posq->download(oldPosq);
velm->download(oldVelm);
for (int i = 0; i < numAtoms; i++) {
int index = atomIndex[i];
newPosq[index] = oldPosq[i];
newVelm[index] = oldVelm[i];
newCellOffsets[index] = posCellOffsets[i];
}
posq->upload(newPosq);
velm->upload(newVelm);
} }
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
atomIndex[i] = i; atomIndex[i] = i;
posCellOffsets[i] = newCellOffsets[i]; posCellOffsets[i] = newCellOffsets[i];
} }
posq->upload(newPosq);
velm->upload(newVelm);
atomIndexDevice->upload(atomIndex); atomIndexDevice->upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
...@@ -797,16 +815,23 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) { ...@@ -797,16 +815,23 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
if (moleculesInvalid) if (moleculesInvalid)
validateMolecules(); validateMolecules();
atomsWereReordered = true; atomsWereReordered = true;
if (useDoublePrecision)
reorderAtomsImpl<double, double4>(enforcePeriodic);
else
reorderAtomsImpl<float, float4>(enforcePeriodic);
}
template <class Real, class Real4>
void CudaContext::reorderAtomsImpl(bool enforcePeriodic) {
// Find the range of positions and the number of bins along each axis. // Find the range of positions and the number of bins along each axis.
vector<float4> oldPosq(paddedNumAtoms); vector<Real4> oldPosq(paddedNumAtoms);
vector<float4> oldVelm(paddedNumAtoms); vector<Real4> oldVelm(paddedNumAtoms);
posq->download(oldPosq); posq->download(oldPosq);
velm->download(oldVelm); velm->download(oldVelm);
float minx = oldPosq[0].x, maxx = oldPosq[0].x; Real minx = oldPosq[0].x, maxx = oldPosq[0].x;
float miny = oldPosq[0].y, maxy = oldPosq[0].y; Real miny = oldPosq[0].y, maxy = oldPosq[0].y;
float minz = oldPosq[0].z, maxz = oldPosq[0].z; Real minz = oldPosq[0].z, maxz = oldPosq[0].z;
if (nonbonded->getUsePeriodic()) { if (nonbonded->getUsePeriodic()) {
minx = miny = minz = 0.0; minx = miny = minz = 0.0;
maxx = periodicBoxSize.x; maxx = periodicBoxSize.x;
...@@ -815,7 +840,7 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) { ...@@ -815,7 +840,7 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
} }
else { else {
for (int i = 1; i < numAtoms; i++) { for (int i = 1; i < numAtoms; i++) {
const float4& pos = oldPosq[i]; const Real4& pos = oldPosq[i];
minx = min(minx, pos.x); minx = min(minx, pos.x);
maxx = max(maxx, pos.x); maxx = max(maxx, pos.x);
miny = min(miny, pos.y); miny = min(miny, pos.y);
...@@ -828,8 +853,8 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) { ...@@ -828,8 +853,8 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
// Loop over each group of identical molecules and reorder them. // Loop over each group of identical molecules and reorder them.
vector<int> originalIndex(numAtoms); vector<int> originalIndex(numAtoms);
vector<float4> newPosq(paddedNumAtoms); vector<Real4> newPosq(paddedNumAtoms);
vector<float4> newVelm(paddedNumAtoms); vector<Real4> newVelm(paddedNumAtoms);
vector<int4> newCellOffsets(numAtoms); vector<int4> newCellOffsets(numAtoms);
for (int group = 0; group < (int) moleculeGroups.size(); group++) { for (int group = 0; group < (int) moleculeGroups.size(); group++) {
// Find the center of each molecule. // Find the center of each molecule.
...@@ -837,15 +862,15 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) { ...@@ -837,15 +862,15 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
MoleculeGroup& mol = moleculeGroups[group]; MoleculeGroup& mol = moleculeGroups[group];
int numMolecules = mol.offsets.size(); int numMolecules = mol.offsets.size();
vector<int>& atoms = mol.atoms; vector<int>& atoms = mol.atoms;
vector<float4> molPos(numMolecules); vector<Real4> molPos(numMolecules);
float invNumAtoms = 1.0f/atoms.size(); Real invNumAtoms = (Real) (1.0/atoms.size());
for (int i = 0; i < numMolecules; i++) { for (int i = 0; i < numMolecules; i++) {
molPos[i].x = 0.0f; molPos[i].x = 0.0f;
molPos[i].y = 0.0f; molPos[i].y = 0.0f;
molPos[i].z = 0.0f; molPos[i].z = 0.0f;
for (int j = 0; j < (int)atoms.size(); j++) { for (int j = 0; j < (int)atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i]; int atom = atoms[j]+mol.offsets[i];
const float4& pos = oldPosq[atom]; const Real4& pos = oldPosq[atom];
molPos[i].x += pos.x; molPos[i].x += pos.x;
molPos[i].y += pos.y; molPos[i].y += pos.y;
molPos[i].z += pos.z; molPos[i].z += pos.z;
...@@ -861,9 +886,9 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) { ...@@ -861,9 +886,9 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
int xcell = (int) floor(molPos[i].x*invPeriodicBoxSize.x); int xcell = (int) floor(molPos[i].x*invPeriodicBoxSize.x);
int ycell = (int) floor(molPos[i].y*invPeriodicBoxSize.y); int ycell = (int) floor(molPos[i].y*invPeriodicBoxSize.y);
int zcell = (int) floor(molPos[i].z*invPeriodicBoxSize.z); int zcell = (int) floor(molPos[i].z*invPeriodicBoxSize.z);
float dx = xcell*periodicBoxSize.x; Real dx = xcell*periodicBoxSize.x;
float dy = ycell*periodicBoxSize.y; Real dy = ycell*periodicBoxSize.y;
float dz = zcell*periodicBoxSize.z; Real dz = zcell*periodicBoxSize.z;
if (dx != 0.0f || dy != 0.0f || dz != 0.0f) { if (dx != 0.0f || dy != 0.0f || dz != 0.0f) {
molPos[i].x -= dx; molPos[i].x -= dx;
molPos[i].y -= dy; molPos[i].y -= dy;
...@@ -871,7 +896,7 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) { ...@@ -871,7 +896,7 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
if (enforcePeriodic) { if (enforcePeriodic) {
for (int j = 0; j < (int) atoms.size(); j++) { for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i]; int atom = atoms[j]+mol.offsets[i];
float4 p = oldPosq[atom]; Real4 p = oldPosq[atom];
p.x -= dx; p.x -= dx;
p.y -= dy; p.y -= dy;
p.z -= dz; p.z -= dz;
...@@ -888,12 +913,12 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) { ...@@ -888,12 +913,12 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
// Select a bin for each molecule, then sort them by bin. // Select a bin for each molecule, then sort them by bin.
bool useHilbert = (numMolecules > 5000 || atoms.size() > 8); // For small systems, a simple zigzag curve works better than a Hilbert curve. bool useHilbert = (numMolecules > 5000 || atoms.size() > 8); // For small systems, a simple zigzag curve works better than a Hilbert curve.
float binWidth; Real binWidth;
if (useHilbert) if (useHilbert)
binWidth = (float)(max(max(maxx-minx, maxy-miny), maxz-minz)/255.0); binWidth = (Real)(max(max(maxx-minx, maxy-miny), maxz-minz)/255.0);
else else
binWidth = (float)(0.2*nonbonded->getCutoffDistance()); binWidth = (Real)(0.2*nonbonded->getCutoffDistance());
float invBinWidth = 1.0f/binWidth; Real invBinWidth = (Real) (1.0/binWidth);
int xbins = 1 + (int) ((maxx-minx)*invBinWidth); int xbins = 1 + (int) ((maxx-minx)*invBinWidth);
int ybins = 1 + (int) ((maxy-miny)*invBinWidth); int ybins = 1 + (int) ((maxy-miny)*invBinWidth);
vector<pair<int, int> > molBins(numMolecules); vector<pair<int, int> > molBins(numMolecules);
......
...@@ -448,6 +448,11 @@ private: ...@@ -448,6 +448,11 @@ private:
* of molecules and resort the atoms. * of molecules and resort the atoms.
*/ */
void validateMolecules(); void validateMolecules();
/**
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/
template <class Real, class Real4>
void reorderAtomsImpl(bool enforcePeriodic);
static bool hasInitializedCuda; static bool hasInitializedCuda;
const System& system; const System& system;
double time, computeCapability; double time, computeCapability;
......
...@@ -717,16 +717,17 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double ...@@ -717,16 +717,17 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
ccmaForceKernel = ccmaPosForceKernel; ccmaForceKernel = ccmaPosForceKernel;
} }
float floatTol = (float) tol; float floatTol = (float) tol;
void* tolPointer = (context.getUseDoublePrecision() ? (void*) &tol : (void*) &floatTol);
if (settleAtoms != NULL) { if (settleAtoms != NULL) {
int numClusters = settleAtoms->getSize(); int numClusters = settleAtoms->getSize();
void* args[] = {&numClusters, &floatTol, &context.getPosq().getDevicePointer(), void* args[] = {&numClusters, tolPointer, &context.getPosq().getDevicePointer(),
&posDelta->getDevicePointer(), &context.getVelm().getDevicePointer(), &posDelta->getDevicePointer(), &context.getVelm().getDevicePointer(),
&settleAtoms->getDevicePointer(), &settleParams->getDevicePointer()}; &settleAtoms->getDevicePointer(), &settleParams->getDevicePointer()};
context.executeKernel(settleKernel, args, settleAtoms->getSize()); context.executeKernel(settleKernel, args, settleAtoms->getSize());
} }
if (shakeAtoms != NULL) { if (shakeAtoms != NULL) {
int numClusters = shakeAtoms->getSize(); int numClusters = shakeAtoms->getSize();
void* args[] = {&numClusters, &floatTol, &context.getPosq().getDevicePointer(), void* args[] = {&numClusters, tolPointer, &context.getPosq().getDevicePointer(),
constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(), constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(),
&shakeAtoms->getDevicePointer(), &shakeParams->getDevicePointer()}; &shakeAtoms->getDevicePointer(), &shakeParams->getDevicePointer()};
context.executeKernel(shakeKernel, args, shakeAtoms->getSize()); context.executeKernel(shakeKernel, args, shakeAtoms->getSize());
...@@ -738,7 +739,7 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double ...@@ -738,7 +739,7 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
void* forceArgs[] = {&ccmaAtoms->getDevicePointer(), &ccmaDistance->getDevicePointer(), void* forceArgs[] = {&ccmaAtoms->getDevicePointer(), &ccmaDistance->getDevicePointer(),
constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(), constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(),
&ccmaReducedMass->getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaConvergedDeviceMemory, &ccmaReducedMass->getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaConvergedDeviceMemory,
&floatTol, &i}; tolPointer, &i};
void* multiplyArgs[] = {&ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(), void* multiplyArgs[] = {&ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(),
&ccmaConstraintMatrixColumn->getDevicePointer(), &ccmaConstraintMatrixValue->getDevicePointer(), &ccmaConvergedDeviceMemory, &i}; &ccmaConstraintMatrixColumn->getDevicePointer(), &ccmaConstraintMatrixValue->getDevicePointer(), &ccmaConvergedDeviceMemory, &i};
void* updateArgs[] = {&ccmaNumAtomConstraints->getDevicePointer(), &ccmaAtomConstraints->getDevicePointer(), &ccmaDistance->getDevicePointer(), void* updateArgs[] = {&ccmaNumAtomConstraints->getDevicePointer(), &ccmaAtomConstraints->getDevicePointer(), &ccmaDistance->getDevicePointer(),
......
...@@ -1451,7 +1451,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1451,7 +1451,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
pmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid"); pmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid");
cu.addAutoclearBuffer(pmeGrid->getDevicePointer(), pmeGrid->getSize()*sizeof(float2)); cu.addAutoclearBuffer(pmeGrid->getDevicePointer(), pmeGrid->getSize()*2*elementSize);
pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX"); pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY"); pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ"); pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
...@@ -1459,7 +1459,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1459,7 +1459,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange"); pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex"); pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms()); sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
cufftResult result = cufftPlan3d(&fft, gridSizeX, gridSizeY, gridSizeZ, CUFFT_C2C); cufftResult result = cufftPlan3d(&fft, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2Z : CUFFT_C2C);
if (result != CUFFT_SUCCESS) if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result)); throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
hasInitializedFFT = true; hasInitializedFFT = true;
......
...@@ -234,8 +234,14 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -234,8 +234,14 @@ void CudaNonbondedUtilities::initialize(const System& system) {
interactingTiles = CudaArray::create<ushort2>(context, maxTiles, "interactingTiles"); interactingTiles = CudaArray::create<ushort2>(context, maxTiles, "interactingTiles");
interactionFlags = CudaArray::create<unsigned int>(context, maxTiles, "interactionFlags"); interactionFlags = CudaArray::create<unsigned int>(context, maxTiles, "interactionFlags");
interactionCount = CudaArray::create<unsigned int>(context, 1, "interactionCount"); interactionCount = CudaArray::create<unsigned int>(context, 1, "interactionCount");
blockCenter = CudaArray::create<float4>(context, numAtomBlocks, "blockCenter"); if (context.getUseDoublePrecision()) {
blockBoundingBox = CudaArray::create<float4>(context, numAtomBlocks, "blockBoundingBox"); blockCenter = CudaArray::create<double4>(context, numAtomBlocks, "blockCenter");
blockBoundingBox = CudaArray::create<double4>(context, numAtomBlocks, "blockBoundingBox");
}
else {
blockCenter = CudaArray::create<float4>(context, numAtomBlocks, "blockCenter");
blockBoundingBox = CudaArray::create<float4>(context, numAtomBlocks, "blockBoundingBox");
}
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedInteractionCount, sizeof(unsigned int), 0)); CHECK_RESULT(cuMemHostAlloc((void**) &pinnedInteractionCount, sizeof(unsigned int), 0));
pinnedInteractionCount[0] = 0; pinnedInteractionCount[0] = 0;
interactionCount->upload(pinnedInteractionCount); interactionCount->upload(pinnedInteractionCount);
......
real4 exceptionParams = PARAMS[index]; float4 exceptionParams = PARAMS[index];
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z); real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2); real invR = RSQRT(r2);
......
real4 torsionParams = PARAMS[index]; float4 torsionParams = PARAMS[index];
real deltaAngle = torsionParams.z*theta-torsionParams.y; real deltaAngle = torsionParams.z*theta-torsionParams.y;
energy += torsionParams.x*(1.0f+COS(deltaAngle)); energy += torsionParams.x*(1.0f+COS(deltaAngle));
real sinDeltaAngle = SIN(deltaAngle); real sinDeltaAngle = SIN(deltaAngle);
......
real4 torsionParams1 = PARAMS1[index]; float4 torsionParams1 = PARAMS1[index];
real2 torsionParams2 = PARAMS2[index]; float2 torsionParams2 = PARAMS2[index];
if (theta < 0) if (theta < 0)
theta += PI; theta += PI;
else else
......
...@@ -476,30 +476,46 @@ void testBlockInteractions(bool periodic) { ...@@ -476,30 +476,46 @@ void testBlockInteractions(bool periodic) {
// Verify that the bounds of each block were calculated correctly. // Verify that the bounds of each block were calculated correctly.
vector<float4> posq(cuContext.getPosq().getSize()); vector<double4> posq(cuContext.getPosq().getSize());
cuContext.getPosq().download(posq); vector<double4> blockCenters(numBlocks);
vector<float4> blockCenters(numBlocks); vector<double4> blockBoundingBoxes(numBlocks);
vector<float4> blockBoundingBoxes(numBlocks); if (cuContext.getUseDoublePrecision()) {
nb.getBlockCenters().download(blockCenters); cuContext.getPosq().download(posq);
nb.getBlockBoundingBoxes().download(blockBoundingBoxes); nb.getBlockCenters().download(blockCenters);
nb.getBlockBoundingBoxes().download(blockBoundingBoxes);
}
else {
vector<float4> posqf(cuContext.getPosq().getSize());
vector<float4> blockCentersf(numBlocks);
vector<float4> blockBoundingBoxesf(numBlocks);
cuContext.getPosq().download(posqf);
nb.getBlockCenters().download(blockCentersf);
nb.getBlockBoundingBoxes().download(blockBoundingBoxesf);
for (int i = 0; i < numParticles; i++)
posq[i] = make_double4(posqf[i].x, posqf[i].y, posqf[i].z, posqf[i].w);
for (int i = 0; i < numBlocks; i++) {
blockCenters[i] = make_double4(blockCentersf[i].x, blockCentersf[i].y, blockCentersf[i].z, blockCentersf[i].w);
blockBoundingBoxes[i] = make_double4(blockBoundingBoxesf[i].x, blockBoundingBoxesf[i].y, blockBoundingBoxesf[i].z, blockBoundingBoxesf[i].w);
}
}
for (int i = 0; i < numBlocks; i++) { for (int i = 0; i < numBlocks; i++) {
float4 gridSize = blockBoundingBoxes[i]; double4 gridSize = blockBoundingBoxes[i];
float4 center = blockCenters[i]; double4 center = blockCenters[i];
if (periodic) { if (periodic) {
ASSERT(gridSize.x < 0.5*boxSize); ASSERT(gridSize.x < 0.5*boxSize);
ASSERT(gridSize.y < 0.5*boxSize); ASSERT(gridSize.y < 0.5*boxSize);
ASSERT(gridSize.z < 0.5*boxSize); ASSERT(gridSize.z < 0.5*boxSize);
} }
float minx = 0.0, maxx = 0.0, miny = 0.0, maxy = 0.0, minz = 0.0, maxz = 0.0, radius = 0.0; double minx = 0.0, maxx = 0.0, miny = 0.0, maxy = 0.0, minz = 0.0, maxz = 0.0, radius = 0.0;
for (int j = 0; j < blockSize; j++) { for (int j = 0; j < blockSize; j++) {
float4 pos = posq[i*blockSize+j]; double4 pos = posq[i*blockSize+j];
float dx = pos.x-center.x; double dx = pos.x-center.x;
float dy = pos.y-center.y; double dy = pos.y-center.y;
float dz = pos.z-center.z; double dz = pos.z-center.z;
if (periodic) { if (periodic) {
dx -= (float)(floor(0.5+dx/boxSize)*boxSize); dx -= floor(0.5+dx/boxSize)*boxSize;
dy -= (float)(floor(0.5+dy/boxSize)*boxSize); dy -= floor(0.5+dy/boxSize)*boxSize;
dz -= (float)(floor(0.5+dz/boxSize)*boxSize); dz -= floor(0.5+dz/boxSize)*boxSize;
} }
ASSERT(abs(dx) < gridSize.x+TOL); ASSERT(abs(dx) < gridSize.x+TOL);
ASSERT(abs(dy) < gridSize.y+TOL); ASSERT(abs(dy) < gridSize.y+TOL);
...@@ -538,21 +554,21 @@ void testBlockInteractions(bool periodic) { ...@@ -538,21 +554,21 @@ void testBlockInteractions(bool periodic) {
// Make sure this tile really should have been flagged based on bounding volumes. // Make sure this tile really should have been flagged based on bounding volumes.
float4 gridSize1 = blockBoundingBoxes[x]; double4 gridSize1 = blockBoundingBoxes[x];
float4 gridSize2 = blockBoundingBoxes[y]; double4 gridSize2 = blockBoundingBoxes[y];
float4 center1 = blockCenters[x]; double4 center1 = blockCenters[x];
float4 center2 = blockCenters[y]; double4 center2 = blockCenters[y];
float dx = center1.x-center2.x; double dx = center1.x-center2.x;
float dy = center1.y-center2.y; double dy = center1.y-center2.y;
float dz = center1.z-center2.z; double dz = center1.z-center2.z;
if (periodic) { if (periodic) {
dx -= (float)(floor(0.5+dx/boxSize)*boxSize); dx -= floor(0.5+dx/boxSize)*boxSize;
dy -= (float)(floor(0.5+dy/boxSize)*boxSize); dy -= floor(0.5+dy/boxSize)*boxSize;
dz -= (float)(floor(0.5+dz/boxSize)*boxSize); dz -= floor(0.5+dz/boxSize)*boxSize;
} }
dx = max(0.0f, abs(dx)-gridSize1.x-gridSize2.x); dx = max(0.0, abs(dx)-gridSize1.x-gridSize2.x);
dy = max(0.0f, abs(dy)-gridSize1.y-gridSize2.y); dy = max(0.0, abs(dy)-gridSize1.y-gridSize2.y);
dz = max(0.0f, abs(dz)-gridSize1.z-gridSize2.z); dz = max(0.0, abs(dz)-gridSize1.z-gridSize2.z);
ASSERT(sqrt(dx*dx+dy*dy+dz*dz) < cutoff+TOL); ASSERT(sqrt(dx*dx+dy*dy+dz*dz) < cutoff+TOL);
// Check the interaction flags. // Check the interaction flags.
...@@ -560,16 +576,16 @@ void testBlockInteractions(bool periodic) { ...@@ -560,16 +576,16 @@ void testBlockInteractions(bool periodic) {
unsigned int flags = interactionFlags[i]; unsigned int flags = interactionFlags[i];
for (int atom2 = 0; atom2 < 32; atom2++) { for (int atom2 = 0; atom2 < 32; atom2++) {
if ((flags & 1) == 0) { if ((flags & 1) == 0) {
float4 pos2 = posq[y*blockSize+atom2]; double4 pos2 = posq[y*blockSize+atom2];
for (int atom1 = 0; atom1 < blockSize; ++atom1) { for (int atom1 = 0; atom1 < blockSize; ++atom1) {
float4 pos1 = posq[x*blockSize+atom1]; double4 pos1 = posq[x*blockSize+atom1];
float dx = pos2.x-pos1.x; double dx = pos2.x-pos1.x;
float dy = pos2.y-pos1.y; double dy = pos2.y-pos1.y;
float dz = pos2.z-pos1.z; double dz = pos2.z-pos1.z;
if (periodic) { if (periodic) {
dx -= (float)(floor(0.5+dx/boxSize)*boxSize); dx -= floor(0.5+dx/boxSize)*boxSize;
dy -= (float)(floor(0.5+dy/boxSize)*boxSize); dy -= floor(0.5+dy/boxSize)*boxSize;
dz -= (float)(floor(0.5+dz/boxSize)*boxSize); dz -= floor(0.5+dz/boxSize)*boxSize;
} }
ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff); ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff);
} }
...@@ -585,16 +601,16 @@ void testBlockInteractions(bool periodic) { ...@@ -585,16 +601,16 @@ void testBlockInteractions(bool periodic) {
unsigned int y = (unsigned int) std::floor(numBlocks+0.5-std::sqrt((numBlocks+0.5)*(numBlocks+0.5)-2*i)); unsigned int y = (unsigned int) std::floor(numBlocks+0.5-std::sqrt((numBlocks+0.5)*(numBlocks+0.5)-2*i));
unsigned int x = (i-y*numBlocks+y*(y+1)/2); unsigned int x = (i-y*numBlocks+y*(y+1)/2);
for (int atom1 = 0; atom1 < blockSize; ++atom1) { for (int atom1 = 0; atom1 < blockSize; ++atom1) {
float4 pos1 = posq[x*blockSize+atom1]; double4 pos1 = posq[x*blockSize+atom1];
for (int atom2 = 0; atom2 < blockSize; ++atom2) { for (int atom2 = 0; atom2 < blockSize; ++atom2) {
float4 pos2 = posq[y*blockSize+atom2]; double4 pos2 = posq[y*blockSize+atom2];
float dx = pos1.x-pos2.x; double dx = pos1.x-pos2.x;
float dy = pos1.y-pos2.y; double dy = pos1.y-pos2.y;
float dz = pos1.z-pos2.z; double dz = pos1.z-pos2.z;
if (periodic) { if (periodic) {
dx -= (float)(floor(0.5+dx/boxSize)*boxSize); dx -= floor(0.5+dx/boxSize)*boxSize;
dy -= (float)(floor(0.5+dy/boxSize)*boxSize); dy -= floor(0.5+dy/boxSize)*boxSize;
dz -= (float)(floor(0.5+dz/boxSize)*boxSize); dz -= floor(0.5+dz/boxSize)*boxSize;
} }
ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff); ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment