"platforms/cuda/vscode:/vscode.git/clone" did not exist on "aa6abbbe49da513f85e2dfcbe05dfdb52ad02878"
Commit 490cc912 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed lots of bugs found by mixed/double precision test cases

parent 3c8adf0c
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009 Stanford University and the Authors. * * Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
......
...@@ -4446,7 +4446,7 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo ...@@ -4446,7 +4446,7 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
globalValues = new CudaArray(cu, max(1, numGlobalVariables), elementSize, "globalVariables"); globalValues = new CudaArray(cu, max(1, numGlobalVariables), elementSize, "globalVariables");
sumBuffer = new CudaArray(cu, 3*system.getNumParticles(), elementSize, "sumBuffer"); sumBuffer = new CudaArray(cu, 3*system.getNumParticles(), elementSize, "sumBuffer");
potentialEnergy = new CudaArray(cu, 1, cu.getEnergyBuffer().getElementSize(), "potentialEnergy"); potentialEnergy = new CudaArray(cu, 1, cu.getEnergyBuffer().getElementSize(), "potentialEnergy");
kineticEnergy = new CudaArray(cu, 1, cu.getEnergyBuffer().getElementSize(), "kineticEnergy"); kineticEnergy = new CudaArray(cu, 1, elementSize, "kineticEnergy");
perDofValues = new CudaParameterSet(cu, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cu.getUseDoublePrecision() || cu.getUseMixedPrecision()); perDofValues = new CudaParameterSet(cu, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cu.getUseDoublePrecision() || cu.getUseMixedPrecision());
cu.addReorderListener(new ReorderListener(cu, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent)); cu.addReorderListener(new ReorderListener(cu, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent));
prevStepSize = -1.0; prevStepSize = -1.0;
...@@ -4840,6 +4840,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -4840,6 +4840,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args << ", " << buffer.getType() << "* __restrict__ " << valueName; args << ", " << buffer.getType() << "* __restrict__ " << valueName;
} }
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
if (defines.find("LOAD_POS_AS_DELTA") != defines.end()) if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
defines.erase("LOAD_POS_AS_DELTA"); defines.erase("LOAD_POS_AS_DELTA");
module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines); module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines);
......
...@@ -942,7 +942,7 @@ void OpenCLIntegrationUtilities::loadCheckpoint(istream& stream) { ...@@ -942,7 +942,7 @@ void OpenCLIntegrationUtilities::loadCheckpoint(istream& stream) {
double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) { double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) {
int numParticles = context.getNumAtoms(); int numParticles = context.getNumAtoms();
double energy = 0.0; double energy = 0.0;
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) { if (context.getUseDoublePrecision()) {
mm_double4* force = (mm_double4*) context.getPinnedBuffer(); mm_double4* force = (mm_double4*) context.getPinnedBuffer();
context.getForce().download(force); context.getForce().download(force);
vector<mm_double4> velm; vector<mm_double4> velm;
...@@ -958,6 +958,22 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) { ...@@ -958,6 +958,22 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) {
} }
} }
} }
else if (context.getUseMixedPrecision()) {
mm_float4* force = (mm_float4*) context.getPinnedBuffer();
context.getForce().download(force);
vector<mm_double4> velm;
context.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) {
mm_double4 v = velm[i];
if (v.w != 0) {
double scale = timeShift*v.w;
v.x += scale*force[i].x;
v.y += scale*force[i].y;
v.z += scale*force[i].z;
energy += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
}
}
}
else { else {
mm_float4* force = (mm_float4*) context.getPinnedBuffer(); mm_float4* force = (mm_float4*) context.getPinnedBuffer();
context.getForce().download(force); context.getForce().download(force);
......
...@@ -4706,7 +4706,7 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus ...@@ -4706,7 +4706,7 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
globalValues = new OpenCLArray(cl, max(1, numGlobalVariables), elementSize, "globalVariables"); globalValues = new OpenCLArray(cl, max(1, numGlobalVariables), elementSize, "globalVariables");
sumBuffer = new OpenCLArray(cl, 3*system.getNumParticles(), elementSize, "sumBuffer"); sumBuffer = new OpenCLArray(cl, 3*system.getNumParticles(), elementSize, "sumBuffer");
potentialEnergy = new OpenCLArray(cl, 1, elementSize, "potentialEnergy"); potentialEnergy = new OpenCLArray(cl, 1, cl.getEnergyBuffer().getElementSize(), "potentialEnergy");
kineticEnergy = new OpenCLArray(cl, 1, elementSize, "kineticEnergy"); kineticEnergy = new OpenCLArray(cl, 1, elementSize, "kineticEnergy");
perDofValues = new OpenCLParameterSet(cl, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cl.getUseDoublePrecision() || cl.getUseMixedPrecision()); perDofValues = new OpenCLParameterSet(cl, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cl.getUseDoublePrecision() || cl.getUseMixedPrecision());
cl.addReorderListener(new ReorderListener(cl, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent)); cl.addReorderListener(new ReorderListener(cl, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent));
......
...@@ -473,30 +473,46 @@ void testBlockInteractions(bool periodic) { ...@@ -473,30 +473,46 @@ void testBlockInteractions(bool periodic) {
// Verify that the bounds of each block were calculated correctly. // Verify that the bounds of each block were calculated correctly.
vector<mm_float4> posq; vector<mm_double4> posq(clcontext.getPosq().getSize());
vector<mm_double4> blockCenters(numBlocks);
vector<mm_double4> blockBoundingBoxes(numBlocks);
if (clcontext.getUseDoublePrecision()) {
clcontext.getPosq().download(posq); clcontext.getPosq().download(posq);
vector<mm_float4> blockCenters(numBlocks);
vector<mm_float4> blockBoundingBoxes(numBlocks);
nb.getBlockCenters().download(blockCenters); nb.getBlockCenters().download(blockCenters);
nb.getBlockBoundingBoxes().download(blockBoundingBoxes); nb.getBlockBoundingBoxes().download(blockBoundingBoxes);
}
else {
vector<mm_float4> posqf(clcontext.getPosq().getSize());
vector<mm_float4> blockCentersf(numBlocks);
vector<mm_float4> blockBoundingBoxesf(numBlocks);
clcontext.getPosq().download(posqf);
nb.getBlockCenters().download(blockCentersf);
nb.getBlockBoundingBoxes().download(blockBoundingBoxesf);
for (int i = 0; i < numParticles; i++)
posq[i] = mm_double4(posqf[i].x, posqf[i].y, posqf[i].z, posqf[i].w);
for (int i = 0; i < numBlocks; i++) {
blockCenters[i] = mm_double4(blockCentersf[i].x, blockCentersf[i].y, blockCentersf[i].z, blockCentersf[i].w);
blockBoundingBoxes[i] = mm_double4(blockBoundingBoxesf[i].x, blockBoundingBoxesf[i].y, blockBoundingBoxesf[i].z, blockBoundingBoxesf[i].w);
}
}
for (int i = 0; i < numBlocks; i++) { for (int i = 0; i < numBlocks; i++) {
mm_float4 gridSize = blockBoundingBoxes[i]; mm_double4 gridSize = blockBoundingBoxes[i];
mm_float4 center = blockCenters[i]; mm_double4 center = blockCenters[i];
if (periodic) { if (periodic) {
ASSERT(gridSize.x < 0.5*boxSize); ASSERT(gridSize.x < 0.5*boxSize);
ASSERT(gridSize.y < 0.5*boxSize); ASSERT(gridSize.y < 0.5*boxSize);
ASSERT(gridSize.z < 0.5*boxSize); ASSERT(gridSize.z < 0.5*boxSize);
} }
float minx = 0.0, maxx = 0.0, miny = 0.0, maxy = 0.0, minz = 0.0, maxz = 0.0, radius = 0.0; double minx = 0.0, maxx = 0.0, miny = 0.0, maxy = 0.0, minz = 0.0, maxz = 0.0, radius = 0.0;
for (int j = 0; j < blockSize; j++) { for (int j = 0; j < blockSize; j++) {
mm_float4 pos = posq[i*blockSize+j]; mm_double4 pos = posq[i*blockSize+j];
float dx = pos.x-center.x; double dx = pos.x-center.x;
float dy = pos.y-center.y; double dy = pos.y-center.y;
float dz = pos.z-center.z; double dz = pos.z-center.z;
if (periodic) { if (periodic) {
dx -= (float)(floor(0.5+dx/boxSize)*boxSize); dx -= floor(0.5+dx/boxSize)*boxSize;
dy -= (float)(floor(0.5+dy/boxSize)*boxSize); dy -= floor(0.5+dy/boxSize)*boxSize;
dz -= (float)(floor(0.5+dz/boxSize)*boxSize); dz -= floor(0.5+dz/boxSize)*boxSize;
} }
ASSERT(abs(dx) < gridSize.x+TOL); ASSERT(abs(dx) < gridSize.x+TOL);
ASSERT(abs(dy) < gridSize.y+TOL); ASSERT(abs(dy) < gridSize.y+TOL);
...@@ -538,21 +554,21 @@ void testBlockInteractions(bool periodic) { ...@@ -538,21 +554,21 @@ void testBlockInteractions(bool periodic) {
// Make sure this tile really should have been flagged based on bounding volumes. // Make sure this tile really should have been flagged based on bounding volumes.
mm_float4 gridSize1 = blockBoundingBoxes[x]; mm_double4 gridSize1 = blockBoundingBoxes[x];
mm_float4 gridSize2 = blockBoundingBoxes[y]; mm_double4 gridSize2 = blockBoundingBoxes[y];
mm_float4 center1 = blockCenters[x]; mm_double4 center1 = blockCenters[x];
mm_float4 center2 = blockCenters[y]; mm_double4 center2 = blockCenters[y];
float dx = center1.x-center2.x; double dx = center1.x-center2.x;
float dy = center1.y-center2.y; double dy = center1.y-center2.y;
float dz = center1.z-center2.z; double dz = center1.z-center2.z;
if (periodic) { if (periodic) {
dx -= (float)(floor(0.5+dx/boxSize)*boxSize); dx -= floor(0.5+dx/boxSize)*boxSize;
dy -= (float)(floor(0.5+dy/boxSize)*boxSize); dy -= floor(0.5+dy/boxSize)*boxSize;
dz -= (float)(floor(0.5+dz/boxSize)*boxSize); dz -= floor(0.5+dz/boxSize)*boxSize;
} }
dx = max(0.0f, abs(dx)-gridSize1.x-gridSize2.x); dx = max(0.0, abs(dx)-gridSize1.x-gridSize2.x);
dy = max(0.0f, abs(dy)-gridSize1.y-gridSize2.y); dy = max(0.0, abs(dy)-gridSize1.y-gridSize2.y);
dz = max(0.0f, abs(dz)-gridSize1.z-gridSize2.z); dz = max(0.0, abs(dz)-gridSize1.z-gridSize2.z);
ASSERT(sqrt(dx*dx+dy*dy+dz*dz) < cutoff+TOL); ASSERT(sqrt(dx*dx+dy*dy+dz*dz) < cutoff+TOL);
// Check the interaction flags. // Check the interaction flags.
...@@ -561,16 +577,16 @@ void testBlockInteractions(bool periodic) { ...@@ -561,16 +577,16 @@ void testBlockInteractions(bool periodic) {
unsigned int flags = interactionFlags[i]; unsigned int flags = interactionFlags[i];
for (int atom2 = 0; atom2 < 32; atom2++) { for (int atom2 = 0; atom2 < 32; atom2++) {
if ((flags & 1) == 0) { if ((flags & 1) == 0) {
mm_float4 pos2 = posq[y*blockSize+atom2]; mm_double4 pos2 = posq[y*blockSize+atom2];
for (int atom1 = 0; atom1 < blockSize; ++atom1) { for (int atom1 = 0; atom1 < blockSize; ++atom1) {
mm_float4 pos1 = posq[x*blockSize+atom1]; mm_double4 pos1 = posq[x*blockSize+atom1];
float dx = pos2.x-pos1.x; double dx = pos2.x-pos1.x;
float dy = pos2.y-pos1.y; double dy = pos2.y-pos1.y;
float dz = pos2.z-pos1.z; double dz = pos2.z-pos1.z;
if (periodic) { if (periodic) {
dx -= (float)(floor(0.5+dx/boxSize)*boxSize); dx -= floor(0.5+dx/boxSize)*boxSize;
dy -= (float)(floor(0.5+dy/boxSize)*boxSize); dy -= floor(0.5+dy/boxSize)*boxSize;
dz -= (float)(floor(0.5+dz/boxSize)*boxSize); dz -= floor(0.5+dz/boxSize)*boxSize;
} }
ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff); ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff);
} }
...@@ -587,16 +603,16 @@ void testBlockInteractions(bool periodic) { ...@@ -587,16 +603,16 @@ void testBlockInteractions(bool periodic) {
unsigned int y = (unsigned int) std::floor(numBlocks+0.5-std::sqrt((numBlocks+0.5)*(numBlocks+0.5)-2*i)); unsigned int y = (unsigned int) std::floor(numBlocks+0.5-std::sqrt((numBlocks+0.5)*(numBlocks+0.5)-2*i));
unsigned int x = (i-y*numBlocks+y*(y+1)/2); unsigned int x = (i-y*numBlocks+y*(y+1)/2);
for (int atom1 = 0; atom1 < blockSize; ++atom1) { for (int atom1 = 0; atom1 < blockSize; ++atom1) {
mm_float4 pos1 = posq[x*blockSize+atom1]; mm_double4 pos1 = posq[x*blockSize+atom1];
for (int atom2 = 0; atom2 < blockSize; ++atom2) { for (int atom2 = 0; atom2 < blockSize; ++atom2) {
mm_float4 pos2 = posq[y*blockSize+atom2]; mm_double4 pos2 = posq[y*blockSize+atom2];
float dx = pos1.x-pos2.x; double dx = pos1.x-pos2.x;
float dy = pos1.y-pos2.y; double dy = pos1.y-pos2.y;
float dz = pos1.z-pos2.z; double dz = pos1.z-pos2.z;
if (periodic) { if (periodic) {
dx -= (float)(floor(0.5+dx/boxSize)*boxSize); dx -= (floor(0.5+dx/boxSize)*boxSize);
dy -= (float)(floor(0.5+dy/boxSize)*boxSize); dy -= (floor(0.5+dy/boxSize)*boxSize);
dz -= (float)(floor(0.5+dz/boxSize)*boxSize); dz -= (floor(0.5+dz/boxSize)*boxSize);
} }
ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff); ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff);
} }
......
...@@ -187,7 +187,7 @@ __device__ void computeOneInteractionB1B2(AtomData2& atom1, volatile AtomData2& ...@@ -187,7 +187,7 @@ __device__ void computeOneInteractionB1B2(AtomData2& atom1, volatile AtomData2&
inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ bornRadius) { const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ bornRadius) {
float4 atomPosq = posq[atom]; real4 atomPosq = posq[atom];
data.pos = trimTo3(atomPosq); data.pos = trimTo3(atomPosq);
data.q = atomPosq.w; data.q = atomPosq.w;
data.dipole.x = labFrameDipole[atom*3]; data.dipole.x = labFrameDipole[atom*3];
...@@ -412,7 +412,7 @@ inline __device__ void loadAtomData3(AtomData3& data, int atom, const real4* __r ...@@ -412,7 +412,7 @@ inline __device__ void loadAtomData3(AtomData3& data, int atom, const real4* __r
__device__ void computeBornChainRuleInteraction(AtomData3& atom1, AtomData3& atom2, real3& force) { __device__ void computeBornChainRuleInteraction(AtomData3& atom1, AtomData3& atom2, real3& force) {
real third = 1/(real) 3; real third = 1/(real) 3;
real pi43 = 4*third*M_PI; real pi43 = 4*third*M_PI;
real factor = -POW(M_PI, third)*POW(6, 2/(real) 3)/9; real factor = -POW(M_PI, third)*POW((real) 6, 2/(real) 3)/9;
real term = pi43/(atom1.bornRadius*atom1.bornRadius*atom1.bornRadius); real term = pi43/(atom1.bornRadius*atom1.bornRadius*atom1.bornRadius);
term = factor/POW(term, 4/(real) 3); term = factor/POW(term, 4/(real) 3);
......
...@@ -45,7 +45,7 @@ using namespace OpenMM; ...@@ -45,7 +45,7 @@ using namespace OpenMM;
extern "C" void registerAmoebaCudaKernelFactories(); extern "C" void registerAmoebaCudaKernelFactories();
const double TOL = 1e-5; const double TOL = 1e-4;
#define PI_M 3.141592653589 #define PI_M 3.141592653589
#define RADIAN 57.29577951308 #define RADIAN 57.29577951308
const double DegreesToRadians = PI_M/180.0; const double DegreesToRadians = PI_M/180.0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment