Commit 195bcecf authored by Peter Eastman's avatar Peter Eastman
Browse files

Began implementing OpenCL support for triclinic boxes

parent 17c06daa
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. * * Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -448,36 +448,65 @@ public: ...@@ -448,36 +448,65 @@ public:
/** /**
* Get whether the device being used supports 64 bit atomic operations on global memory. * Get whether the device being used supports 64 bit atomic operations on global memory.
*/ */
bool getSupports64BitGlobalAtomics() { bool getSupports64BitGlobalAtomics() const {
return supports64BitGlobalAtomics; return supports64BitGlobalAtomics;
} }
/** /**
* Get whether the device being used supports double precision math. * Get whether the device being used supports double precision math.
*/ */
bool getSupportsDoublePrecision() { bool getSupportsDoublePrecision() const {
return supportsDoublePrecision; return supportsDoublePrecision;
} }
/** /**
* Get whether double precision is being used. * Get whether double precision is being used.
*/ */
bool getUseDoublePrecision() { bool getUseDoublePrecision() const {
return useDoublePrecision; return useDoublePrecision;
} }
/** /**
* Get whether mixed precision is being used. * Get whether mixed precision is being used.
*/ */
bool getUseMixedPrecision() { bool getUseMixedPrecision() const {
return useMixedPrecision; return useMixedPrecision;
} }
/**
* Get whether the periodic box is triclinic.
*/
bool getBoxIsTriclinic() const {
return boxIsTriclinic;
}
/** /**
* Convert a number to a string in a format suitable for including in a kernel. * Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision. * This takes into account whether the context uses single or double precision.
*/ */
std::string doubleToString(double value); std::string doubleToString(double value) const;
/** /**
* Convert a number to a string in a format suitable for including in a kernel. * Convert a number to a string in a format suitable for including in a kernel.
*/ */
std::string intToString(int value); std::string intToString(int value) const;
/**
* Get the vectors defining the periodic box.
*/
void getPeriodicBoxVectors(Vec3& a, Vec3& b, Vec3& c) const {
a = Vec3(periodicBoxVecXDouble.x, periodicBoxVecXDouble.y, periodicBoxVecXDouble.z);
b = Vec3(periodicBoxVecYDouble.x, periodicBoxVecYDouble.y, periodicBoxVecYDouble.z);
c = Vec3(periodicBoxVecZDouble.x, periodicBoxVecZDouble.y, periodicBoxVecZDouble.z);
}
/**
* Set the vectors defining the periodic box.
*/
void setPeriodicBoxVectors(const Vec3& a, const Vec3& b, const Vec3& c) {
periodicBoxVecX = mm_float4((float) a[0], (float) a[1], (float) a[2], 0.0f);
periodicBoxVecY = mm_float4((float) b[0], (float) b[1], (float) b[2], 0.0f);
periodicBoxVecZ = mm_float4((float) c[0], (float) c[1], (float) c[2], 0.0f);
periodicBoxVecXDouble = mm_double4(a[0], a[1], a[2], 0.0);
periodicBoxVecYDouble = mm_double4(b[0], b[1], b[2], 0.0);
periodicBoxVecZDouble = mm_double4(c[0], c[1], c[2], 0.0);
periodicBoxSize = mm_float4((float) a[0], (float) b[1], (float) c[2], 0.0f);
invPeriodicBoxSize = mm_float4(1.0f/(float) a[0], 1.0f/(float) b[1], 1.0f/(float) c[2], 0.0f);
periodicBoxSizeDouble = mm_double4(a[0], b[1], c[2], 0.0);
invPeriodicBoxSizeDouble = mm_double4(1.0/a[0], 1.0/b[1], 1.0/c[2], 0.0);
}
/** /**
* Get the size of the periodic box. * Get the size of the periodic box.
*/ */
...@@ -490,15 +519,6 @@ public: ...@@ -490,15 +519,6 @@ public:
mm_double4 getPeriodicBoxSizeDouble() const { mm_double4 getPeriodicBoxSizeDouble() const {
return periodicBoxSizeDouble; return periodicBoxSizeDouble;
} }
/**
* Set the size of the periodic box.
*/
void setPeriodicBoxSize(double xsize, double ysize, double zsize) {
periodicBoxSize = mm_float4((float) xsize, (float) ysize, (float) zsize, 0);
invPeriodicBoxSize = mm_float4((float) (1.0/xsize), (float) (1.0/ysize), (float) (1.0/zsize), 0);
periodicBoxSizeDouble = mm_double4(xsize, ysize, zsize, 0);
invPeriodicBoxSizeDouble = mm_double4(1.0/xsize, 1.0/ysize, 1.0/zsize, 0);
}
/** /**
* Get the inverse of the size of the periodic box. * Get the inverse of the size of the periodic box.
*/ */
...@@ -511,6 +531,42 @@ public: ...@@ -511,6 +531,42 @@ public:
mm_double4 getInvPeriodicBoxSizeDouble() const { mm_double4 getInvPeriodicBoxSizeDouble() const {
return invPeriodicBoxSizeDouble; return invPeriodicBoxSizeDouble;
} }
/**
* Get the first periodic box vector.
*/
mm_float4 getPeriodicBoxVecX() {
return periodicBoxVecX;
}
/**
* Get the first periodic box vector.
*/
mm_double4 getPeriodicBoxVecXDouble() {
return periodicBoxVecXDouble;
}
/**
* Get the second periodic box vector.
*/
mm_float4 getPeriodicBoxVecY() {
return periodicBoxVecY;
}
/**
* Get the second periodic box vector.
*/
mm_double4 getPeriodicBoxVecYDouble() {
return periodicBoxVecYDouble;
}
/**
* Get the third periodic box vector.
*/
mm_float4 getPeriodicBoxVecZ() {
return periodicBoxVecZ;
}
/**
* Get the third periodic box vector.
*/
mm_double4 getPeriodicBoxVecZDouble() {
return periodicBoxVecZDouble;
}
/** /**
* Get the OpenCLIntegrationUtilities for this context. * Get the OpenCLIntegrationUtilities for this context.
*/ */
...@@ -628,9 +684,9 @@ private: ...@@ -628,9 +684,9 @@ private:
int numThreadBlocks; int numThreadBlocks;
int numForceBuffers; int numForceBuffers;
int simdWidth; int simdWidth;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered; bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic;
mm_float4 periodicBoxSize, invPeriodicBoxSize; mm_float4 periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble; mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble, periodicBoxVecXDouble, periodicBoxVecYDouble, periodicBoxVecZDouble;
std::string defaultOptimizationOptions; std::string defaultOptimizationOptions;
std::map<std::string, std::string> compilationDefines; std::map<std::string, std::string> compilationDefines;
cl::Context context; cl::Context context;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. * * Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -336,6 +336,54 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -336,6 +336,54 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines["LOG"] = "log"; compilationDefines["LOG"] = "log";
} }
// Set defines for applying periodic boundary conditions.
Vec3 boxVectors[3];
system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
boxIsTriclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
boxVectors[1][0] != 0.0 || boxVectors[1][2] != 0.0 ||
boxVectors[2][0] != 0.0 || boxVectors[2][1] != 0.0);
if (boxIsTriclinic) {
compilationDefines["APPLY_PERIODIC_TO_DELTA(delta)"] =
"{"
"real scale3 = floor(delta.z*invPeriodicBoxSize.z+0.5f); \\\n"
"delta.xyz -= scale3*periodicBoxVecZ.xyz; \\\n"
"real scale2 = floor(delta.y*invPeriodicBoxSize.y+0.5f); \\\n"
"delta.xy -= scale2*periodicBoxVecY.xy; \\\n"
"real scale1 = floor(delta.x*invPeriodicBoxSize.x+0.5f); \\\n"
"delta.x -= scale1*periodicBoxVecX.x;}";
compilationDefines["APPLY_PERIODIC_TO_POS(pos)"] =
"{"
"real scale3 = floor(pos.z*invPeriodicBoxSize.z); \\\n"
"pos.xyz -= scale3*periodicBoxVecZ.xyz; \\\n"
"real scale2 = floor(pos.y*invPeriodicBoxSize.y); \\\n"
"pos.xy -= scale2*periodicBoxVecY.xy; \\\n"
"real scale1 = floor(pos.x*invPeriodicBoxSize.x); \\\n"
"pos.x -= scale1*periodicBoxVecX.x;}";
compilationDefines["APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"] =
"{"
"real scale3 = floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f); \\\n"
"pos.x -= scale3*periodicBoxVecZ.x; \\\n"
"pos.y -= scale3*periodicBoxVecZ.y; \\\n"
"pos.z -= scale3*periodicBoxVecZ.z; \\\n"
"real scale2 = floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f); \\\n"
"pos.x -= scale2*periodicBoxVecY.x; \\\n"
"pos.y -= scale2*periodicBoxVecY.y; \\\n"
"real scale1 = floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f); \\\n"
"pos.x -= scale1*periodicBoxVecX.x;}";
}
else {
compilationDefines["APPLY_PERIODIC_TO_DELTA(delta)"] =
"delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;";
compilationDefines["APPLY_PERIODIC_TO_POS(pos)"] =
"pos.xyz -= floor(pos.xyz*invPeriodicBoxSize.xyz)*periodicBoxSize.xyz;";
compilationDefines["APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"] =
"{"
"pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; \\\n"
"pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; \\\n"
"pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}";
}
// Create the work thread used for parallelization when running on multiple devices. // Create the work thread used for parallelization when running on multiple devices.
thread = new WorkThread(); thread = new WorkThread();
...@@ -527,7 +575,7 @@ void OpenCLContext::restoreDefaultQueue() { ...@@ -527,7 +575,7 @@ void OpenCLContext::restoreDefaultQueue() {
currentQueue = defaultQueue; currentQueue = defaultQueue;
} }
string OpenCLContext::doubleToString(double value) { string OpenCLContext::doubleToString(double value) const {
stringstream s; stringstream s;
s.precision(useDoublePrecision ? 16 : 8); s.precision(useDoublePrecision ? 16 : 8);
s << scientific << value; s << scientific << value;
...@@ -536,7 +584,7 @@ string OpenCLContext::doubleToString(double value) { ...@@ -536,7 +584,7 @@ string OpenCLContext::doubleToString(double value) {
return s.str(); return s.str();
} }
string OpenCLContext::intToString(int value) { string OpenCLContext::intToString(int value) const {
stringstream s; stringstream s;
s << value; s << value;
return s.str(); return s.str();
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2014 Stanford University and the Authors. * * Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -77,6 +77,19 @@ static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int ...@@ -77,6 +77,19 @@ static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int
kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize()); kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
} }
static void setPeriodicBoxVecArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision()) {
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecXDouble());
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecYDouble());
kernel.setArg<mm_double4>(index, cl.getPeriodicBoxVecZDouble());
}
else {
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecX());
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecY());
kernel.setArg<mm_float4>(index, cl.getPeriodicBoxVecZ());
}
}
static bool isZeroExpression(const Lepton::ParsedExpression& expression) { static bool isZeroExpression(const Lepton::ParsedExpression& expression) {
const Lepton::Operation& op = expression.getRootNode().getOperation(); const Lepton::Operation& op = expression.getRootNode().getOperation();
if (op.getId() != Lepton::Operation::CONSTANT) if (op.getId() != Lepton::Operation::CONSTANT)
...@@ -323,20 +336,17 @@ void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& ...@@ -323,20 +336,17 @@ void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>&
} }
void OpenCLUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const { void OpenCLUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
mm_double4 box = cl.getPeriodicBoxSizeDouble(); cl.getPeriodicBoxVectors(a, b, c);
a = Vec3(box.x, 0, 0);
b = Vec3(0, box.y, 0);
c = Vec3(0, 0, box.z);
} }
void OpenCLUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const { void OpenCLUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const {
vector<OpenCLContext*>& contexts = cl.getPlatformData().contexts; vector<OpenCLContext*>& contexts = cl.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++) for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setPeriodicBoxSize(a[0], b[1], c[2]); contexts[i]->setPeriodicBoxVectors(a, b, c);
} }
void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) { void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
int version = 1; int version = 2;
stream.write((char*) &version, sizeof(int)); stream.write((char*) &version, sizeof(int));
int precision = (cl.getUseDoublePrecision() ? 2 : cl.getUseMixedPrecision() ? 1 : 0); int precision = (cl.getUseDoublePrecision() ? 2 : cl.getUseMixedPrecision() ? 1 : 0);
stream.write((char*) &precision, sizeof(int)); stream.write((char*) &precision, sizeof(int));
...@@ -357,8 +367,9 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream ...@@ -357,8 +367,9 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream
stream.write(buffer, cl.getVelm().getSize()*cl.getVelm().getElementSize()); stream.write(buffer, cl.getVelm().getSize()*cl.getVelm().getElementSize());
stream.write((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().size()); stream.write((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().size());
stream.write((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size()); stream.write((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size());
mm_float4 box = cl.getPeriodicBoxSize(); Vec3 boxVectors[3];
stream.write((char*) &box, sizeof(mm_float4)); cl.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
stream.write((char*) boxVectors, 3*sizeof(Vec3));
cl.getIntegrationUtilities().createCheckpoint(stream); cl.getIntegrationUtilities().createCheckpoint(stream);
SimTKOpenMMUtilities::createCheckpoint(stream); SimTKOpenMMUtilities::createCheckpoint(stream);
} }
...@@ -366,7 +377,7 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream ...@@ -366,7 +377,7 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream
void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) { void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
int version; int version;
stream.read((char*) &version, sizeof(int)); stream.read((char*) &version, sizeof(int));
if (version != 1) if (version != 2)
throw OpenMMException("Checkpoint was created with a different version of OpenMM"); throw OpenMMException("Checkpoint was created with a different version of OpenMM");
int precision; int precision;
stream.read((char*) &precision, sizeof(int)); stream.read((char*) &precision, sizeof(int));
...@@ -396,10 +407,10 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& ...@@ -396,10 +407,10 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream&
stream.read((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().size()); stream.read((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().size());
cl.getAtomIndexArray().upload(cl.getAtomIndex()); cl.getAtomIndexArray().upload(cl.getAtomIndex());
stream.read((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size()); stream.read((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size());
mm_float4 box; Vec3 boxVectors[3];
stream.read((char*) &box, sizeof(mm_float4)); stream.read((char*) &boxVectors, 3*sizeof(Vec3));
for (int i = 0; i < (int) contexts.size(); i++) for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setPeriodicBoxSize(box.x, box.y, box.z); contexts[i]->setPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
cl.getIntegrationUtilities().loadCheckpoint(stream); cl.getIntegrationUtilities().loadCheckpoint(stream);
SimTKOpenMMUtilities::loadCheckpoint(stream); SimTKOpenMMUtilities::loadCheckpoint(stream);
for (int i = 0; i < (int) cl.getReorderListeners().size(); i++) for (int i = 0; i < (int) cl.getReorderListeners().size(); i++)
...@@ -6636,9 +6647,9 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -6636,9 +6647,9 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
// Initialize the kernel arguments. // Initialize the kernel arguments.
kernel.setArg<cl_int>(3, numMolecules); kernel.setArg<cl_int>(3, numMolecules);
kernel.setArg<cl::Buffer>(6, cl.getPosq().getDeviceBuffer()); kernel.setArg<cl::Buffer>(9, cl.getPosq().getDeviceBuffer());
kernel.setArg<cl::Buffer>(7, moleculeAtoms->getDeviceBuffer()); kernel.setArg<cl::Buffer>(10, moleculeAtoms->getDeviceBuffer());
kernel.setArg<cl::Buffer>(8, moleculeStartIndex->getDeviceBuffer()); kernel.setArg<cl::Buffer>(11, moleculeStartIndex->getDeviceBuffer());
} }
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions->getDeviceBuffer(), 0, 0, bytesToCopy); cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions->getDeviceBuffer(), 0, 0, bytesToCopy);
...@@ -6647,6 +6658,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -6647,6 +6658,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
kernel.setArg<cl_float>(2, (cl_float) scaleZ); kernel.setArg<cl_float>(2, (cl_float) scaleZ);
setPeriodicBoxSizeArg(cl, kernel, 4); setPeriodicBoxSizeArg(cl, kernel, 4);
setInvPeriodicBoxSizeArg(cl, kernel, 5); setInvPeriodicBoxSizeArg(cl, kernel, 5);
setPeriodicBoxVecArgs(cl, kernel, 6);
cl.executeKernel(kernel, cl.getNumAtoms()); cl.executeKernel(kernel, cl.getNumAtoms());
for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++) for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++)
cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0); cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. * * Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -325,11 +325,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -325,11 +325,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
cl::Program interactingBlocksProgram = context.createProgram(file, defines); cl::Program interactingBlocksProgram = context.createProgram(file, defines);
findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds"); findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds");
findBlockBoundsKernel.setArg<cl_int>(0, context.getNumAtoms()); findBlockBoundsKernel.setArg<cl_int>(0, context.getNumAtoms());
findBlockBoundsKernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer()); findBlockBoundsKernel.setArg<cl::Buffer>(6, context.getPosq().getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(4, blockCenter->getDeviceBuffer()); findBlockBoundsKernel.setArg<cl::Buffer>(7, blockCenter->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(5, blockBoundingBox->getDeviceBuffer()); findBlockBoundsKernel.setArg<cl::Buffer>(8, blockBoundingBox->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(6, rebuildNeighborList->getDeviceBuffer()); findBlockBoundsKernel.setArg<cl::Buffer>(9, rebuildNeighborList->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(7, sortedBlocks->getDeviceBuffer()); findBlockBoundsKernel.setArg<cl::Buffer>(10, sortedBlocks->getDeviceBuffer());
sortBoxDataKernel = cl::Kernel(interactingBlocksProgram, "sortBoxData"); sortBoxDataKernel = cl::Kernel(interactingBlocksProgram, "sortBoxData");
sortBoxDataKernel.setArg<cl::Buffer>(0, sortedBlocks->getDeviceBuffer()); sortBoxDataKernel.setArg<cl::Buffer>(0, sortedBlocks->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(1, blockCenter->getDeviceBuffer()); sortBoxDataKernel.setArg<cl::Buffer>(1, blockCenter->getDeviceBuffer());
...@@ -341,20 +341,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -341,20 +341,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
sortBoxDataKernel.setArg<cl::Buffer>(7, interactionCount->getDeviceBuffer()); sortBoxDataKernel.setArg<cl::Buffer>(7, interactionCount->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(8, rebuildNeighborList->getDeviceBuffer()); sortBoxDataKernel.setArg<cl::Buffer>(8, rebuildNeighborList->getDeviceBuffer());
findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions"); findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions");
findInteractingBlocksKernel.setArg<cl::Buffer>(2, interactionCount->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(3, interactingTiles->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(4, interactingAtoms->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(5, context.getPosq().getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(8, context.getPosq().getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(6, interactingTiles->getSize()); findInteractingBlocksKernel.setArg<cl_uint>(9, interactingTiles->getSize());
findInteractingBlocksKernel.setArg<cl_uint>(7, startBlockIndex); findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
findInteractingBlocksKernel.setArg<cl_uint>(8, numBlocks); findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
findInteractingBlocksKernel.setArg<cl::Buffer>(9, sortedBlocks->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(12, sortedBlocks->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(10, sortedBlockCenter->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(13, sortedBlockCenter->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(11, sortedBlockBoundingBox->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(14, sortedBlockBoundingBox->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(12, exclusionIndices->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(15, exclusionIndices->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(13, exclusionRowIndices->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(16, exclusionRowIndices->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(14, oldPositions->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(17, oldPositions->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(15, rebuildNeighborList->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(18, rebuildNeighborList->getDeviceBuffer());
if (findInteractingBlocksKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()) < groupSize) { if (findInteractingBlocksKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()) < groupSize) {
// The device can't handle this block size, so reduce it. // The device can't handle this block size, so reduce it.
...@@ -369,18 +369,21 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -369,18 +369,21 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
} }
} }
static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) { static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision()) if (cl.getUseDoublePrecision()) {
kernel.setArg<mm_double4>(index, cl.getPeriodicBoxSizeDouble()); kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxSizeDouble());
else kernel.setArg<mm_double4>(index++, cl.getInvPeriodicBoxSizeDouble());
kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize()); kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecXDouble());
} kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecYDouble());
kernel.setArg<mm_double4>(index, cl.getPeriodicBoxVecZDouble());
static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) { }
if (cl.getUseDoublePrecision()) else {
kernel.setArg<mm_double4>(index, cl.getInvPeriodicBoxSizeDouble()); kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxSize());
else kernel.setArg<mm_float4>(index++, cl.getInvPeriodicBoxSize());
kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize()); kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecX());
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecY());
kernel.setArg<mm_float4>(index, cl.getPeriodicBoxVecZ());
}
} }
void OpenCLNonbondedUtilities::prepareInteractions() { void OpenCLNonbondedUtilities::prepareInteractions() {
...@@ -397,22 +400,18 @@ void OpenCLNonbondedUtilities::prepareInteractions() { ...@@ -397,22 +400,18 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
// Compute the neighbor list. // Compute the neighbor list.
setPeriodicBoxSizeArg(context, findBlockBoundsKernel, 1); setPeriodicBoxArgs(context, findBlockBoundsKernel, 1);
setInvPeriodicBoxSizeArg(context, findBlockBoundsKernel, 2);
context.executeKernel(findBlockBoundsKernel, context.getNumAtoms()); context.executeKernel(findBlockBoundsKernel, context.getNumAtoms());
blockSorter->sort(*sortedBlocks); blockSorter->sort(*sortedBlocks);
context.executeKernel(sortBoxDataKernel, context.getNumAtoms()); context.executeKernel(sortBoxDataKernel, context.getNumAtoms());
setPeriodicBoxSizeArg(context, findInteractingBlocksKernel, 0); setPeriodicBoxArgs(context, findInteractingBlocksKernel, 0);
setInvPeriodicBoxSizeArg(context, findInteractingBlocksKernel, 1);
context.executeKernel(findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize); context.executeKernel(findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize);
} }
void OpenCLNonbondedUtilities::computeInteractions() { void OpenCLNonbondedUtilities::computeInteractions() {
if (kernelSource.size() > 0) { if (kernelSource.size() > 0) {
if (useCutoff) { if (useCutoff)
setPeriodicBoxSizeArg(context, forceKernel, 9); setPeriodicBoxArgs(context, forceKernel, 9);
setInvPeriodicBoxSizeArg(context, forceKernel, 10);
}
context.executeKernel(forceKernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); context.executeKernel(forceKernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
if (context.getComputeForceCount() == 1) if (context.getComputeForceCount() == 1)
updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough. updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
...@@ -441,11 +440,11 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() { ...@@ -441,11 +440,11 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles"); interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl_uint>(11, maxTiles); forceKernel.setArg<cl_uint>(14, maxTiles);
forceKernel.setArg<cl::Buffer>(14, interactingAtoms->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(3, interactingTiles->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(4, interactingAtoms->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(6, maxTiles); findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles);
int numAtoms = context.getNumAtoms(); int numAtoms = context.getNumAtoms();
if (context.getUseDoublePrecision()) { if (context.getUseDoublePrecision()) {
vector<mm_double4> oldPositionsVec(numAtoms, mm_double4(1e30, 1e30, 1e30, 0)); vector<mm_double4> oldPositionsVec(numAtoms, mm_double4(1e30, 1e30, 1e30, 0));
...@@ -473,8 +472,8 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en ...@@ -473,8 +472,8 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
forceKernel.setArg<cl_uint>(5, startTileIndex); forceKernel.setArg<cl_uint>(5, startTileIndex);
forceKernel.setArg<cl_uint>(6, numTiles); forceKernel.setArg<cl_uint>(6, numTiles);
findInteractingBlocksKernel.setArg<cl_uint>(7, startBlockIndex); findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
findInteractingBlocksKernel.setArg<cl_uint>(8, numBlocks); findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
} }
} }
...@@ -617,7 +616,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -617,7 +616,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
if (useCutoff) { if (useCutoff) {
kernel.setArg<cl::Buffer>(index++, interactingTiles->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactingTiles->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, interactionCount->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactionCount->getDeviceBuffer());
index += 2; // The periodic box size arguments are set when the kernel is executed. index += 5; // The periodic box size arguments are set when the kernel is executed.
kernel.setArg<cl_uint>(index++, interactingTiles->getSize()); kernel.setArg<cl_uint>(index++, interactingTiles->getSize());
kernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer());
......
...@@ -5,15 +5,15 @@ ...@@ -5,15 +5,15 @@
/** /**
* Find a bounding box for the atoms in each block. * Find a bounding box for the atoms in each block.
*/ */
__kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const real4* restrict posq, __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict rebuildNeighborList, __global const real4* restrict posq, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict rebuildNeighborList,
__global real2* restrict sortedBlocks) { __global real2* restrict sortedBlocks) {
int index = get_global_id(0); int index = get_global_id(0);
int base = index*TILE_SIZE; int base = index*TILE_SIZE;
while (base < numAtoms) { while (base < numAtoms) {
real4 pos = posq[base]; real4 pos = posq[base];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
pos.xyz -= floor(pos.xyz*invPeriodicBoxSize.xyz)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS(pos)
#endif #endif
real4 minPos = pos; real4 minPos = pos;
real4 maxPos = pos; real4 maxPos = pos;
...@@ -22,7 +22,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri ...@@ -22,7 +22,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
pos = posq[i]; pos = posq[i];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
real4 center = 0.5f*(maxPos+minPos); real4 center = 0.5f*(maxPos+minPos);
pos.xyz -= floor((pos.xyz-center.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
#endif #endif
minPos = min(minPos, pos); minPos = min(minPos, pos);
maxPos = max(maxPos, pos); maxPos = max(maxPos, pos);
...@@ -65,9 +65,10 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c ...@@ -65,9 +65,10 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
} }
} }
__kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global unsigned int* restrict interactionCount, __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global int* restrict interactingTiles, __global unsigned int* restrict interactingAtoms, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex, __global unsigned int* restrict interactionCount, __global int* restrict interactingTiles, __global unsigned int* restrict interactingAtoms,
unsigned int numBlocks, __global real2* restrict sortedBlocks, __global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex, unsigned int numBlocks, __global real2* restrict sortedBlocks,
__global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox,
__global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions, __global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions,
__global const int* restrict rebuildNeighborList) { __global const int* restrict rebuildNeighborList) {
...@@ -108,7 +109,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -108,7 +109,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
// The box is small enough that we can just translate all the atoms into a single periodic // The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later. // box, then skip having to apply periodic boundary conditions later.
pos1.xyz -= floor((pos1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
} }
#endif #endif
posBuffer[get_local_id(0)] = pos1; posBuffer[get_local_id(0)] = pos1;
...@@ -136,7 +137,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -136,7 +137,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
real4 blockSizeY = (block2 < NUM_BLOCKS ? sortedBlockBoundingBox[block2] : (real4) (0)); real4 blockSizeY = (block2 < NUM_BLOCKS ? sortedBlockBoundingBox[block2] : (real4) (0));
real4 blockDelta = blockCenterX-blockCenterY; real4 blockDelta = blockCenterX-blockCenterY;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
blockDelta.xyz -= floor(blockDelta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(blockDelta)
#endif #endif
blockDelta.x = max((real) 0, fabs(blockDelta.x)-blockSizeX.x-blockSizeY.x); blockDelta.x = max((real) 0, fabs(blockDelta.x)-blockSizeX.x-blockSizeY.x);
blockDelta.y = max((real) 0, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y); blockDelta.y = max((real) 0, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y);
...@@ -166,7 +167,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -166,7 +167,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
real3 pos2 = posq[atom2].xyz; real3 pos2 = posq[atom2].xyz;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
if (singlePeriodicCopy) if (singlePeriodicCopy)
pos2.xyz -= floor((pos2.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos2, blockCenterX)
#endif #endif
bool interacts = false; bool interacts = false;
if (atom2 < NUM_ATOMS) { if (atom2 < NUM_ATOMS) {
...@@ -174,7 +175,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -174,7 +175,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
if (!singlePeriodicCopy) { if (!singlePeriodicCopy) {
for (int j = 0; j < TILE_SIZE; j++) { for (int j = 0; j < TILE_SIZE; j++) {
real3 delta = pos2-posBuffer[warpStart+j]; real3 delta = pos2-posBuffer[warpStart+j];
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
interacts |= (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED); interacts |= (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED);
} }
} }
......
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
* Scale the particle positions with each axis independent. * Scale the particle positions with each axis independent.
*/ */
__kernel void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global real4* restrict posq, __kernel void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, __global real4* restrict posq,
__global const int* restrict moleculeAtoms, __global const int* restrict moleculeStartIndex) { __global const int* restrict moleculeAtoms, __global const int* restrict moleculeStartIndex) {
for (int index = get_global_id(0); index < numMolecules; index += get_global_size(0)) { for (int index = get_global_id(0); index < numMolecules; index += get_global_size(0)) {
int first = moleculeStartIndex[index]; int first = moleculeStartIndex[index];
...@@ -11,19 +12,17 @@ __kernel void scalePositions(float scaleX, float scaleY, float scaleZ, int numMo ...@@ -11,19 +12,17 @@ __kernel void scalePositions(float scaleX, float scaleY, float scaleZ, int numMo
// Find the center of each molecule. // Find the center of each molecule.
real4 center = (real4) 0; real3 center = (real3) 0;
for (int atom = first; atom < last; atom++) for (int atom = first; atom < last; atom++)
center += posq[moleculeAtoms[atom]]; center += posq[moleculeAtoms[atom]].xyz;
center /= (real) numAtoms; center /= (real) numAtoms;
// Move it into the first periodic box. // Move it into the first periodic box.
int xcell = (int) floor(center.x*invPeriodicBoxSize.x); real3 oldCenter = center;
int ycell = (int) floor(center.y*invPeriodicBoxSize.y); APPLY_PERIODIC_TO_POS(center)
int zcell = (int) floor(center.z*invPeriodicBoxSize.z); real3 delta = oldCenter-center;;
real4 delta = (real4) (xcell*periodicBoxSize.x, ycell*periodicBoxSize.y, zcell*periodicBoxSize.z, 0); real3 scaleXYZ = (real3) (scaleX, scaleY, scaleZ);
real4 scaleXYZ = (real4) (scaleX, scaleY, scaleZ, 1);
center -= delta;
// Now scale the position of the molecule center. // Now scale the position of the molecule center.
......
...@@ -26,7 +26,8 @@ __kernel void computeNonbonded( ...@@ -26,7 +26,8 @@ __kernel void computeNonbonded(
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices __global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = get_global_size(0)/TILE_SIZE; const unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
...@@ -67,7 +68,7 @@ __kernel void computeNonbonded( ...@@ -67,7 +68,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2); real invR = RSQRT(r2);
...@@ -121,7 +122,7 @@ __kernel void computeNonbonded( ...@@ -121,7 +122,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF #ifdef PRUNE_BY_CUTOFF
...@@ -289,10 +290,8 @@ __kernel void computeNonbonded( ...@@ -289,10 +290,8 @@ __kernel void computeNonbonded(
// box, then skip having to apply periodic boundary conditions later. // box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
posq1.xyz -= floor((posq1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
localData[localAtomIndex].x -= floor((localData[localAtomIndex].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[localAtomIndex], blockCenterX)
localData[localAtomIndex].y -= floor((localData[localAtomIndex].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[localAtomIndex].z -= floor((localData[localAtomIndex].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
SYNC_WARPS; SYNC_WARPS;
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
...@@ -349,7 +348,7 @@ __kernel void computeNonbonded( ...@@ -349,7 +348,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF #ifdef PRUNE_BY_CUTOFF
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2014 Stanford University and the Authors. * * Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -261,6 +261,65 @@ void testPeriodic() { ...@@ -261,6 +261,65 @@ void testPeriodic() {
ASSERT_EQUAL_TOL(1.9+1+0.9, state.getPotentialEnergy(), TOL); ASSERT_EQUAL_TOL(1.9+1+0.9, state.getPotentialEnergy(), TOL);
} }
void testTriclinic() {
System system;
system.addParticle(1.0);
system.addParticle(1.0);
Vec3 a(3.1, 0, 0);
Vec3 b(0.4, 3.5, 0);
Vec3 c(-0.1, -0.5, 4.0);
system.setDefaultPeriodicBoxVectors(a, b, c);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* nonbonded = new CustomNonbondedForce("r");
nonbonded->addParticle(vector<double>());
nonbonded->addParticle(vector<double>());
nonbonded->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
const double cutoff = 1.5;
nonbonded->setCutoffDistance(cutoff);
system.addForce(nonbonded);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int iteration = 0; iteration < 50; iteration++) {
// Generate random positions for the two particles.
positions[0] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
positions[1] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
context.setPositions(positions);
// Loop over all possible periodic copies and find the nearest one.
Vec3 delta;
double distance2 = 100.0;
for (int i = -1; i < 2; i++)
for (int j = -1; j < 2; j++)
for (int k = -1; k < 2; k++) {
Vec3 d = positions[1]-positions[0]+a*i+b*j+c*k;
if (d.dot(d) < distance2) {
delta = d;
distance2 = d.dot(d);
}
}
double distance = sqrt(distance2);
// See if the force and energy are correct.
State state = context.getState(State::Forces | State::Energy);
if (distance >= cutoff) {
ASSERT_EQUAL(0.0, state.getPotentialEnergy());
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[0], 0);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[1], 0);
}
else {
const Vec3 force = delta/sqrt(delta.dot(delta));
ASSERT_EQUAL_TOL(distance, state.getPotentialEnergy(), TOL);
ASSERT_EQUAL_VEC(force, state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(-force, state.getForces()[1], TOL);
}
}
}
void testContinuous1DFunction() { void testContinuous1DFunction() {
System system; System system;
system.addParticle(1.0); system.addParticle(1.0);
...@@ -924,6 +983,7 @@ int main(int argc, char* argv[]) { ...@@ -924,6 +983,7 @@ int main(int argc, char* argv[]) {
testExclusions(); testExclusions();
testCutoff(); testCutoff();
testPeriodic(); testPeriodic();
testTriclinic();
testContinuous1DFunction(); testContinuous1DFunction();
testContinuous2DFunction(); testContinuous2DFunction();
testContinuous3DFunction(); testContinuous3DFunction();
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. * * Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman, Lee-Ping Wang * * Authors: Peter Eastman, Lee-Ping Wang *
* Contributors: * * Contributors: *
* * * *
...@@ -236,6 +236,82 @@ void testRandomSeed() { ...@@ -236,6 +236,82 @@ void testRandomSeed() {
} }
} }
void testTriclinic() {
const int numParticles = 64;
const int frequency = 10;
const int steps = 1000;
const double pressure = 1.5;
const double pressureInMD = pressure*(AVOGADRO*1e-25); // pressure in kJ/mol/nm^3
const double temperature = 300.0;
const double initialVolume = numParticles*BOLTZ*temperature/pressureInMD;
const double initialLength = std::pow(initialVolume, 1.0/3.0);
// Create a gas of noninteracting particles.
System system;
Vec3 initialBox[3];
initialBox[0] = Vec3(initialLength, 0, 0);
initialBox[1] = Vec3(0.2*initialLength, initialLength, 0);
initialBox[2] = Vec3(0.1*initialLength, 0.3*initialLength, initialLength);
system.setDefaultPeriodicBoxVectors(initialBox[0], initialBox[1], initialBox[2]);
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; ++i) {
system.addParticle(1.0);
positions[i] = Vec3(initialLength*genrand_real2(sfmt), initialLength*genrand_real2(sfmt), initialLength*genrand_real2(sfmt));
}
MonteCarloAnisotropicBarostat* barostat = new MonteCarloAnisotropicBarostat(Vec3(pressure, pressure, pressure), temperature, true, true, true, frequency);
system.addForce(barostat);
// Run a simulation
LangevinIntegrator integrator(temperature, 0.1, 0.01);
Context context(system, integrator, platform);
context.setPositions(positions);
// Let it equilibrate.
integrator.step(10000);
// Now run it for a while and see if the volume is correct.
double volume = 0.0;
for (int j = 0; j < steps; ++j) {
Vec3 box[3];
context.getState(0).getPeriodicBoxVectors(box[0], box[1], box[2]);
volume += box[0][0]*box[1][1]*box[2][2];
integrator.step(frequency);
}
volume /= steps;
double expected = (numParticles+1)*BOLTZ*temperature/pressureInMD;
ASSERT_USUALLY_EQUAL_TOL(expected, volume, 3/std::sqrt((double) steps));
// Make sure the box vectors have been scaled consistently.
State state = context.getState(State::Positions);
Vec3 box[3];
state.getPeriodicBoxVectors(box[0], box[1], box[2]);
double xscale = box[2][0]/(0.1*initialLength);
double yscale = box[2][1]/(0.3*initialLength);
double zscale = box[2][2]/(1.0*initialLength);
for (int i = 0; i < 3; i++) {
ASSERT_EQUAL_VEC(Vec3(xscale*initialBox[i][0], yscale*initialBox[i][1], zscale*initialBox[i][2]), box[i], 1e-5);
}
// The barostat should have put all particles inside the first periodic box. One integration step
// has happened since then, so they may have moved slightly outside it.
for (int i = 0; i < numParticles; i++) {
Vec3 pos = state.getPositions()[i];
ASSERT(pos[2]/box[2][2] > -1 && pos[2]/box[2][2] < 2);
pos -= box[2]*floor(pos[2]/box[2][2]);
ASSERT(pos[1]/box[1][1] > -1 && pos[1]/box[1][1] < 2);
pos -= box[1]*floor(pos[1]/box[1][1]);
ASSERT(pos[0]/box[0][0] > -1 && pos[0]/box[0][0] < 2);
}
}
/** /**
* Run a constant pressure simulation on an anisotropic Einstein crystal * Run a constant pressure simulation on an anisotropic Einstein crystal
* using isotropic and anisotropic barostats. There are a total of 15 simulations: * using isotropic and anisotropic barostats. There are a total of 15 simulations:
...@@ -389,6 +465,7 @@ int main(int argc, char* argv[]) { ...@@ -389,6 +465,7 @@ int main(int argc, char* argv[]) {
testIdealGasAxis(1); testIdealGasAxis(1);
testIdealGasAxis(2); testIdealGasAxis(2);
testRandomSeed(); testRandomSeed();
testTriclinic();
//testEinsteinCrystal(); //testEinsteinCrystal();
} }
catch(const exception& e) { catch(const exception& e) {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. * * Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -355,6 +355,67 @@ void testPeriodic() { ...@@ -355,6 +355,67 @@ void testPeriodic() {
ASSERT_EQUAL_TOL(2*ONE_4PI_EPS0*(1.0)*(1.0+krf*1.0-crf), state.getPotentialEnergy(), TOL); ASSERT_EQUAL_TOL(2*ONE_4PI_EPS0*(1.0)*(1.0+krf*1.0-crf), state.getPotentialEnergy(), TOL);
} }
void testTriclinic() {
System system;
system.addParticle(1.0);
system.addParticle(1.0);
Vec3 a(3.1, 0, 0);
Vec3 b(0.4, 3.5, 0);
Vec3 c(-0.1, -0.5, 4.0);
system.setDefaultPeriodicBoxVectors(a, b, c);
VerletIntegrator integrator(0.01);
NonbondedForce* nonbonded = new NonbondedForce();
nonbonded->addParticle(1.0, 1, 0);
nonbonded->addParticle(1.0, 1, 0);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
const double cutoff = 1.5;
nonbonded->setCutoffDistance(cutoff);
system.addForce(nonbonded);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
const double eps = 78.3;
const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
for (int iteration = 0; iteration < 50; iteration++) {
// Generate random positions for the two particles.
positions[0] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
positions[1] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
context.setPositions(positions);
// Loop over all possible periodic copies and find the nearest one.
Vec3 delta;
double distance2 = 100.0;
for (int i = -1; i < 2; i++)
for (int j = -1; j < 2; j++)
for (int k = -1; k < 2; k++) {
Vec3 d = positions[1]-positions[0]+a*i+b*j+c*k;
if (d.dot(d) < distance2) {
delta = d;
distance2 = d.dot(d);
}
}
double distance = sqrt(distance2);
// See if the force and energy are correct.
State state = context.getState(State::Forces | State::Energy);
if (distance >= cutoff) {
ASSERT_EQUAL(0.0, state.getPotentialEnergy());
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[0], 0);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[1], 0);
}
else {
const Vec3 force = delta*ONE_4PI_EPS0*(-1.0/(distance*distance*distance)+2.0*krf);
ASSERT_EQUAL_TOL(ONE_4PI_EPS0*(1.0/distance+krf*distance*distance-crf), state.getPotentialEnergy(), TOL);
ASSERT_EQUAL_VEC(force, state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(-force, state.getForces()[1], TOL);
}
}
}
void testLargeSystem() { void testLargeSystem() {
const int numMolecules = 600; const int numMolecules = 600;
...@@ -875,6 +936,7 @@ int main(int argc, char* argv[]) { ...@@ -875,6 +936,7 @@ int main(int argc, char* argv[]) {
testCutoff(); testCutoff();
testCutoff14(); testCutoff14();
testPeriodic(); testPeriodic();
testTriclinic();
testLargeSystem(); testLargeSystem();
// testBlockInteractions(false); // testBlockInteractions(false);
// testBlockInteractions(true); // testBlockInteractions(true);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment