Commit c0a43bfc authored by Peter Eastman's avatar Peter Eastman
Browse files

Improved performance of CCMA

parent 022a0190
...@@ -99,7 +99,7 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -99,7 +99,7 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL), posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL),
random(NULL), randomSeed(NULL), randomPos(0), stepSize(NULL), ccmaAtoms(NULL), ccmaDistance(NULL), random(NULL), randomSeed(NULL), randomPos(0), stepSize(NULL), ccmaAtoms(NULL), ccmaDistance(NULL),
ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL), ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL),
ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConvergedMemory(NULL),
vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL), vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL),
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL) { vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL) {
// Create workspace arrays. // Create workspace arrays.
...@@ -468,6 +468,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -468,6 +468,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
ccmaNumAtomConstraints = CudaArray::create<int>(context, numAtoms, "CcmaAtomConstraintsIndex"); ccmaNumAtomConstraints = CudaArray::create<int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaConstraintMatrixColumn = CudaArray::create<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn"); ccmaConstraintMatrixColumn = CudaArray::create<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConverged = CudaArray::create<int>(context, 2, "ccmaConverged"); ccmaConverged = CudaArray::create<int>(context, 2, "ccmaConverged");
CHECK_RESULT2(cuMemHostAlloc((void**) &ccmaConvergedMemory, sizeof(int), CU_MEMHOSTALLOC_DEVICEMAP), "Error allocating pinned memory");
CHECK_RESULT2(cuMemHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
vector<int2> atomsVec(ccmaAtoms->getSize()); vector<int2> atomsVec(ccmaAtoms->getSize());
vector<int> atomConstraintsVec(ccmaAtomConstraints->getSize()); vector<int> atomConstraintsVec(ccmaAtomConstraints->getSize());
vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints->getSize()); vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints->getSize());
...@@ -681,6 +683,8 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() { ...@@ -681,6 +683,8 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
delete ccmaDelta2; delete ccmaDelta2;
if (ccmaConverged != NULL) if (ccmaConverged != NULL)
delete ccmaConverged; delete ccmaConverged;
if (ccmaConvergedMemory != NULL)
cuMemFreeHost(ccmaConvergedMemory);
if (vsite2AvgAtoms != NULL) if (vsite2AvgAtoms != NULL)
delete vsite2AvgAtoms; delete vsite2AvgAtoms;
if (vsite2AvgWeights != NULL) if (vsite2AvgWeights != NULL)
...@@ -739,7 +743,7 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double ...@@ -739,7 +743,7 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
void* forceArgs[] = {&ccmaAtoms->getDevicePointer(), &ccmaDistance->getDevicePointer(), void* forceArgs[] = {&ccmaAtoms->getDevicePointer(), &ccmaDistance->getDevicePointer(),
constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(), constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(),
&ccmaReducedMass->getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaConverged->getDevicePointer(), &ccmaReducedMass->getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaConverged->getDevicePointer(),
tolPointer, &i}; &ccmaConvergedDeviceMemory, tolPointer, &i};
void* multiplyArgs[] = {&ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(), void* multiplyArgs[] = {&ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(),
&ccmaConstraintMatrixColumn->getDevicePointer(), &ccmaConstraintMatrixValue->getDevicePointer(), &ccmaConverged->getDevicePointer(), &i}; &ccmaConstraintMatrixColumn->getDevicePointer(), &ccmaConstraintMatrixValue->getDevicePointer(), &ccmaConverged->getDevicePointer(), &i};
void* updateArgs[] = {&ccmaNumAtomConstraints->getDevicePointer(), &ccmaAtomConstraints->getDevicePointer(), &ccmaDistance->getDevicePointer(), void* updateArgs[] = {&ccmaNumAtomConstraints->getDevicePointer(), &ccmaAtomConstraints->getDevicePointer(), &ccmaDistance->getDevicePointer(),
...@@ -747,18 +751,16 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double ...@@ -747,18 +751,16 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
&context.getVelm().getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(), &context.getVelm().getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(),
&ccmaConverged->getDevicePointer(), &i}; &ccmaConverged->getDevicePointer(), &i};
const int checkInterval = 4; const int checkInterval = 4;
int* converged = (int*) context.getPinnedBuffer(); ccmaConvergedMemory[0] = 0;
for (i = 0; i < 150; i++) { for (i = 0; i < 150; i++) {
context.executeKernel(ccmaForceKernel, forceArgs, ccmaAtoms->getSize()); context.executeKernel(ccmaForceKernel, forceArgs, ccmaAtoms->getSize());
if ((i+1)%checkInterval == 0) { if ((i+1)%checkInterval == 0)
ccmaConverged->download(converged, false);
CHECK_RESULT2(cuEventRecord(ccmaEvent, 0), "Error recording event for CCMA"); CHECK_RESULT2(cuEventRecord(ccmaEvent, 0), "Error recording event for CCMA");
}
context.executeKernel(ccmaMultiplyKernel, multiplyArgs, ccmaAtoms->getSize()); context.executeKernel(ccmaMultiplyKernel, multiplyArgs, ccmaAtoms->getSize());
context.executeKernel(ccmaUpdateKernel, updateArgs, context.getNumAtoms()); context.executeKernel(ccmaUpdateKernel, updateArgs, context.getNumAtoms());
if ((i+1)%checkInterval == 0) { if ((i+1)%checkInterval == 0) {
CHECK_RESULT2(cuEventSynchronize(ccmaEvent), "Error synchronizing on event for CCMA"); CHECK_RESULT2(cuEventSynchronize(ccmaEvent), "Error synchronizing on event for CCMA");
if (converged[i%2]) if (ccmaConvergedMemory[0])
break; break;
} }
} }
......
...@@ -141,6 +141,8 @@ private: ...@@ -141,6 +141,8 @@ private:
CudaArray* ccmaDelta1; CudaArray* ccmaDelta1;
CudaArray* ccmaDelta2; CudaArray* ccmaDelta2;
CudaArray* ccmaConverged; CudaArray* ccmaConverged;
int* ccmaConvergedMemory;
CUdeviceptr ccmaConvergedDeviceMemory;
CUevent ccmaEvent; CUevent ccmaEvent;
CudaArray* vsite2AvgAtoms; CudaArray* vsite2AvgAtoms;
CudaArray* vsite2AvgWeights; CudaArray* vsite2AvgWeights;
......
...@@ -598,11 +598,13 @@ extern "C" __global__ void computeCCMAConstraintDirections(const int2* __restric ...@@ -598,11 +598,13 @@ extern "C" __global__ void computeCCMAConstraintDirections(const int2* __restric
* Compute the force applied by each CCMA position constraint. * Compute the force applied by each CCMA position constraint.
*/ */
extern "C" __global__ void computeCCMAPositionConstraintForce(const int2* __restrict__ constraintAtoms, const mixed4* __restrict__ constraintDistance, const mixed4* __restrict__ atomPositions, extern "C" __global__ void computeCCMAPositionConstraintForce(const int2* __restrict__ constraintAtoms, const mixed4* __restrict__ constraintDistance, const mixed4* __restrict__ atomPositions,
const mixed* __restrict__ reducedMass, mixed* __restrict__ delta1, int* __restrict__ converged, mixed tol, int iteration) { const mixed* __restrict__ reducedMass, mixed* __restrict__ delta1, int* __restrict__ converged, int* __restrict__ hostConvergedFlag, mixed tol, int iteration) {
__shared__ int groupConverged; __shared__ int groupConverged;
if (converged[1-iteration%2]) { if (converged[1-iteration%2]) {
if (blockIdx.x == 0 && threadIdx.x == 0) if (blockIdx.x == 0 && threadIdx.x == 0) {
converged[iteration%2] = 1; converged[iteration%2] = 1;
hostConvergedFlag[0] = 1;
}
return; // The constraint iteration has already converged. return; // The constraint iteration has already converged.
} }
if (threadIdx.x == 0) if (threadIdx.x == 0)
...@@ -639,11 +641,13 @@ extern "C" __global__ void computeCCMAPositionConstraintForce(const int2* __rest ...@@ -639,11 +641,13 @@ extern "C" __global__ void computeCCMAPositionConstraintForce(const int2* __rest
* Compute the force applied by each CCMA velocity constraint. * Compute the force applied by each CCMA velocity constraint.
*/ */
extern "C" __global__ void computeCCMAVelocityConstraintForce(const int2* __restrict__ constraintAtoms, const mixed4* __restrict__ constraintDistance, const mixed4* __restrict__ atomPositions, extern "C" __global__ void computeCCMAVelocityConstraintForce(const int2* __restrict__ constraintAtoms, const mixed4* __restrict__ constraintDistance, const mixed4* __restrict__ atomPositions,
const mixed* __restrict__ reducedMass, mixed* __restrict__ delta1, int* __restrict__ converged, mixed tol, int iteration) { const mixed* __restrict__ reducedMass, mixed* __restrict__ delta1, int* __restrict__ converged, int* __restrict__ hostConvergedFlag, mixed tol, int iteration) {
__shared__ int groupConverged; __shared__ int groupConverged;
if (converged[1-iteration%2]) { if (converged[1-iteration%2]) {
if (blockIdx.x == 0 && threadIdx.x == 0) if (blockIdx.x == 0 && threadIdx.x == 0) {
converged[iteration%2] = 1; converged[iteration%2] = 1;
hostConvergedFlag[0] = 1;
}
return; // The constraint iteration has already converged. return; // The constraint iteration has already converged.
} }
if (threadIdx.x == 0) if (threadIdx.x == 0)
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009 Stanford University and the Authors. * * Portions copyright (c) 2009-2013 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -98,7 +98,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -98,7 +98,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL), posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL),
random(NULL), randomSeed(NULL), randomPos(0), stepSize(NULL), ccmaAtoms(NULL), ccmaDistance(NULL), random(NULL), randomSeed(NULL), randomPos(0), stepSize(NULL), ccmaAtoms(NULL), ccmaDistance(NULL),
ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL), ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL),
ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConvergedHostBuffer(NULL),
vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL), vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL),
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false) { vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false) {
// Create workspace arrays. // Create workspace arrays.
...@@ -479,6 +479,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -479,6 +479,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaNumAtomConstraints = OpenCLArray::create<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex"); ccmaNumAtomConstraints = OpenCLArray::create<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaConstraintMatrixColumn = OpenCLArray::create<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn"); ccmaConstraintMatrixColumn = OpenCLArray::create<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConverged = OpenCLArray::create<cl_int>(context, 2, "CcmaConverged"); ccmaConverged = OpenCLArray::create<cl_int>(context, 2, "CcmaConverged");
ccmaConvergedHostBuffer = OpenCLArray::create<cl_int>(context, 1, "CcmaConvergedHostBuffer", CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR);
ccmaConvergedHostMemory = (int*) context.getQueue().enqueueMapBuffer(ccmaConvergedHostBuffer->getDeviceBuffer(), CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int));
// Different communication mechanisms give optimal performance on AMD and on NVIDIA.
string vendor = context.getDevice().getInfo<CL_DEVICE_VENDOR>();
ccmaUseDirectBuffer = (vendor.size() >= 28 && vendor.substr(0, 28) == "Advanced Micro Devices, Inc.");
vector<mm_int2> atomsVec(ccmaAtoms->getSize()); vector<mm_int2> atomsVec(ccmaAtoms->getSize());
vector<cl_int> atomConstraintsVec(ccmaAtomConstraints->getSize()); vector<cl_int> atomConstraintsVec(ccmaAtomConstraints->getSize());
vector<cl_int> numAtomConstraintsVec(ccmaNumAtomConstraints->getSize()); vector<cl_int> numAtomConstraintsVec(ccmaNumAtomConstraints->getSize());
...@@ -720,6 +725,8 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() { ...@@ -720,6 +725,8 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
delete ccmaDelta2; delete ccmaDelta2;
if (ccmaConverged != NULL) if (ccmaConverged != NULL)
delete ccmaConverged; delete ccmaConverged;
if (ccmaConvergedHostBuffer != NULL)
delete ccmaConvergedHostBuffer;
if (vsite2AvgAtoms != NULL) if (vsite2AvgAtoms != NULL)
delete vsite2AvgAtoms; delete vsite2AvgAtoms;
if (vsite2AvgWeights != NULL) if (vsite2AvgWeights != NULL)
...@@ -814,6 +821,7 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub ...@@ -814,6 +821,7 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaForceKernel.setArg<cl::Buffer>(3, ccmaReducedMass->getDeviceBuffer()); ccmaForceKernel.setArg<cl::Buffer>(3, ccmaReducedMass->getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(4, ccmaDelta1->getDeviceBuffer()); ccmaForceKernel.setArg<cl::Buffer>(4, ccmaDelta1->getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(5, ccmaConverged->getDeviceBuffer()); ccmaForceKernel.setArg<cl::Buffer>(5, ccmaConverged->getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(6, ccmaConvergedHostBuffer->getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(0, ccmaDelta1->getDeviceBuffer()); ccmaMultiplyKernel.setArg<cl::Buffer>(0, ccmaDelta1->getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(1, ccmaDelta2->getDeviceBuffer()); ccmaMultiplyKernel.setArg<cl::Buffer>(1, ccmaDelta2->getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(2, ccmaConstraintMatrixColumn->getDeviceBuffer()); ccmaMultiplyKernel.setArg<cl::Buffer>(2, ccmaConstraintMatrixColumn->getDeviceBuffer());
...@@ -829,29 +837,40 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub ...@@ -829,29 +837,40 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaUpdateKernel.setArg<cl::Buffer>(7, ccmaConverged->getDeviceBuffer()); ccmaUpdateKernel.setArg<cl::Buffer>(7, ccmaConverged->getDeviceBuffer());
} }
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
ccmaForceKernel.setArg<cl_double>(6, (cl_double) tol); ccmaForceKernel.setArg<cl_double>(7, (cl_double) tol);
else else
ccmaForceKernel.setArg<cl_float>(6, (cl_float) tol); ccmaForceKernel.setArg<cl_float>(7, (cl_float) tol);
context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize());
const int checkInterval = 4; const int checkInterval = 4;
cl::Event event; cl::Event event;
int* converged = (int*) context.getPinnedBuffer(); int* converged = (int*) context.getPinnedBuffer();
ccmaConvergedHostMemory[0] = 0;
for (int i = 0; i < 150; i++) { for (int i = 0; i < 150; i++) {
ccmaForceKernel.setArg<cl_int>(7, i); ccmaForceKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize());
if ((i+1)%checkInterval == 0) if ((i+1)%checkInterval == 0) {
if (ccmaUseDirectBuffer)
context.getQueue().enqueueMarker(&event);
else
context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), converged, NULL, &event); context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), converged, NULL, &event);
}
ccmaMultiplyKernel.setArg<cl_int>(5, i); ccmaMultiplyKernel.setArg<cl_int>(5, i);
context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize());
ccmaUpdateKernel.setArg<cl_int>(8, i); ccmaUpdateKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaUpdateKernel, context.getNumAtoms()); context.executeKernel(ccmaUpdateKernel, context.getNumAtoms());
if ((i+1)%checkInterval == 0) { if ((i+1)%checkInterval == 0) {
event.wait(); event.wait();
if (ccmaUseDirectBuffer) {
if (ccmaConvergedHostMemory[0])
break;
}
else {
if (converged[i%2]) if (converged[i%2])
break; break;
} }
} }
} }
}
} }
void OpenCLIntegrationUtilities::computeVirtualSites() { void OpenCLIntegrationUtilities::computeVirtualSites() {
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009 Stanford University and the Authors. * * Portions copyright (c) 2009-2013 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -141,6 +141,8 @@ private: ...@@ -141,6 +141,8 @@ private:
OpenCLArray* ccmaDelta1; OpenCLArray* ccmaDelta1;
OpenCLArray* ccmaDelta2; OpenCLArray* ccmaDelta2;
OpenCLArray* ccmaConverged; OpenCLArray* ccmaConverged;
int* ccmaConvergedHostMemory;
OpenCLArray* ccmaConvergedHostBuffer;
OpenCLArray* vsite2AvgAtoms; OpenCLArray* vsite2AvgAtoms;
OpenCLArray* vsite2AvgWeights; OpenCLArray* vsite2AvgWeights;
OpenCLArray* vsite3AvgAtoms; OpenCLArray* vsite3AvgAtoms;
...@@ -149,7 +151,7 @@ private: ...@@ -149,7 +151,7 @@ private:
OpenCLArray* vsiteOutOfPlaneWeights; OpenCLArray* vsiteOutOfPlaneWeights;
int randomPos; int randomPos;
int lastSeed, numVsites; int lastSeed, numVsites;
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels; bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels, ccmaUseDirectBuffer;
struct ShakeCluster; struct ShakeCluster;
struct ConstraintOrderer; struct ConstraintOrderer;
}; };
......
...@@ -34,11 +34,13 @@ __kernel void computeConstraintDirections(__global const int2* restrict constrai ...@@ -34,11 +34,13 @@ __kernel void computeConstraintDirections(__global const int2* restrict constrai
* Compute the force applied by each constraint. * Compute the force applied by each constraint.
*/ */
__kernel void computeConstraintForce(__global const int2* restrict constraintAtoms, __global const mixed4* restrict constraintDistance, __global const mixed4* restrict atomPositions, __kernel void computeConstraintForce(__global const int2* restrict constraintAtoms, __global const mixed4* restrict constraintDistance, __global const mixed4* restrict atomPositions,
__global const mixed* restrict reducedMass, __global mixed* restrict delta1, __global int* restrict converged, mixed tol, int iteration) { __global const mixed* restrict reducedMass, __global mixed* restrict delta1, __global int* restrict converged, __global int* restrict hostConvergedFlag, mixed tol, int iteration) {
__local int groupConverged; __local int groupConverged;
if (converged[1-iteration%2]) { if (converged[1-iteration%2]) {
if (get_global_id(0) == 0) if (get_global_id(0) == 0) {
converged[iteration%2] = 1; converged[iteration%2] = 1;
hostConvergedFlag[0] = 1;
}
return; // The constraint iteration has already converged. return; // The constraint iteration has already converged.
} }
if (get_local_id(0) == 0) if (get_local_id(0) == 0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment