Commit 82f9a867 authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimizations to CCMA

parent cf4c6d52
...@@ -454,9 +454,9 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -454,9 +454,9 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaNumAtomConstraints = new OpenCLArray<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex"); ccmaNumAtomConstraints = new OpenCLArray<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaDelta1 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta1"); ccmaDelta1 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta1");
ccmaDelta2 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta2"); ccmaDelta2 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta2");
ccmaConverged = new OpenCLArray<cl_int>(context, 1, "CcmaConverged"); ccmaConverged = new OpenCLArray<cl_int>(context, 2, "CcmaConverged");
ccmaConvergedBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, sizeof(cl_int)); ccmaConvergedBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, 2*sizeof(cl_int));
ccmaConvergedMemory = (cl_int*) context.getQueue().enqueueMapBuffer(*ccmaConvergedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int)); ccmaConvergedMemory = (cl_int*) context.getQueue().enqueueMapBuffer(*ccmaConvergedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, 2*sizeof(cl_int));
ccmaReducedMass = new OpenCLArray<cl_float>(context, numCCMA, "CcmaReducedMass"); ccmaReducedMass = new OpenCLArray<cl_float>(context, numCCMA, "CcmaReducedMass");
ccmaConstraintMatrixColumn = new OpenCLArray<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn"); ccmaConstraintMatrixColumn = new OpenCLArray<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConstraintMatrixValue = new OpenCLArray<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue"); ccmaConstraintMatrixValue = new OpenCLArray<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
...@@ -602,23 +602,25 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) { ...@@ -602,23 +602,25 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) {
} }
ccmaForceKernel.setArg<cl_float>(6, (cl_float) tol); ccmaForceKernel.setArg<cl_float>(6, (cl_float) tol);
context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize());
const int checkInterval = 3; const int checkInterval = 4;
cl::Event event; cl::Event event;
for (int i = 0; i < 150; i++) { for (int i = 0; i < 150; i++) {
if ((i+1)%checkInterval == 0) { ccmaForceKernel.setArg<cl_int>(7, i);
if (i == 0) {
ccmaConvergedMemory[0] = 1; ccmaConvergedMemory[0] = 1;
context.getQueue().enqueueWriteBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, sizeof(cl_int), ccmaConvergedMemory); ccmaConvergedMemory[1] = 0;
context.getQueue().enqueueWriteBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), ccmaConvergedMemory);
} }
context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize());
if ((i+1)%checkInterval == 0) { if ((i+1)%checkInterval == 0)
context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, sizeof(cl_int), ccmaConvergedMemory, NULL, &event); context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), ccmaConvergedMemory, NULL, &event);
} ccmaMultiplyKernel.setArg<cl_int>(5, i);
context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize());
ccmaUpdateKernel.setArg<cl_int>(8, i); ccmaUpdateKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaUpdateKernel, context.getNumAtoms()); context.executeKernel(ccmaUpdateKernel, context.getNumAtoms());
if ((i+1)%checkInterval == 0) { if ((i+1)%checkInterval == 0) {
event.wait(); event.wait();
if (ccmaConvergedMemory[0]) if (ccmaConvergedMemory[i%2])
break; break;
} }
} }
......
...@@ -20,8 +20,13 @@ __kernel void computeConstraintDirections(__global int2* constraintAtoms, __glob ...@@ -20,8 +20,13 @@ __kernel void computeConstraintDirections(__global int2* constraintAtoms, __glob
* Compute the force applied by each constraint. * Compute the force applied by each constraint.
*/ */
__kernel void computeConstraintForce(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions, __kernel void computeConstraintForce(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions,
__global float* reducedMass, __global float* delta1, __global int* converged, float tol) { __global float* reducedMass, __global float* delta1, __global int* converged, float tol, int iteration) {
__local int groupConverged; __local int groupConverged;
if (converged[1-iteration%2]) {
if (get_global_id(0) == 0)
converged[iteration%2] = 1;
return; // The constraint iteration has already converged.
}
if (get_local_id(0) == 0) if (get_local_id(0) == 0)
groupConverged = 1; groupConverged = 1;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -45,7 +50,7 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl ...@@ -45,7 +50,7 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl
if (groupConverged && (rp2 < lowerTol*dist2 || rp2 > upperTol*dist2)) { if (groupConverged && (rp2 < lowerTol*dist2 || rp2 > upperTol*dist2)) {
groupConverged = 0; groupConverged = 0;
converged[0] = 0; converged[iteration%2] = 0;
} }
} }
} }
...@@ -54,8 +59,8 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl ...@@ -54,8 +59,8 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl
* Multiply the vector of constraint forces by the constraint matrix. * Multiply the vector of constraint forces by the constraint matrix.
*/ */
__kernel void multiplyByConstraintMatrix(__global float* delta1, __global float* delta2, __global int* constraintMatrixColumn, __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float* delta2, __global int* constraintMatrixColumn,
__global float* constraintMatrixValue, __global int* converged) { __global float* constraintMatrixValue, __global int* converged, int iteration) {
if (converged[0]) if (converged[iteration%2])
return; // The constraint iteration has already converged. return; // The constraint iteration has already converged.
// Multiply by the inverse constraint matrix. // Multiply by the inverse constraint matrix.
...@@ -78,7 +83,9 @@ __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float* ...@@ -78,7 +83,9 @@ __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float*
*/ */
__kernel void updateAtomPositions(__global int* numAtomConstraints, __global int* atomConstraints, __global float4* constraintDistance, __kernel void updateAtomPositions(__global int* numAtomConstraints, __global int* atomConstraints, __global float4* constraintDistance,
__global float4* atomPositions, __global float4* velm, __global float* delta1, __global float* delta2, __global int* converged, int iteration) { __global float4* atomPositions, __global float4* velm, __global float* delta1, __global float* delta2, __global int* converged, int iteration) {
if (converged[0]) if (get_global_id(0) == 0)
converged[1-iteration%2] = 1;
if (converged[iteration%2])
return; // The constraint iteration has already converged. return; // The constraint iteration has already converged.
float damping = (iteration < 2 ? 0.5f : 1.0f); float damping = (iteration < 2 ? 0.5f : 1.0f);
for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) { for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment