"serialization/vscode:/vscode.git/clone" did not exist on "6ee040ff5596c9f34c0bac15680f39e88b129f48"
Commit 82f9a867 authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimizations to CCMA

parent cf4c6d52
......@@ -454,9 +454,9 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaNumAtomConstraints = new OpenCLArray<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaDelta1 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta1");
ccmaDelta2 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta2");
ccmaConverged = new OpenCLArray<cl_int>(context, 1, "CcmaConverged");
ccmaConvergedBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, sizeof(cl_int));
ccmaConvergedMemory = (cl_int*) context.getQueue().enqueueMapBuffer(*ccmaConvergedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int));
ccmaConverged = new OpenCLArray<cl_int>(context, 2, "CcmaConverged");
ccmaConvergedBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, 2*sizeof(cl_int));
ccmaConvergedMemory = (cl_int*) context.getQueue().enqueueMapBuffer(*ccmaConvergedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, 2*sizeof(cl_int));
ccmaReducedMass = new OpenCLArray<cl_float>(context, numCCMA, "CcmaReducedMass");
ccmaConstraintMatrixColumn = new OpenCLArray<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConstraintMatrixValue = new OpenCLArray<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
......@@ -602,23 +602,25 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) {
}
ccmaForceKernel.setArg<cl_float>(6, (cl_float) tol);
context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize());
const int checkInterval = 3;
const int checkInterval = 4;
cl::Event event;
for (int i = 0; i < 150; i++) {
if ((i+1)%checkInterval == 0) {
ccmaForceKernel.setArg<cl_int>(7, i);
if (i == 0) {
ccmaConvergedMemory[0] = 1;
context.getQueue().enqueueWriteBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, sizeof(cl_int), ccmaConvergedMemory);
ccmaConvergedMemory[1] = 0;
context.getQueue().enqueueWriteBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), ccmaConvergedMemory);
}
context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize());
if ((i+1)%checkInterval == 0) {
context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, sizeof(cl_int), ccmaConvergedMemory, NULL, &event);
}
if ((i+1)%checkInterval == 0)
context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), ccmaConvergedMemory, NULL, &event);
ccmaMultiplyKernel.setArg<cl_int>(5, i);
context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize());
ccmaUpdateKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaUpdateKernel, context.getNumAtoms());
if ((i+1)%checkInterval == 0) {
event.wait();
if (ccmaConvergedMemory[0])
if (ccmaConvergedMemory[i%2])
break;
}
}
......
......@@ -20,8 +20,13 @@ __kernel void computeConstraintDirections(__global int2* constraintAtoms, __glob
* Compute the force applied by each constraint.
*/
__kernel void computeConstraintForce(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions,
__global float* reducedMass, __global float* delta1, __global int* converged, float tol) {
__global float* reducedMass, __global float* delta1, __global int* converged, float tol, int iteration) {
__local int groupConverged;
if (converged[1-iteration%2]) {
if (get_global_id(0) == 0)
converged[iteration%2] = 1;
return; // The constraint iteration has already converged.
}
if (get_local_id(0) == 0)
groupConverged = 1;
barrier(CLK_LOCAL_MEM_FENCE);
......@@ -45,7 +50,7 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl
if (groupConverged && (rp2 < lowerTol*dist2 || rp2 > upperTol*dist2)) {
groupConverged = 0;
converged[0] = 0;
converged[iteration%2] = 0;
}
}
}
......@@ -54,8 +59,8 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl
* Multiply the vector of constraint forces by the constraint matrix.
*/
__kernel void multiplyByConstraintMatrix(__global float* delta1, __global float* delta2, __global int* constraintMatrixColumn,
__global float* constraintMatrixValue, __global int* converged) {
if (converged[0])
__global float* constraintMatrixValue, __global int* converged, int iteration) {
if (converged[iteration%2])
return; // The constraint iteration has already converged.
// Multiply by the inverse constraint matrix.
......@@ -78,7 +83,9 @@ __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float*
*/
__kernel void updateAtomPositions(__global int* numAtomConstraints, __global int* atomConstraints, __global float4* constraintDistance,
__global float4* atomPositions, __global float4* velm, __global float* delta1, __global float* delta2, __global int* converged, int iteration) {
if (converged[0])
if (get_global_id(0) == 0)
converged[1-iteration%2] = 1;
if (converged[iteration%2])
return; // The constraint iteration has already converged.
float damping = (iteration < 2 ? 0.5f : 1.0f);
for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment