Commit 27777280 authored by Peter Eastman
Browse files

Fixed bugs in AMD-specific code for CCMA

parent 87b06907
...@@ -487,7 +487,6 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -487,7 +487,6 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaConstraintMatrixColumn = OpenCLArray::create<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn"); ccmaConstraintMatrixColumn = OpenCLArray::create<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConverged = OpenCLArray::create<cl_int>(context, 2, "CcmaConverged"); ccmaConverged = OpenCLArray::create<cl_int>(context, 2, "CcmaConverged");
ccmaConvergedHostBuffer = OpenCLArray::create<cl_int>(context, 1, "CcmaConvergedHostBuffer", CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR); ccmaConvergedHostBuffer = OpenCLArray::create<cl_int>(context, 1, "CcmaConvergedHostBuffer", CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR);
ccmaConvergedHostMemory = (int*) context.getQueue().enqueueMapBuffer(ccmaConvergedHostBuffer->getDeviceBuffer(), CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int));
// Different communication mechanisms give optimal performance on AMD and on NVIDIA. // Different communication mechanisms give optimal performance on AMD and on NVIDIA.
string vendor = context.getDevice().getInfo<CL_DEVICE_VENDOR>(); string vendor = context.getDevice().getInfo<CL_DEVICE_VENDOR>();
ccmaUseDirectBuffer = (vendor.size() >= 28 && vendor.substr(0, 28) == "Advanced Micro Devices, Inc."); ccmaUseDirectBuffer = (vendor.size() >= 28 && vendor.substr(0, 28) == "Advanced Micro Devices, Inc.");
...@@ -849,32 +848,33 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub ...@@ -849,32 +848,33 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaForceKernel.setArg<cl_float>(7, (cl_float) tol); ccmaForceKernel.setArg<cl_float>(7, (cl_float) tol);
context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize());
const int checkInterval = 4; const int checkInterval = 4;
cl::Event event;
int* converged = (int*) context.getPinnedBuffer(); int* converged = (int*) context.getPinnedBuffer();
int* ccmaConvergedHostMemory = (int*) context.getQueue().enqueueMapBuffer(ccmaConvergedHostBuffer->getDeviceBuffer(), CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_int));
ccmaConvergedHostMemory[0] = 0; ccmaConvergedHostMemory[0] = 0;
context.getQueue().enqueueUnmapMemObject(ccmaConvergedHostBuffer->getDeviceBuffer(), ccmaConvergedHostMemory);
for (int i = 0; i < 150; i++) { for (int i = 0; i < 150; i++) {
ccmaForceKernel.setArg<cl_int>(8, i); ccmaForceKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize());
if ((i+1)%checkInterval == 0) { cl::Event event;
if (ccmaUseDirectBuffer) if ((i+1)%checkInterval == 0 && !ccmaUseDirectBuffer)
context.getQueue().enqueueMarker(&event); context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), converged, NULL, &event);
else
context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), converged, NULL, &event);
}
ccmaMultiplyKernel.setArg<cl_int>(5, i); ccmaMultiplyKernel.setArg<cl_int>(5, i);
context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize());
ccmaUpdateKernel.setArg<cl_int>(8, i); ccmaUpdateKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaUpdateKernel, context.getNumAtoms()); context.executeKernel(ccmaUpdateKernel, context.getNumAtoms());
if ((i+1)%checkInterval == 0) { if ((i+1)%checkInterval == 0) {
event.wait();
if (ccmaUseDirectBuffer) { if (ccmaUseDirectBuffer) {
if (ccmaConvergedHostMemory[0]) ccmaConvergedHostMemory = (int*) context.getQueue().enqueueMapBuffer(ccmaConvergedHostBuffer->getDeviceBuffer(), CL_FALSE, CL_MAP_READ, 0, sizeof(cl_int), NULL, &event);
break; context.getQueue().flush();
} while (event.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() != CL_COMPLETE)
else { ;
if (converged[i%2]) converged[i%2] = ccmaConvergedHostMemory[0];
break; context.getQueue().enqueueUnmapMemObject(ccmaConvergedHostBuffer->getDeviceBuffer(), ccmaConvergedHostMemory);
} }
else
event.wait();
if (converged[i%2])
break;
} }
} }
} }
......
...@@ -141,7 +141,6 @@ private: ...@@ -141,7 +141,6 @@ private:
OpenCLArray* ccmaDelta1; OpenCLArray* ccmaDelta1;
OpenCLArray* ccmaDelta2; OpenCLArray* ccmaDelta2;
OpenCLArray* ccmaConverged; OpenCLArray* ccmaConverged;
int* ccmaConvergedHostMemory;
OpenCLArray* ccmaConvergedHostBuffer; OpenCLArray* ccmaConvergedHostBuffer;
OpenCLArray* vsite2AvgAtoms; OpenCLArray* vsite2AvgAtoms;
OpenCLArray* vsite2AvgWeights; OpenCLArray* vsite2AvgWeights;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment