Unverified Commit 94057ffb authored by Dann239's avatar Dann239 Committed by GitHub
Browse files

Made OpenCLContext::reduceBuffer additionally store the result into a 64-bit...

Made OpenCLContext::reduceBuffer additionally store the result into a 64-bit fixed point buffer. (#3209)
parent 6bdaa495
......@@ -364,11 +364,13 @@ public:
/**
* Given a collection of floating point buffers packed into an array, sum them and store
* the sum in the first buffer.
* Also, write the result into a 64-bit fixed point buffer (overwriting its contents).
*
* @param array the array containing the buffers to reduce
* @param longBuffer the 64-bit fixed point buffer to write the result into
* @param numBuffers the number of buffers packed into the array
*/
void reduceBuffer(OpenCLArray& array, int numBuffers);
void reduceBuffer(OpenCLArray& array, OpenCLArray& longBuffer, int numBuffers);
/**
* Sum the buffers containing forces.
*/
......
......@@ -782,11 +782,12 @@ void OpenCLContext::reduceForces() {
executeKernel(reduceForcesKernel, paddedNumAtoms, 128);
}
void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
void OpenCLContext::reduceBuffer(OpenCLArray& array, OpenCLArray& longBuffer, int numBuffers) {
int bufferSize = array.getSize()/numBuffers;
reduceReal4Kernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
reduceReal4Kernel.setArg<cl_int>(1, bufferSize);
reduceReal4Kernel.setArg<cl_int>(2, numBuffers);
reduceReal4Kernel.setArg<cl::Buffer>(1, longBuffer.getDeviceBuffer());
reduceReal4Kernel.setArg<cl_int>(2, bufferSize);
reduceReal4Kernel.setArg<cl_int>(3, numBuffers);
executeKernel(reduceReal4Kernel, bufferSize, 128);
}
......
......@@ -179,7 +179,7 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueWriteBuffer(contextForces.getDeviceBuffer(), CL_FALSE, numAtoms*elementSize,
numAtoms*(data.contexts.size()-1)*elementSize, pinnedForceMemory);
cl.reduceBuffer(contextForces, data.contexts.size());
cl.reduceBuffer(contextForces, cl.getLongForceBuffer(), data.contexts.size());
// Balance work between the contexts by transferring a little nonbonded work from the context that
// finished last to the one that finished first.
......
......@@ -67,9 +67,10 @@ __kernel void clearSixBuffers(__global int* restrict buffer1, int size1, __globa
/**
* Sum a collection of buffers into the first one.
* Also, write the result into a 64-bit fixed point buffer (overwriting its contents).
*/
__kernel void reduceReal4Buffer(__global real4* restrict buffer, int bufferSize, int numBuffers) {
__kernel void reduceReal4Buffer(__global real4* restrict buffer, __global long* restrict longBuffer, int bufferSize, int numBuffers) {
int index = get_global_id(0);
int totalSize = bufferSize*numBuffers;
while (index < bufferSize) {
......@@ -77,6 +78,9 @@ __kernel void reduceReal4Buffer(__global real4* restrict buffer, int bufferSize,
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
sum += buffer[i];
buffer[index] = sum;
longBuffer[index] = (long) (sum.x*0x100000000);
longBuffer[index+bufferSize] = (long) (sum.y*0x100000000);
longBuffer[index+2*bufferSize] = (long) (sum.z*0x100000000);
index += get_global_size(0);
}
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment