Unverified Commit 88224d12 authored by Andy Simmonett's avatar Andy Simmonett
Browse files

Fix OpenCL and CUDA kernels after review

parent 1fea6058
......@@ -1769,7 +1769,7 @@ public:
private:
int sumWorkGroupSize;
CudaContext& cu;
CudaArray scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
CudaArray energyBuffer, scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
std::map<int, CudaArray> atomlists, pairlists;
std::map<int, CUfunction> propagateKernels;
CUfunction reduceEnergyKernel;
......
......@@ -8428,6 +8428,13 @@ void CudaNoseHooverChainKernel::initialize() {
computePairsKineticEnergyKernel = cu.getKernel(module, "computePairsKineticEnergy");
scaleAtomsVelocitiesKernel = cu.getKernel(module, "scaleAtomsVelocities");
scalePairsVelocitiesKernel = cu.getKernel(module, "scalePairsVelocities");
int energyBufferSize = cu.getEnergyBuffer().getSize();
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
energyBuffer.initialize<double2>(cu, energyBufferSize, "energyBuffer");
} else {
energyBuffer.initialize<float2>(cu, energyBufferSize, "energyBuffer");
}
}
std::pair<double, double> CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep) {
......@@ -8709,9 +8716,9 @@ std::pair<double, double> CudaNoseHooverChainKernel::computeMaskedKineticEnergy(
}
//taken from CudaContext::reduceEnergy(); the final kinetic energy will live in the kineticEnergy buffer
int bufferSize = cu.getEnergyBuffer().getSize() / 2; // Halve it to account for the fact that we're storing mixed2 instead of mixed in there
void* args2[] = {&cu.getEnergyBuffer().getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &sumWorkGroupSize};
cu.executeKernel(reduceEnergyKernel, args2, sumWorkGroupSize, sumWorkGroupSize, 2*sumWorkGroupSize*cu.getEnergyBuffer().getElementSize());
int bufferSize = energyBuffer.getSize();
void* args2[] = {&energyBuffer.getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &sumWorkGroupSize};
cu.executeKernel(reduceEnergyKernel, args2, sumWorkGroupSize, sumWorkGroupSize, sumWorkGroupSize*energyBuffer.getElementSize());
std::pair<double, double> KEs = {0, 0};
if (downloadValue) {
......
......@@ -125,8 +125,8 @@ extern "C" __global__ void scalePairsVelocities(mixed2 * __restrict__ scaleFacto
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numPairs; index += blockDim.x*gridDim.x) {
int atom1 = pairs[index].x;
int atom2 = pairs[index].y;
mixed4 &v1 = velm[atom1];
mixed4 &v2 = velm[atom2];
mixed4 v1 = velm[atom1];
mixed4 v2 = velm[atom2];
mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
mixed4 cv;
......@@ -143,6 +143,8 @@ extern "C" __global__ void scalePairsVelocities(mixed2 * __restrict__ scaleFacto
v2.x = absScale * cv.x + relScale * rv.x * m1 / (m1 + m2);
v2.y = absScale * cv.y + relScale * rv.y * m1 / (m1 + m2);
v2.z = absScale * cv.z + relScale * rv.z * m1 / (m1 + m2);
velm[atom1] = v1;
velm[atom2] = v2;
}
}
......
......@@ -1761,7 +1761,7 @@ public:
private:
int sumWorkGroupSize;
OpenCLContext& cl;
OpenCLArray scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
OpenCLArray energyBuffer, scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
std::map<int, OpenCLArray> atomlists, pairlists;
std::map<int, cl::Kernel> propagateKernels;
cl::Kernel reduceEnergyKernel;
......
......@@ -8855,6 +8855,12 @@ void OpenCLNoseHooverChainKernel::initialize() {
computePairsKineticEnergyKernel = cl::Kernel(program, "computePairsKineticEnergy");
scaleAtomsVelocitiesKernel = cl::Kernel(program, "scaleAtomsVelocities");
scalePairsVelocitiesKernel = cl::Kernel(program, "scalePairsVelocities");
int energyBufferSize = cl.getEnergyBuffer().getSize();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
energyBuffer.initialize<mm_double2>(cl, energyBufferSize, "energyBuffer");
} else {
energyBuffer.initialize<mm_float2>(cl, energyBufferSize, "energyBuffer");
}
}
std::pair<double, double> OpenCLNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep) {
......@@ -9145,28 +9151,28 @@ std::pair<double, double> OpenCLNoseHooverChainKernel::computeMaskedKineticEnerg
}
cl.clearBuffer(cl.getEnergyBuffer());
if (nAtoms) {
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(0, energyBuffer.getDeviceBuffer());
computeAtomsKineticEnergyKernel.setArg<cl_int>(1, nAtoms);
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(3, atomlists[chainID].getDeviceBuffer());
cl.executeKernel(computeAtomsKineticEnergyKernel, nAtoms);
}
if (nPairs) {
computePairsKineticEnergyKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
computePairsKineticEnergyKernel.setArg<cl::Buffer>(0, energyBuffer.getDeviceBuffer());
computePairsKineticEnergyKernel.setArg<cl_int>(1, nPairs);
computePairsKineticEnergyKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
computePairsKineticEnergyKernel.setArg<cl::Buffer>(3, pairlists[chainID].getDeviceBuffer());
cl.executeKernel(computePairsKineticEnergyKernel, nPairs);
}
int bufferSize = cl.getEnergyBuffer().getSize() / 2; // Halve it to account for the fact that we're storing mixed2 instead of mixed in there
int bufferSize = energyBuffer.getSize();
int workGroupSize = cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
if (workGroupSize > 512)
workGroupSize = 512;
reduceEnergyKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
reduceEnergyKernel.setArg<cl::Buffer>(0, energyBuffer.getDeviceBuffer());
reduceEnergyKernel.setArg<cl::Buffer>(1, kineticEnergyBuffer.getDeviceBuffer());
reduceEnergyKernel.setArg<cl_int>(2, bufferSize);
reduceEnergyKernel.setArg<cl_int>(3, workGroupSize);
reduceEnergyKernel.setArg(4, 2*workGroupSize*cl.getEnergyBuffer().getElementSize(), NULL);
reduceEnergyKernel.setArg(4, workGroupSize*energyBuffer.getElementSize(), NULL);
cl.executeKernel(reduceEnergyKernel, workGroupSize, workGroupSize);
std::pair<double, double> KEs = {0, 0};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment