Commit ab60bff4 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed bug in OpenCL RPMD implementation

parent 326627a8
......@@ -79,7 +79,7 @@ public:
/**
* Copy positions and velocities for one copy into the context.
*/
virtual void copyToContext(int copy, ContextImpl& context) const = 0;
virtual void copyToContext(int copy, ContextImpl& context) = 0;
};
} // namespace OpenMM
......
......@@ -80,7 +80,7 @@ void RPMDIntegrator::setVelocities(int copy, const vector<Vec3>& velocities) {
}
State RPMDIntegrator::getState(int copy, int types, bool enforcePeriodicBox) {
dynamic_cast<const IntegrateRPMDStepKernel&>(kernel.getImpl()).copyToContext(copy, *context);
dynamic_cast<IntegrateRPMDStepKernel&>(kernel.getImpl()).copyToContext(copy, *context);
return context->getOwner().getState(types, enforcePeriodicBox);
}
......
......@@ -91,6 +91,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
pileKernel = cl::Kernel(program, "applyPileThermostat");
stepKernel = cl::Kernel(program, "integrateStep");
velocitiesKernel = cl::Kernel(program, "advanceVelocities");
copyToContextKernel = cl::Kernel(program, "copyToContext");
copyFromContextKernel = cl::Kernel(program, "copyFromContext");
}
void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
......@@ -118,13 +120,19 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
// Loop over copies and compute the force on each one.
copyToContextKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
if (!forcesAreValid) {
for (int i = 0; i < numCopies; i++) {
cl.getQueue().enqueueCopyBuffer(positions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(),
i*paddedParticles*sizeof(mm_float4), 0, paddedParticles*sizeof(mm_float4));
copyToContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
context.calcForcesAndEnergy(true, false);
cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), forces->getDeviceBuffer(),
0, i*paddedParticles*sizeof(mm_float4), paddedParticles*sizeof(mm_float4));
copyFromContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
}
}
......@@ -146,11 +154,11 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
// Calculate forces based on the updated positions.
for (int i = 0; i < numCopies; i++) {
cl.getQueue().enqueueCopyBuffer(positions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(),
i*paddedParticles*sizeof(mm_float4), 0, paddedParticles*sizeof(mm_float4));
copyToContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
context.calcForcesAndEnergy(true, false);
cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), forces->getDeviceBuffer(),
0, i*paddedParticles*sizeof(mm_float4), paddedParticles*sizeof(mm_float4));
copyFromContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
}
// Update velocities.
......@@ -190,12 +198,15 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
}
void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) const {
int paddedParticles = cl.getPaddedNumAtoms();
cl.getQueue().enqueueCopyBuffer(positions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(),
copy*paddedParticles*sizeof(mm_float4), 0, paddedParticles*sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(velocities->getDeviceBuffer(), cl.getVelm().getDeviceBuffer(),
copy*paddedParticles*sizeof(mm_float4), 0, paddedParticles*sizeof(mm_float4));
void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
copyToContextKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(3, copy);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
}
string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable, bool forward) {
......
......@@ -75,7 +75,7 @@ public:
/**
* Copy positions and velocities for one copy into the context.
*/
void copyToContext(int copy, ContextImpl& context) const;
void copyToContext(int copy, ContextImpl& context);
private:
std::string createFFT(int size, const std::string& variable, bool forward);
OpenCLContext& cl;
......@@ -84,7 +84,7 @@ private:
OpenCLArray<mm_float4>* forces;
OpenCLArray<mm_float4>* positions;
OpenCLArray<mm_float4>* velocities;
cl::Kernel pileKernel, stepKernel, velocitiesKernel;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel;
};
} // namespace OpenMM
......
......@@ -170,3 +170,23 @@ __kernel void advanceVelocities(__global float4* velm, __global float4* force, f
velm[index] = particleVelm;
}
}
/**
* Copy a set of per-atom values from the integrator's arrays to the context.
*/
__kernel void copyToContext(__global float4* src, __global float4* dst, __global int* order, int copy) {
const int base = copy*PADDED_NUM_ATOMS;
for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) {
dst[particle] = src[base+order[particle]];
}
}
/**
* Copy a set of per-atom values from the context to the integrator's arrays.
*/
__kernel void copyFromContext(__global float4* src, __global float4* dst, __global int* order, int copy) {
const int base = copy*PADDED_NUM_ATOMS;
for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) {
dst[base+order[particle]] = src[particle];
}
}
......@@ -332,7 +332,7 @@ void ReferenceIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3
velocities[copy][i] = vel[i];
}
void ReferenceIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) const {
void ReferenceIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
extractPositions(context) = positions[copy];
extractVelocities(context) = velocities[copy];
}
......@@ -76,7 +76,7 @@ public:
/**
* Copy positions and velocities for one copy into the context.
*/
void copyToContext(int copy, ContextImpl& context) const;
void copyToContext(int copy, ContextImpl& context);
private:
std::vector<std::vector<RealVec> > positions;
std::vector<std::vector<RealVec> > velocities;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment