Commit 49e268f1 authored by Peter Eastman's avatar Peter Eastman
Browse files

Bug fixes to OpenCL RPMD

parent 4405b165
......@@ -61,8 +61,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
}
OpenCLContext::OpenCLContext(int numParticles, int deviceIndex, OpenCLPlatform::PlatformData& platformData) :
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), posq(NULL), velm(NULL),
forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), atomsWereReordered(false), posq(NULL),
velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL),
bonded(NULL), nonbonded(NULL), thread(NULL) {
try {
contextIndex = platformData.contexts.size();
......@@ -573,6 +573,7 @@ void OpenCLContext::findMoleculeGroups(const System& system) {
void OpenCLContext::reorderAtoms() {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return;
atomsWereReordered = true;
// Find the range of positions and the number of bins along each axis.
......
......@@ -444,6 +444,18 @@ public:
WorkThread& getWorkThread() {
return *thread;
}
/**
* Get whether atoms were reordered during the most recent force/energy computation.
*/
bool getAtomsWereReordered() const {
return atomsWereReordered;
}
/**
* Set whether atoms were reordered during the most recent force/energy computation.
*/
void setAtomsWereReordered(bool wereReordered) {
atomsWereReordered = wereReordered;
}
/**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays.
......@@ -466,7 +478,7 @@ private:
int numThreadBlocks;
int numForceBuffers;
int simdWidth;
bool supports64BitGlobalAtomics;
bool supports64BitGlobalAtomics, atomsWereReordered;
mm_float4 periodicBoxSize;
mm_float4 invPeriodicBoxSize;
std::string defaultOptimizationOptions;
......
......@@ -79,6 +79,7 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
cl.setAtomsWereReordered(false);
if (cl.getNonbondedUtilities().getUseCutoff() && cl.getComputeForceCount()%100 == 0) {
cl.reorderAtoms();
cl.getNonbondedUtilities().updateNeighborListSize();
......
......@@ -35,6 +35,7 @@
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLFFT3D.h"
#include "OpenCLNonbondedUtilities.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
using namespace OpenMM;
......@@ -93,6 +94,7 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
velocitiesKernel = cl::Kernel(program, "advanceVelocities");
copyToContextKernel = cl::Kernel(program, "copyToContext");
copyFromContextKernel = cl::Kernel(program, "copyFromContext");
translateKernel = cl::Kernel(program, "applyCellTranslations");
}
void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
......@@ -116,6 +118,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
stepKernel.setArg(6, numCopies*sizeof(mm_float2), NULL);
velocitiesKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
}
// Loop over copies and compute the force on each one.
......@@ -126,15 +131,8 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
if (!forcesAreValid) {
for (int i = 0; i < numCopies; i++) {
copyToContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
context.calcForcesAndEnergy(true, false);
copyFromContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
}
}
if (!forcesAreValid)
computeForces(context);
// Apply the PILE-L thermostat.
......@@ -153,13 +151,7 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
// Calculate forces based on the updated positions.
for (int i = 0; i < numCopies; i++) {
copyToContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
context.calcForcesAndEnergy(true, false);
copyFromContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
}
computeForces(context);
// Update velocities.
velocitiesKernel.setArg<cl_float>(2, dt);
......@@ -176,6 +168,23 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
cl.setStepCount(cl.getStepCount()+1);
}
void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
for (int i = 0; i < numCopies; i++) {
copyToContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
context.calcForcesAndEnergy(true, false);
copyFromContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
if (cl.getAtomsWereReordered() && cl.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
translateKernel.setArg<cl_int>(3, i);
cl.executeKernel(translateKernel, cl.getNumAtoms());
}
}
}
void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
if (positions == NULL)
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
......
......@@ -77,6 +77,7 @@ public:
*/
void copyToContext(int copy, ContextImpl& context);
private:
void computeForces(ContextImpl& context);
std::string createFFT(int size, const std::string& variable, bool forward);
OpenCLContext& cl;
bool hasInitializedKernel;
......@@ -84,7 +85,7 @@ private:
OpenCLArray<mm_float4>* forces;
OpenCLArray<mm_float4>* positions;
OpenCLArray<mm_float4>* velocities;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
};
} // namespace OpenMM
......
......@@ -190,3 +190,15 @@ __kernel void copyFromContext(__global float4* src, __global float4* dst, __glob
dst[base+order[particle]] = src[particle];
}
}
/**
* Update atom positions so all copies are offset by the same number of periodic box widths.
*/
__kernel void applyCellTranslations(__global float4* posq, __global float4* movedPos, __global int* order, int movedCopy) {
for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) {
int index = order[particle];
float4 delta = movedPos[particle]-posq[movedCopy*PADDED_NUM_ATOMS+index];
for (int copy = 0; copy < NUM_COPIES; copy++)
posq[copy*PADDED_NUM_ATOMS+index] += delta;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment