Commit 49e268f1 authored by Peter Eastman's avatar Peter Eastman
Browse files

Bug fixes to OpenCL RPMD

parent 4405b165
...@@ -61,8 +61,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -61,8 +61,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
} }
OpenCLContext::OpenCLContext(int numParticles, int deviceIndex, OpenCLPlatform::PlatformData& platformData) : OpenCLContext::OpenCLContext(int numParticles, int deviceIndex, OpenCLPlatform::PlatformData& platformData) :
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), posq(NULL), velm(NULL), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), atomsWereReordered(false), posq(NULL),
forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL),
bonded(NULL), nonbonded(NULL), thread(NULL) { bonded(NULL), nonbonded(NULL), thread(NULL) {
try { try {
contextIndex = platformData.contexts.size(); contextIndex = platformData.contexts.size();
...@@ -573,6 +573,7 @@ void OpenCLContext::findMoleculeGroups(const System& system) { ...@@ -573,6 +573,7 @@ void OpenCLContext::findMoleculeGroups(const System& system) {
void OpenCLContext::reorderAtoms() { void OpenCLContext::reorderAtoms() {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return; return;
atomsWereReordered = true;
// Find the range of positions and the number of bins along each axis. // Find the range of positions and the number of bins along each axis.
......
...@@ -444,6 +444,18 @@ public: ...@@ -444,6 +444,18 @@ public:
WorkThread& getWorkThread() { WorkThread& getWorkThread() {
return *thread; return *thread;
} }
/**
* Get whether atoms were reordered during the most recent force/energy computation.
*/
bool getAtomsWereReordered() const {
return atomsWereReordered;
}
/**
* Set whether atoms were reordered during the most recent force/energy computation.
*/
void setAtomsWereReordered(bool wereReordered) {
atomsWereReordered = wereReordered;
}
/** /**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close * Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays. * together in the arrays.
...@@ -466,7 +478,7 @@ private: ...@@ -466,7 +478,7 @@ private:
int numThreadBlocks; int numThreadBlocks;
int numForceBuffers; int numForceBuffers;
int simdWidth; int simdWidth;
bool supports64BitGlobalAtomics; bool supports64BitGlobalAtomics, atomsWereReordered;
mm_float4 periodicBoxSize; mm_float4 periodicBoxSize;
mm_float4 invPeriodicBoxSize; mm_float4 invPeriodicBoxSize;
std::string defaultOptimizationOptions; std::string defaultOptimizationOptions;
......
...@@ -79,6 +79,7 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -79,6 +79,7 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
} }
void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy) { void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
cl.setAtomsWereReordered(false);
if (cl.getNonbondedUtilities().getUseCutoff() && cl.getComputeForceCount()%100 == 0) { if (cl.getNonbondedUtilities().getUseCutoff() && cl.getComputeForceCount()%100 == 0) {
cl.reorderAtoms(); cl.reorderAtoms();
cl.getNonbondedUtilities().updateNeighborListSize(); cl.getNonbondedUtilities().updateNeighborListSize();
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "OpenCLIntegrationUtilities.h" #include "OpenCLIntegrationUtilities.h"
#include "OpenCLExpressionUtilities.h" #include "OpenCLExpressionUtilities.h"
#include "OpenCLFFT3D.h" #include "OpenCLFFT3D.h"
#include "OpenCLNonbondedUtilities.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h" #include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
using namespace OpenMM; using namespace OpenMM;
...@@ -93,6 +94,7 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI ...@@ -93,6 +94,7 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
velocitiesKernel = cl::Kernel(program, "advanceVelocities"); velocitiesKernel = cl::Kernel(program, "advanceVelocities");
copyToContextKernel = cl::Kernel(program, "copyToContext"); copyToContextKernel = cl::Kernel(program, "copyToContext");
copyFromContextKernel = cl::Kernel(program, "copyFromContext"); copyFromContextKernel = cl::Kernel(program, "copyFromContext");
translateKernel = cl::Kernel(program, "applyCellTranslations");
} }
void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) { void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
...@@ -116,6 +118,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte ...@@ -116,6 +118,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
stepKernel.setArg(6, numCopies*sizeof(mm_float2), NULL); stepKernel.setArg(6, numCopies*sizeof(mm_float2), NULL);
velocitiesKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer()); velocitiesKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer()); velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
} }
// Loop over copies and compute the force on each one. // Loop over copies and compute the force on each one.
...@@ -126,15 +131,8 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte ...@@ -126,15 +131,8 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
if (!forcesAreValid) { if (!forcesAreValid)
for (int i = 0; i < numCopies; i++) { computeForces(context);
copyToContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
context.calcForcesAndEnergy(true, false);
copyFromContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
}
}
// Apply the PILE-L thermostat. // Apply the PILE-L thermostat.
...@@ -153,13 +151,7 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte ...@@ -153,13 +151,7 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
// Calculate forces based on the updated positions. // Calculate forces based on the updated positions.
for (int i = 0; i < numCopies; i++) { computeForces(context);
copyToContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
context.calcForcesAndEnergy(true, false);
copyFromContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
}
// Update velocities. // Update velocities.
velocitiesKernel.setArg<cl_float>(2, dt); velocitiesKernel.setArg<cl_float>(2, dt);
...@@ -176,6 +168,23 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte ...@@ -176,6 +168,23 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
} }
void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
for (int i = 0; i < numCopies; i++) {
copyToContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
context.calcForcesAndEnergy(true, false);
copyFromContextKernel.setArg<cl_int>(3, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
if (cl.getAtomsWereReordered() && cl.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
translateKernel.setArg<cl_int>(3, i);
cl.executeKernel(translateKernel, cl.getNumAtoms());
}
}
}
void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) { void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
if (positions == NULL) if (positions == NULL)
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context"); throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
......
...@@ -77,6 +77,7 @@ public: ...@@ -77,6 +77,7 @@ public:
*/ */
void copyToContext(int copy, ContextImpl& context); void copyToContext(int copy, ContextImpl& context);
private: private:
void computeForces(ContextImpl& context);
std::string createFFT(int size, const std::string& variable, bool forward); std::string createFFT(int size, const std::string& variable, bool forward);
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernel; bool hasInitializedKernel;
...@@ -84,7 +85,7 @@ private: ...@@ -84,7 +85,7 @@ private:
OpenCLArray<mm_float4>* forces; OpenCLArray<mm_float4>* forces;
OpenCLArray<mm_float4>* positions; OpenCLArray<mm_float4>* positions;
OpenCLArray<mm_float4>* velocities; OpenCLArray<mm_float4>* velocities;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel; cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -190,3 +190,15 @@ __kernel void copyFromContext(__global float4* src, __global float4* dst, __glob ...@@ -190,3 +190,15 @@ __kernel void copyFromContext(__global float4* src, __global float4* dst, __glob
dst[base+order[particle]] = src[particle]; dst[base+order[particle]] = src[particle];
} }
} }
/**
* Update atom positions so all copies are offset by the same number of periodic box widths.
*/
__kernel void applyCellTranslations(__global float4* posq, __global float4* movedPos, __global int* order, int movedCopy) {
for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) {
int index = order[particle];
float4 delta = movedPos[particle]-posq[movedCopy*PADDED_NUM_ATOMS+index];
for (int copy = 0; copy < NUM_COPIES; copy++)
posq[copy*PADDED_NUM_ATOMS+index] += delta;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment