Commit c589f1cc authored by Yutong Zhao's avatar Yutong Zhao
Browse files

Merge pull request #11 from peastman/master

Improved infrastructure for integrators
parents 6943ef5b 18f78c1e
...@@ -67,7 +67,7 @@ bool CudaContext::hasInitializedCuda = false; ...@@ -67,7 +67,7 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler), const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL),
posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (!hasInitializedCuda) { if (!hasInitializedCuda) {
CHECK_RESULT2(cuInit(0), "Error initializing CUDA"); CHECK_RESULT2(cuInit(0), "Error initializing CUDA");
...@@ -282,7 +282,6 @@ void CudaContext::initialize() { ...@@ -282,7 +282,6 @@ void CudaContext::initialize() {
atomIndex[i] = i; atomIndex[i] = i;
atomIndexDevice->upload(atomIndex); atomIndexDevice->upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
moleculesInvalid = false;
nonbonded->initialize(system); nonbonded->initialize(system);
} }
...@@ -821,11 +820,6 @@ void CudaContext::findMoleculeGroups() { ...@@ -821,11 +820,6 @@ void CudaContext::findMoleculeGroups() {
} }
void CudaContext::invalidateMolecules() { void CudaContext::invalidateMolecules() {
moleculesInvalid = true;
}
void CudaContext::validateMolecules() {
moleculesInvalid = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return; return;
bool valid = true; bool valid = true;
...@@ -925,24 +919,28 @@ void CudaContext::validateMolecules() { ...@@ -925,24 +919,28 @@ void CudaContext::validateMolecules() {
findMoleculeGroups(); findMoleculeGroups();
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute(); reorderListeners[i]->execute();
reorderAtoms();
} }
void CudaContext::reorderAtoms(bool enforcePeriodic) { void CudaContext::reorderAtoms() {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) atomsWereReordered = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff() || stepsSinceReorder < 100) {
stepsSinceReorder++;
return; return;
if (moleculesInvalid) }
validateMolecules();
atomsWereReordered = true; atomsWereReordered = true;
stepsSinceReorder = 0;
if (useDoublePrecision) if (useDoublePrecision)
reorderAtomsImpl<double, double4, double, double4>(enforcePeriodic); reorderAtomsImpl<double, double4, double, double4>();
else if (useMixedPrecision) else if (useMixedPrecision)
reorderAtomsImpl<float, float4, double, double4>(enforcePeriodic); reorderAtomsImpl<float, float4, double, double4>();
else else
reorderAtomsImpl<float, float4, float, float4>(enforcePeriodic); reorderAtomsImpl<float, float4, float, float4>();
nonbonded->updateNeighborListSize();
} }
template <class Real, class Real4, class Mixed, class Mixed4> template <class Real, class Real4, class Mixed, class Mixed4>
void CudaContext::reorderAtomsImpl(bool enforcePeriodic) { void CudaContext::reorderAtomsImpl() {
// Find the range of positions and the number of bins along each axis. // Find the range of positions and the number of bins along each axis.
Real4 padding = {0, 0, 0, 0}; Real4 padding = {0, 0, 0, 0};
...@@ -1019,18 +1017,16 @@ void CudaContext::reorderAtomsImpl(bool enforcePeriodic) { ...@@ -1019,18 +1017,16 @@ void CudaContext::reorderAtomsImpl(bool enforcePeriodic) {
molPos[i].x -= dx; molPos[i].x -= dx;
molPos[i].y -= dy; molPos[i].y -= dy;
molPos[i].z -= dz; molPos[i].z -= dz;
if (enforcePeriodic) { for (int j = 0; j < (int) atoms.size(); j++) {
for (int j = 0; j < (int) atoms.size(); j++) { int atom = atoms[j]+mol.offsets[i];
int atom = atoms[j]+mol.offsets[i]; Real4 p = oldPosq[atom];
Real4 p = oldPosq[atom]; p.x -= dx;
p.x -= dx; p.y -= dy;
p.y -= dy; p.z -= dz;
p.z -= dz; oldPosq[atom] = p;
oldPosq[atom] = p; posCellOffsets[atom].x -= xcell;
posCellOffsets[atom].x -= xcell; posCellOffsets[atom].y -= ycell;
posCellOffsets[atom].y -= ycell; posCellOffsets[atom].z -= zcell;
posCellOffsets[atom].z -= zcell;
}
} }
} }
} }
......
...@@ -298,6 +298,18 @@ public: ...@@ -298,6 +298,18 @@ public:
void setComputeForceCount(int count) { void setComputeForceCount(int count) {
computeForceCount = count; computeForceCount = count;
} }
/**
* Get the number of time steps since the atoms were reordered.
*/
int getStepsSinceReorder() const {
return stepsSinceReorder;
}
/**
* Set the number of time steps since the atoms were reordered.
*/
void setStepsSinceReorder(int steps) {
stepsSinceReorder = steps;
}
/** /**
* Get the number of atoms. * Get the number of atoms.
*/ */
...@@ -429,10 +441,8 @@ public: ...@@ -429,10 +441,8 @@ public:
/** /**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close * Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays. * together in the arrays.
*
* @param enforcePeriodic if true, the atom positions may be altered to enforce periodic boundary conditions
*/ */
void reorderAtoms(bool enforcePeriodic); void reorderAtoms();
/** /**
* Add a listener that should be called whenever atoms get reordered. The CudaContext * Add a listener that should be called whenever atoms get reordered. The CudaContext
* assumes ownership of the object, and deletes it when the context itself is deleted. * assumes ownership of the object, and deletes it when the context itself is deleted.
...@@ -447,15 +457,9 @@ public: ...@@ -447,15 +457,9 @@ public:
/** /**
* Mark that the current molecule definitions (and hence the atom order) may be invalid. * Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions * This should be called whenever force field parameters change. It will cause the definitions
* and order to be revalidated the next to reorderAtoms() is called. * and order to be revalidated.
*/ */
void invalidateMolecules(); void invalidateMolecules();
/**
* Get whether the current molecule definitions are valid.
*/
bool getMoleculesAreInvalid() {
return moleculesInvalid;
}
private: private:
struct Molecule; struct Molecule;
struct MoleculeGroup; struct MoleculeGroup;
...@@ -472,7 +476,7 @@ private: ...@@ -472,7 +476,7 @@ private:
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use. * This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/ */
template <class Real, class Real4, class Mixed, class Mixed4> template <class Real, class Real4, class Mixed, class Mixed4>
void reorderAtomsImpl(bool enforcePeriodic); void reorderAtomsImpl();
static bool hasInitializedCuda; static bool hasInitializedCuda;
const System& system; const System& system;
double time, computeCapability; double time, computeCapability;
...@@ -481,11 +485,12 @@ private: ...@@ -481,11 +485,12 @@ private:
int contextIndex; int contextIndex;
int stepCount; int stepCount;
int computeForceCount; int computeForceCount;
int stepsSinceReorder;
int numAtoms; int numAtoms;
int paddedNumAtoms; int paddedNumAtoms;
int numAtomBlocks; int numAtomBlocks;
int numThreadBlocks; int numThreadBlocks;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, moleculesInvalid; bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered;
std::string compiler, tempDir, gpuArchitecture; std::string compiler, tempDir, gpuArchitecture;
float4 periodicBoxSizeFloat, invPeriodicBoxSizeFloat; float4 periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxSize, invPeriodicBoxSize; double4 periodicBoxSize, invPeriodicBoxSize;
......
...@@ -86,11 +86,6 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool ...@@ -86,11 +86,6 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
cu.setAsCurrent(); cu.setAsCurrent();
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities(); CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0); bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0);
cu.setAtomsWereReordered(false);
if (nb.getUseCutoff() && includeNonbonded && (cu.getMoleculesAreInvalid() || cu.getComputeForceCount()%100 == 0)) {
cu.reorderAtoms(!cu.getMoleculesAreInvalid());
nb.updateNeighborListSize();
}
cu.setComputeForceCount(cu.getComputeForceCount()+1); cu.setComputeForceCount(cu.getComputeForceCount()+1);
cu.clearAutoclearBuffers(); cu.clearAutoclearBuffers();
if (includeNonbonded) if (includeNonbonded)
...@@ -220,6 +215,7 @@ void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector< ...@@ -220,6 +215,7 @@ void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<
} }
for (int i = 0; i < (int) cu.getPosCellOffsets().size(); i++) for (int i = 0; i < (int) cu.getPosCellOffsets().size(); i++)
cu.getPosCellOffsets()[i] = make_int4(0, 0, 0, 0); cu.getPosCellOffsets()[i] = make_int4(0, 0, 0, 0);
cu.reorderAtoms();
} }
void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) { void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
...@@ -317,8 +313,8 @@ void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& ...@@ -317,8 +313,8 @@ void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream&
stream.write((char*) &time, sizeof(double)); stream.write((char*) &time, sizeof(double));
int stepCount = cu.getStepCount(); int stepCount = cu.getStepCount();
stream.write((char*) &stepCount, sizeof(int)); stream.write((char*) &stepCount, sizeof(int));
int computeForceCount = cu.getComputeForceCount(); int stepsSinceReorder = cu.getStepsSinceReorder();
stream.write((char*) &computeForceCount, sizeof(int)); stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cu.getPinnedBuffer(); char* buffer = (char*) cu.getPinnedBuffer();
cu.getPosq().download(buffer); cu.getPosq().download(buffer);
stream.write(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize()); stream.write(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize());
...@@ -349,14 +345,14 @@ void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& st ...@@ -349,14 +345,14 @@ void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& st
throw OpenMMException("Checkpoint was created with a different numeric precision"); throw OpenMMException("Checkpoint was created with a different numeric precision");
double time; double time;
stream.read((char*) &time, sizeof(double)); stream.read((char*) &time, sizeof(double));
int stepCount, computeForceCount; int stepCount, stepsSinceReorder;
stream.read((char*) &stepCount, sizeof(int)); stream.read((char*) &stepCount, sizeof(int));
stream.read((char*) &computeForceCount, sizeof(int)); stream.read((char*) &stepsSinceReorder, sizeof(int));
vector<CudaContext*>& contexts = cu.getPlatformData().contexts; vector<CudaContext*>& contexts = cu.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++) { for (int i = 0; i < (int) contexts.size(); i++) {
contexts[i]->setTime(time); contexts[i]->setTime(time);
contexts[i]->setStepCount(stepCount); contexts[i]->setStepCount(stepCount);
contexts[i]->setComputeForceCount(computeForceCount); contexts[i]->setStepsSinceReorder(stepsSinceReorder);
} }
char* buffer = (char*) cu.getPinnedBuffer(); char* buffer = (char*) cu.getPinnedBuffer();
stream.read(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize()); stream.read(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize());
...@@ -4134,6 +4130,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn ...@@ -4134,6 +4130,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
cu.setTime(cu.getTime()+dt); cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateVerletStepKernel::computeKineticEnergy(ContextImpl& context, const VerletIntegrator& integrator) { double CudaIntegrateVerletStepKernel::computeKineticEnergy(ContextImpl& context, const VerletIntegrator& integrator) {
...@@ -4221,6 +4218,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev ...@@ -4221,6 +4218,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
cu.setTime(cu.getTime()+stepSize); cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const LangevinIntegrator& integrator) { double CudaIntegrateLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const LangevinIntegrator& integrator) {
...@@ -4283,6 +4281,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni ...@@ -4283,6 +4281,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
cu.setTime(cu.getTime()+stepSize); cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateBrownianStepKernel::computeKineticEnergy(ContextImpl& context, const BrownianIntegrator& integrator) { double CudaIntegrateBrownianStepKernel::computeKineticEnergy(ContextImpl& context, const BrownianIntegrator& integrator) {
...@@ -4363,6 +4362,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons ...@@ -4363,6 +4362,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
} }
cu.setTime(time); cu.setTime(time);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
return dt; return dt;
} }
...@@ -4457,6 +4457,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co ...@@ -4457,6 +4457,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
} }
cu.setTime(time); cu.setTime(time);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
return dt; return dt;
} }
...@@ -5129,6 +5130,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -5129,6 +5130,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu.setTime(cu.getTime()+integrator.getStepSize()); cu.setTime(cu.getTime()+integrator.getStepSize());
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) { double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
...@@ -5359,40 +5361,12 @@ void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, d ...@@ -5359,40 +5361,12 @@ void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, d
void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) { void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
cu.setAsCurrent(); cu.setAsCurrent();
if (cu.getAtomsWereReordered()) { int bytesToCopy = cu.getPosq().getSize()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
// The atoms were reordered since we saved the positions, so we need to fix them. CUresult result = cuMemcpyDtoD(cu.getPosq().getDevicePointer(), savedPositions->getDevicePointer(), bytesToCopy);
if (result != CUDA_SUCCESS) {
const vector<int> atomOrder = cu.getAtomIndex(); std::stringstream m;
int numAtoms = cu.getNumAtoms(); m<<"Error restoring positions for MC barostat: "<<cu.getErrorString(result)<<" ("<<result<<")";
if (cu.getUseDoublePrecision()) { throw OpenMMException(m.str());
double4* pos = (double4*) cu.getPinnedBuffer();
savedPositions->download(pos);
vector<double4> fixedPos(cu.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cu.getPosq().upload(pos);
}
else {
float4* pos = (float4*) cu.getPinnedBuffer();
savedPositions->download(pos);
vector<float4> fixedPos(cu.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cu.getPosq().upload(pos);
}
}
else {
int bytesToCopy = cu.getPosq().getSize()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
CUresult result = cuMemcpyDtoD(cu.getPosq().getDevicePointer(), savedPositions->getDevicePointer(), bytesToCopy);
if (result != CUDA_SUCCESS) {
std::stringstream m;
m<<"Error restoring positions for MC barostat: "<<cu.getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(m.str());
}
} }
} }
......
...@@ -739,12 +739,6 @@ void testChangingParameters() { ...@@ -739,12 +739,6 @@ void testChangingParameters() {
nonbonded->getParticleParameters(i, charge, sigma, epsilon); nonbonded->getParticleParameters(i, charge, sigma, epsilon);
nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon); nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon);
} }
double total = 0;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
nonbonded->getParticleParameters(i, charge, sigma, epsilon);
total += charge;
}
nonbonded->updateParametersInContext(cuContext); nonbonded->updateParametersInContext(cuContext);
nonbonded->updateParametersInContext(referenceContext); nonbonded->updateParametersInContext(referenceContext);
cuState = cuContext.getState(State::Forces | State::Energy); cuState = cuContext.getState(State::Forces | State::Energy);
......
...@@ -66,7 +66,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -66,7 +66,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
} }
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) : OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), atomsWereReordered(false), posq(NULL), system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL), posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") { if (precision == "single") {
...@@ -407,7 +407,6 @@ void OpenCLContext::initialize() { ...@@ -407,7 +407,6 @@ void OpenCLContext::initialize() {
atomIndex[i] = i; atomIndex[i] = i;
atomIndexDevice->upload(atomIndex); atomIndexDevice->upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
moleculesInvalid = false;
nonbonded->initialize(system); nonbonded->initialize(system);
} }
...@@ -826,11 +825,6 @@ void OpenCLContext::findMoleculeGroups() { ...@@ -826,11 +825,6 @@ void OpenCLContext::findMoleculeGroups() {
} }
void OpenCLContext::invalidateMolecules() { void OpenCLContext::invalidateMolecules() {
moleculesInvalid = true;
}
void OpenCLContext::validateMolecules() {
moleculesInvalid = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return; return;
bool valid = true; bool valid = true;
...@@ -930,24 +924,28 @@ void OpenCLContext::validateMolecules() { ...@@ -930,24 +924,28 @@ void OpenCLContext::validateMolecules() {
findMoleculeGroups(); findMoleculeGroups();
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute(); reorderListeners[i]->execute();
reorderAtoms();
} }
void OpenCLContext::reorderAtoms(bool enforcePeriodic) { void OpenCLContext::reorderAtoms() {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) atomsWereReordered = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff() || stepsSinceReorder < 100) {
stepsSinceReorder++;
return; return;
if (moleculesInvalid) }
validateMolecules();
atomsWereReordered = true; atomsWereReordered = true;
stepsSinceReorder = 0;
if (useDoublePrecision) if (useDoublePrecision)
reorderAtomsImpl<cl_double, mm_double4, cl_double, mm_double4>(enforcePeriodic); reorderAtomsImpl<cl_double, mm_double4, cl_double, mm_double4>();
else if (useMixedPrecision) else if (useMixedPrecision)
reorderAtomsImpl<cl_float, mm_float4, cl_double, mm_double4>(enforcePeriodic); reorderAtomsImpl<cl_float, mm_float4, cl_double, mm_double4>();
else else
reorderAtomsImpl<cl_float, mm_float4, cl_float, mm_float4>(enforcePeriodic); reorderAtomsImpl<cl_float, mm_float4, cl_float, mm_float4>();
nonbonded->updateNeighborListSize();
} }
template <class Real, class Real4, class Mixed, class Mixed4> template <class Real, class Real4, class Mixed, class Mixed4>
void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) { void OpenCLContext::reorderAtomsImpl() {
// Find the range of positions and the number of bins along each axis. // Find the range of positions and the number of bins along each axis.
...@@ -1023,18 +1021,16 @@ void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) { ...@@ -1023,18 +1021,16 @@ void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) {
molPos[i].x -= dx; molPos[i].x -= dx;
molPos[i].y -= dy; molPos[i].y -= dy;
molPos[i].z -= dz; molPos[i].z -= dz;
if (enforcePeriodic) { for (int j = 0; j < (int) atoms.size(); j++) {
for (int j = 0; j < (int) atoms.size(); j++) { int atom = atoms[j]+mol.offsets[i];
int atom = atoms[j]+mol.offsets[i]; Real4 p = oldPosq[atom];
Real4 p = oldPosq[atom]; p.x -= dx;
p.x -= dx; p.y -= dy;
p.y -= dy; p.z -= dz;
p.z -= dz; oldPosq[atom] = p;
oldPosq[atom] = p; posCellOffsets[atom].x -= xcell;
posCellOffsets[atom].x -= xcell; posCellOffsets[atom].y -= ycell;
posCellOffsets[atom].y -= ycell; posCellOffsets[atom].z -= zcell;
posCellOffsets[atom].z -= zcell;
}
} }
} }
} }
......
...@@ -381,6 +381,18 @@ public: ...@@ -381,6 +381,18 @@ public:
void setComputeForceCount(int count) { void setComputeForceCount(int count) {
computeForceCount = count; computeForceCount = count;
} }
/**
* Get the number of time steps since the atoms were reordered.
*/
int getStepsSinceReorder() const {
return stepsSinceReorder;
}
/**
* Set the number of time steps since the atoms were reordered.
*/
void setStepsSinceReorder(int steps) {
stepsSinceReorder = steps;
}
/** /**
* Get the number of atoms. * Get the number of atoms.
*/ */
...@@ -529,10 +541,8 @@ public: ...@@ -529,10 +541,8 @@ public:
/** /**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close * Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays. * together in the arrays.
*
* @param enforcePeriodic if true, the atom positions may be altered to enforce periodic boundary conditions
*/ */
void reorderAtoms(bool enforcePeriodic); void reorderAtoms();
/** /**
* Add a listener that should be called whenever atoms get reordered. The OpenCLContext * Add a listener that should be called whenever atoms get reordered. The OpenCLContext
* assumes ownership of the object, and deletes it when the context itself is deleted. * assumes ownership of the object, and deletes it when the context itself is deleted.
...@@ -547,15 +557,9 @@ public: ...@@ -547,15 +557,9 @@ public:
/** /**
* Mark that the current molecule definitions (and hence the atom order) may be invalid. * Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions * This should be called whenever force field parameters change. It will cause the definitions
* and order to be revalidated the next to reorderAtoms() is called. * and order to be revalidated.
*/ */
void invalidateMolecules(); void invalidateMolecules();
/**
* Get whether the current molecule definitions are valid.
*/
bool getMoleculesAreInvalid() {
return moleculesInvalid;
}
private: private:
struct Molecule; struct Molecule;
struct MoleculeGroup; struct MoleculeGroup;
...@@ -572,7 +576,7 @@ private: ...@@ -572,7 +576,7 @@ private:
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use. * This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/ */
template <class Real, class Real4, class Mixed, class Mixed4> template <class Real, class Real4, class Mixed, class Mixed4>
void reorderAtomsImpl(bool enforcePeriodic); void reorderAtomsImpl();
const System& system; const System& system;
double time; double time;
OpenCLPlatform::PlatformData& platformData; OpenCLPlatform::PlatformData& platformData;
...@@ -580,13 +584,14 @@ private: ...@@ -580,13 +584,14 @@ private:
int contextIndex; int contextIndex;
int stepCount; int stepCount;
int computeForceCount; int computeForceCount;
int stepsSinceReorder;
int numAtoms; int numAtoms;
int paddedNumAtoms; int paddedNumAtoms;
int numAtomBlocks; int numAtomBlocks;
int numThreadBlocks; int numThreadBlocks;
int numForceBuffers; int numForceBuffers;
int simdWidth; int simdWidth;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, moleculesInvalid; bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered;
mm_float4 periodicBoxSize, invPeriodicBoxSize; mm_float4 periodicBoxSize, invPeriodicBoxSize;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble; mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble;
std::string defaultOptimizationOptions; std::string defaultOptimizationOptions;
......
...@@ -106,11 +106,6 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -106,11 +106,6 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) { void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities(); OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0); bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0);
cl.setAtomsWereReordered(false);
if (nb.getUseCutoff() && includeNonbonded && (cl.getMoleculesAreInvalid() || cl.getComputeForceCount()%100 == 0)) {
cl.reorderAtoms(!cl.getMoleculesAreInvalid());
nb.updateNeighborListSize();
}
cl.setComputeForceCount(cl.getComputeForceCount()+1); cl.setComputeForceCount(cl.getComputeForceCount()+1);
cl.clearAutoclearBuffers(); cl.clearAutoclearBuffers();
if (includeNonbonded) if (includeNonbonded)
...@@ -239,6 +234,7 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vecto ...@@ -239,6 +234,7 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vecto
} }
for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++) for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++)
cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0); cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0);
cl.reorderAtoms();
} }
void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) { void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
...@@ -342,8 +338,8 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream ...@@ -342,8 +338,8 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream
stream.write((char*) &time, sizeof(double)); stream.write((char*) &time, sizeof(double));
int stepCount = cl.getStepCount(); int stepCount = cl.getStepCount();
stream.write((char*) &stepCount, sizeof(int)); stream.write((char*) &stepCount, sizeof(int));
int computeForceCount = cl.getComputeForceCount(); int stepsSinceReorder = cl.getStepsSinceReorder();
stream.write((char*) &computeForceCount, sizeof(int)); stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cl.getPinnedBuffer(); char* buffer = (char*) cl.getPinnedBuffer();
cl.getPosq().download(buffer); cl.getPosq().download(buffer);
stream.write(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize()); stream.write(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize());
...@@ -373,14 +369,14 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& ...@@ -373,14 +369,14 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream&
throw OpenMMException("Checkpoint was created with a different numeric precision"); throw OpenMMException("Checkpoint was created with a different numeric precision");
double time; double time;
stream.read((char*) &time, sizeof(double)); stream.read((char*) &time, sizeof(double));
int stepCount, computeForceCount; int stepCount, stepsSinceReorder;
stream.read((char*) &stepCount, sizeof(int)); stream.read((char*) &stepCount, sizeof(int));
stream.read((char*) &computeForceCount, sizeof(int)); stream.read((char*) &stepsSinceReorder, sizeof(int));
vector<OpenCLContext*>& contexts = cl.getPlatformData().contexts; vector<OpenCLContext*>& contexts = cl.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++) { for (int i = 0; i < (int) contexts.size(); i++) {
contexts[i]->setTime(time); contexts[i]->setTime(time);
contexts[i]->setStepCount(stepCount); contexts[i]->setStepCount(stepCount);
contexts[i]->setComputeForceCount(computeForceCount); contexts[i]->setStepsSinceReorder(stepsSinceReorder);
} }
char* buffer = (char*) cl.getPinnedBuffer(); char* buffer = (char*) cl.getPinnedBuffer();
stream.read(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize()); stream.read(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize());
...@@ -4296,6 +4292,7 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet ...@@ -4296,6 +4292,7 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet
cl.setTime(cl.getTime()+dt); cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag. // Reduce UI lag.
...@@ -4395,6 +4392,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang ...@@ -4395,6 +4392,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
cl.setTime(cl.getTime()+stepSize); cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag. // Reduce UI lag.
...@@ -4473,6 +4471,7 @@ void OpenCLIntegrateBrownianStepKernel::execute(ContextImpl& context, const Brow ...@@ -4473,6 +4471,7 @@ void OpenCLIntegrateBrownianStepKernel::execute(ContextImpl& context, const Brow
cl.setTime(cl.getTime()+stepSize); cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag. // Reduce UI lag.
...@@ -4578,6 +4577,7 @@ double OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, co ...@@ -4578,6 +4577,7 @@ double OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, co
} }
cl.setTime(time); cl.setTime(time);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
return dt; return dt;
} }
...@@ -4691,6 +4691,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, ...@@ -4691,6 +4691,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context,
} }
cl.setTime(time); cl.setTime(time);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
return dt; return dt;
} }
...@@ -5354,6 +5355,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -5354,6 +5355,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
cl.setTime(cl.getTime()+integrator.getStepSize()); cl.setTime(cl.getTime()+integrator.getStepSize());
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag. // Reduce UI lag.
...@@ -5580,34 +5582,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -5580,34 +5582,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
} }
void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) { void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
if (cl.getAtomsWereReordered()) { cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, cl.getPosq().getSize()*sizeof(mm_float4));
// The atoms were reordered since we saved the positions, so we need to fix them.
const vector<int> atomOrder = cl.getAtomIndex();
int numAtoms = cl.getNumAtoms();
if (cl.getUseDoublePrecision()) {
mm_double4* pos = (mm_double4*) cl.getPinnedBuffer();
savedPositions->download(pos);
vector<mm_double4> fixedPos(cl.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cl.getPosq().upload(pos);
}
else {
mm_float4* pos = (mm_float4*) cl.getPinnedBuffer();
savedPositions->download(pos);
vector<mm_float4> fixedPos(cl.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cl.getPosq().upload(pos);
}
}
else
cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, cl.getPosq().getSize()*sizeof(mm_float4));
} }
OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() { OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() {
......
...@@ -742,12 +742,6 @@ void testChangingParameters() { ...@@ -742,12 +742,6 @@ void testChangingParameters() {
nonbonded->getParticleParameters(i, charge, sigma, epsilon); nonbonded->getParticleParameters(i, charge, sigma, epsilon);
nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon); nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon);
} }
double total = 0;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
nonbonded->getParticleParameters(i, charge, sigma, epsilon);
total += charge;
}
nonbonded->updateParametersInContext(clContext); nonbonded->updateParametersInContext(clContext);
nonbonded->updateParametersInContext(referenceContext); nonbonded->updateParametersInContext(referenceContext);
clState = clContext.getState(State::Forces | State::Energy); clState = clContext.getState(State::Forces | State::Energy);
......
...@@ -358,33 +358,13 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D ...@@ -358,33 +358,13 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
cu.setTime(cu.getTime()+stepSize); cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) { double CudaIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize()); return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
} }
class CudaIntegrateDrudeSCFStepKernel::ReorderListener : public CudaContext::ReorderListener {
public:
ReorderListener(CudaContext& cu, const vector<int>& drudeParticles, vector<int>& reorderedDrudeParticles) :
cu(cu), drudeParticles(drudeParticles), reorderedDrudeParticles(reorderedDrudeParticles) {
}
void execute() {
const vector<int>& order = cu.getAtomIndex();
int numParticles = order.size();
vector<int> inverseOrder(numParticles);
for (int i = 0; i < numParticles; i++)
inverseOrder[order[i]] = i;
int numDrudeParticles = drudeParticles.size();
for (int i = 0; i < numDrudeParticles; i++)
reorderedDrudeParticles[i] = inverseOrder[drudeParticles[i]];
}
private:
CudaContext& cu;
const vector<int>& drudeParticles;
vector<int>& reorderedDrudeParticles;
};
CudaIntegrateDrudeSCFStepKernel::~CudaIntegrateDrudeSCFStepKernel() { CudaIntegrateDrudeSCFStepKernel::~CudaIntegrateDrudeSCFStepKernel() {
if (minimizerPos != NULL) if (minimizerPos != NULL)
lbfgs_free(minimizerPos); lbfgs_free(minimizerPos);
...@@ -401,9 +381,7 @@ void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const Dru ...@@ -401,9 +381,7 @@ void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const Dru
double charge, polarizability, aniso12, aniso34; double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34); force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
drudeParticles.push_back(p); drudeParticles.push_back(p);
reorderedDrudeParticles.push_back(p);
} }
cu.addReorderListener(new ReorderListener(cu, drudeParticles, reorderedDrudeParticles));
// Initialize the energy minimizer. // Initialize the energy minimizer.
...@@ -469,6 +447,7 @@ void CudaIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeS ...@@ -469,6 +447,7 @@ void CudaIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeS
cu.setTime(cu.getTime()+dt); cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) { double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
...@@ -478,39 +457,36 @@ double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& contex ...@@ -478,39 +457,36 @@ double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& contex
struct MinimizerData { struct MinimizerData {
ContextImpl& context; ContextImpl& context;
CudaContext& cu; CudaContext& cu;
vector<int>& reorderedDrudeParticles; vector<int>& drudeParticles;
MinimizerData(ContextImpl& context, CudaContext& cu, vector<int>& reorderedDrudeParticles) : context(context), cu(cu), reorderedDrudeParticles(reorderedDrudeParticles) {} MinimizerData(ContextImpl& context, CudaContext& cu, vector<int>& drudeParticles) : context(context), cu(cu), drudeParticles(drudeParticles) {}
}; };
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) { static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
MinimizerData* data = reinterpret_cast<MinimizerData*>(instance); MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
ContextImpl& context = data->context; ContextImpl& context = data->context;
CudaContext& cu = data->cu; CudaContext& cu = data->cu;
vector<int>& reorderedDrudeParticles = data->reorderedDrudeParticles; vector<int>& drudeParticles = data->drudeParticles;
int numDrudeParticles = reorderedDrudeParticles.size(); int numDrudeParticles = drudeParticles.size();
// Set the particle positions. // Set the particle positions.
cu.getPosq().download(cu.getPinnedBuffer()); cu.getPosq().download(cu.getPinnedBuffer());
double4 periodicBoxSize = cu.getPeriodicBoxSize();
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer(); double4* posq = (double4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
double4& p = posq[reorderedDrudeParticles[i]]; double4& p = posq[drudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]]; p.x = x[3*i];
p.x = x[3*i]+offset.x*periodicBoxSize.x; p.y = x[3*i+1];
p.y = x[3*i+1]+offset.y*periodicBoxSize.y; p.z = x[3*i+2];
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
} }
} }
else { else {
float4* posq = (float4*) cu.getPinnedBuffer(); float4* posq = (float4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
float4& p = posq[reorderedDrudeParticles[i]]; float4& p = posq[drudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]]; p.x = x[3*i];
p.x = x[3*i]+offset.x*periodicBoxSize.x; p.y = x[3*i+1];
p.y = x[3*i+1]+offset.y*periodicBoxSize.y; p.z = x[3*i+2];
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
} }
} }
cu.getPosq().upload(cu.getPinnedBuffer()); cu.getPosq().upload(cu.getPinnedBuffer());
...@@ -523,7 +499,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf ...@@ -523,7 +499,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
double forceScale = -1.0/0x100000000; double forceScale = -1.0/0x100000000;
int paddedNumAtoms = cu.getPaddedNumAtoms(); int paddedNumAtoms = cu.getPaddedNumAtoms();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
int index = reorderedDrudeParticles[i]; int index = drudeParticles[i];
g[3*i] = forceScale*force[index]; g[3*i] = forceScale*force[index];
g[3*i+1] = forceScale*force[index+paddedNumAtoms]; g[3*i+1] = forceScale*force[index+paddedNumAtoms];
g[3*i+2] = forceScale*force[index+paddedNumAtoms*2]; g[3*i+2] = forceScale*force[index+paddedNumAtoms*2];
...@@ -534,27 +510,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf ...@@ -534,27 +510,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) { void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
// Record the initial positions. // Record the initial positions.
int numDrudeParticles = reorderedDrudeParticles.size(); int numDrudeParticles = drudeParticles.size();
cu.getPosq().download(cu.getPinnedBuffer()); cu.getPosq().download(cu.getPinnedBuffer());
double4 periodicBoxSize = cu.getPeriodicBoxSize();
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer(); double4* posq = (double4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
double4 p = posq[reorderedDrudeParticles[i]]; double4 p = posq[drudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]]; minimizerPos[3*i] = p.x;
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x; minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y; minimizerPos[3*i+2] = p.z;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
} }
} }
else { else {
float4* posq = (float4*) cu.getPinnedBuffer(); float4* posq = (float4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
float4 p = posq[reorderedDrudeParticles[i]]; float4 p = posq[drudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]]; minimizerPos[3*i] = p.x;
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x; minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y; minimizerPos[3*i+2] = p.z;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
} }
minimizerParams.xtol = 1e-7; minimizerParams.xtol = 1e-7;
} }
...@@ -571,6 +544,6 @@ void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tole ...@@ -571,6 +544,6 @@ void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tole
// Perform the minimization. // Perform the minimization.
lbfgsfloatval_t fx; lbfgsfloatval_t fx;
MinimizerData data(context, cu, reorderedDrudeParticles); MinimizerData data(context, cu, drudeParticles);
lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams); lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
} }
\ No newline at end of file
...@@ -148,12 +148,10 @@ public: ...@@ -148,12 +148,10 @@ public:
*/ */
double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator); double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
private: private:
class ReorderListener;
void minimize(ContextImpl& context, double tolerance); void minimize(ContextImpl& context, double tolerance);
CudaContext& cu; CudaContext& cu;
double prevStepSize; double prevStepSize;
std::vector<int> drudeParticles; std::vector<int> drudeParticles;
std::vector<int> reorderedDrudeParticles;
lbfgsfloatval_t *minimizerPos; lbfgsfloatval_t *minimizerPos;
lbfgs_parameter_t minimizerParams; lbfgs_parameter_t minimizerParams;
CUfunction kernel1, kernel2; CUfunction kernel1, kernel2;
......
...@@ -364,33 +364,13 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const ...@@ -364,33 +364,13 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
cl.setTime(cl.getTime()+stepSize); cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
} }
double OpenCLIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) { double OpenCLIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize()); return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
} }
class OpenCLIntegrateDrudeSCFStepKernel::ReorderListener : public OpenCLContext::ReorderListener {
public:
ReorderListener(OpenCLContext& cl, const vector<int>& drudeParticles, vector<int>& reorderedDrudeParticles) :
cl(cl), drudeParticles(drudeParticles), reorderedDrudeParticles(reorderedDrudeParticles) {
}
void execute() {
const vector<int>& order = cl.getAtomIndex();
int numParticles = order.size();
vector<int> inverseOrder(numParticles);
for (int i = 0; i < numParticles; i++)
inverseOrder[order[i]] = i;
int numDrudeParticles = drudeParticles.size();
for (int i = 0; i < numDrudeParticles; i++)
reorderedDrudeParticles[i] = inverseOrder[drudeParticles[i]];
}
private:
OpenCLContext& cl;
const vector<int>& drudeParticles;
vector<int>& reorderedDrudeParticles;
};
OpenCLIntegrateDrudeSCFStepKernel::~OpenCLIntegrateDrudeSCFStepKernel() { OpenCLIntegrateDrudeSCFStepKernel::~OpenCLIntegrateDrudeSCFStepKernel() {
if (minimizerPos != NULL) if (minimizerPos != NULL)
lbfgs_free(minimizerPos); lbfgs_free(minimizerPos);
...@@ -406,9 +386,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const D ...@@ -406,9 +386,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const D
double charge, polarizability, aniso12, aniso34; double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34); force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
drudeParticles.push_back(p); drudeParticles.push_back(p);
reorderedDrudeParticles.push_back(p);
} }
cl.addReorderListener(new ReorderListener(cl, drudeParticles, reorderedDrudeParticles));
// Initialize the energy minimizer. // Initialize the energy minimizer.
...@@ -481,6 +459,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud ...@@ -481,6 +459,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud
cl.setTime(cl.getTime()+dt); cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag. // Reduce UI lag.
...@@ -496,39 +475,36 @@ double OpenCLIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& cont ...@@ -496,39 +475,36 @@ double OpenCLIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& cont
struct MinimizerData { struct MinimizerData {
ContextImpl& context; ContextImpl& context;
OpenCLContext& cl; OpenCLContext& cl;
vector<int>& reorderedDrudeParticles; vector<int>& drudeParticles;
MinimizerData(ContextImpl& context, OpenCLContext& cl, vector<int>& reorderedDrudeParticles) : context(context), cl(cl), reorderedDrudeParticles(reorderedDrudeParticles) {} MinimizerData(ContextImpl& context, OpenCLContext& cl, vector<int>& drudeParticles) : context(context), cl(cl), drudeParticles(drudeParticles) {}
}; };
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) { static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
MinimizerData* data = reinterpret_cast<MinimizerData*>(instance); MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
ContextImpl& context = data->context; ContextImpl& context = data->context;
OpenCLContext& cl = data->cl; OpenCLContext& cl = data->cl;
vector<int>& reorderedDrudeParticles = data->reorderedDrudeParticles; vector<int>& drudeParticles = data->drudeParticles;
int numDrudeParticles = reorderedDrudeParticles.size(); int numDrudeParticles = drudeParticles.size();
// Set the particle positions. // Set the particle positions.
cl.getPosq().download(cl.getPinnedBuffer()); cl.getPosq().download(cl.getPinnedBuffer());
mm_double4 periodicBoxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer(); mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
mm_double4& p = posq[reorderedDrudeParticles[i]]; mm_double4& p = posq[drudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]]; p.x = x[3*i];
p.x = x[3*i]+offset.x*periodicBoxSize.x; p.y = x[3*i+1];
p.y = x[3*i+1]+offset.y*periodicBoxSize.y; p.z = x[3*i+2];
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
} }
} }
else { else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer(); mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
mm_float4& p = posq[reorderedDrudeParticles[i]]; mm_float4& p = posq[drudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]]; p.x = x[3*i];
p.x = x[3*i]+offset.x*periodicBoxSize.x; p.y = x[3*i+1];
p.y = x[3*i+1]+offset.y*periodicBoxSize.y; p.z = x[3*i+2];
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
} }
} }
cl.getPosq().upload(cl.getPinnedBuffer()); cl.getPosq().upload(cl.getPinnedBuffer());
...@@ -540,7 +516,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf ...@@ -540,7 +516,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
mm_double4* force = (mm_double4*) cl.getPinnedBuffer(); mm_double4* force = (mm_double4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
int index = reorderedDrudeParticles[i]; int index = drudeParticles[i];
g[3*i] = -force[index].x; g[3*i] = -force[index].x;
g[3*i+1] = -force[index].y; g[3*i+1] = -force[index].y;
g[3*i+2] = -force[index].z; g[3*i+2] = -force[index].z;
...@@ -549,7 +525,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf ...@@ -549,7 +525,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
else { else {
mm_float4* force = (mm_float4*) cl.getPinnedBuffer(); mm_float4* force = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
int index = reorderedDrudeParticles[i]; int index = drudeParticles[i];
g[3*i] = -force[index].x; g[3*i] = -force[index].x;
g[3*i+1] = -force[index].y; g[3*i+1] = -force[index].y;
g[3*i+2] = -force[index].z; g[3*i+2] = -force[index].z;
...@@ -561,27 +537,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf ...@@ -561,27 +537,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) { void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
// Record the initial positions. // Record the initial positions.
int numDrudeParticles = reorderedDrudeParticles.size(); int numDrudeParticles = drudeParticles.size();
cl.getPosq().download(cl.getPinnedBuffer()); cl.getPosq().download(cl.getPinnedBuffer());
mm_double4 periodicBoxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer(); mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
mm_double4 p = posq[reorderedDrudeParticles[i]]; mm_double4 p = posq[drudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]]; minimizerPos[3*i] = p.x;
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x; minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y; minimizerPos[3*i+2] = p.z;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
} }
} }
else { else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer(); mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
mm_float4 p = posq[reorderedDrudeParticles[i]]; mm_float4 p = posq[drudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]]; minimizerPos[3*i] = p.x;
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x; minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y; minimizerPos[3*i+2] = p.z;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
} }
minimizerParams.xtol = 1e-7; minimizerParams.xtol = 1e-7;
} }
...@@ -598,6 +571,6 @@ void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double to ...@@ -598,6 +571,6 @@ void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double to
// Perform the minimization. // Perform the minimization.
lbfgsfloatval_t fx; lbfgsfloatval_t fx;
MinimizerData data(context, cl, reorderedDrudeParticles); MinimizerData data(context, cl, drudeParticles);
lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams); lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
} }
\ No newline at end of file
...@@ -149,13 +149,11 @@ public: ...@@ -149,13 +149,11 @@ public:
*/ */
double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator); double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
private: private:
class ReorderListener;
void minimize(ContextImpl& context, double tolerance); void minimize(ContextImpl& context, double tolerance);
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
double prevStepSize; double prevStepSize;
std::vector<int> drudeParticles; std::vector<int> drudeParticles;
std::vector<int> reorderedDrudeParticles;
lbfgsfloatval_t *minimizerPos; lbfgsfloatval_t *minimizerPos;
lbfgs_parameter_t minimizerParams; lbfgs_parameter_t minimizerParams;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
......
...@@ -132,6 +132,7 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt ...@@ -132,6 +132,7 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
} }
void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) { void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// Loop over copies and compute the force on each one. // Loop over copies and compute the force on each one.
...@@ -178,6 +179,15 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr ...@@ -178,6 +179,15 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
cu.setTime(cu.getTime()+dt); cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
if (cu.getAtomsWereReordered() && cu.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
int i = numCopies-1;
void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(translateKernel, args, cu.getNumAtoms());
}
} }
void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
...@@ -188,13 +198,6 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -188,13 +198,6 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
context.computeVirtualSites(); context.computeVirtualSites();
context.updateContextState(); context.updateContextState();
context.calcForcesAndEnergy(true, false); context.calcForcesAndEnergy(true, false);
if (cu.getAtomsWereReordered() && cu.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(translateKernel, args, cu.getNumAtoms());
}
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(), &cu.getVelm().getDevicePointer(), void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i}; &velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms()); cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
......
...@@ -190,6 +190,14 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte ...@@ -190,6 +190,14 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
cl.setTime(cl.getTime()+dt); cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
if (cl.getAtomsWereReordered() && cl.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
translateKernel.setArg<cl_int>(3, numCopies-1);
cl.executeKernel(translateKernel, cl.getNumAtoms());
}
} }
void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
...@@ -199,13 +207,6 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -199,13 +207,6 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
context.computeVirtualSites(); context.computeVirtualSites();
context.updateContextState(); context.updateContextState();
context.calcForcesAndEnergy(true, false); context.calcForcesAndEnergy(true, false);
if (cl.getAtomsWereReordered() && cl.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
translateKernel.setArg<cl_int>(3, i);
cl.executeKernel(translateKernel, cl.getNumAtoms());
}
copyFromContextKernel.setArg<cl_int>(7, i); copyFromContextKernel.setArg<cl_int>(7, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms()); cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment