"vscode:/vscode.git/clone" did not exist on "2c55959680b1436a3c5d70e27c39bdad9b2c311c"
Commit 18f78c1e authored by peastman's avatar peastman
Browse files

Integrators control when atom reordering gets done, instead of having it...

Integrators control when atom reordering gets done, instead of having it happen automatically when forces are evaluated.  This simplifies some code, and also improves unintuitive behavior with CustomIntegrator.
parent 8a8873c6
...@@ -67,7 +67,7 @@ bool CudaContext::hasInitializedCuda = false; ...@@ -67,7 +67,7 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler), const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL),
posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (!hasInitializedCuda) { if (!hasInitializedCuda) {
CHECK_RESULT2(cuInit(0), "Error initializing CUDA"); CHECK_RESULT2(cuInit(0), "Error initializing CUDA");
...@@ -282,7 +282,6 @@ void CudaContext::initialize() { ...@@ -282,7 +282,6 @@ void CudaContext::initialize() {
atomIndex[i] = i; atomIndex[i] = i;
atomIndexDevice->upload(atomIndex); atomIndexDevice->upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
moleculesInvalid = false;
nonbonded->initialize(system); nonbonded->initialize(system);
} }
...@@ -821,11 +820,6 @@ void CudaContext::findMoleculeGroups() { ...@@ -821,11 +820,6 @@ void CudaContext::findMoleculeGroups() {
} }
void CudaContext::invalidateMolecules() { void CudaContext::invalidateMolecules() {
moleculesInvalid = true;
}
void CudaContext::validateMolecules() {
moleculesInvalid = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return; return;
bool valid = true; bool valid = true;
...@@ -925,24 +919,28 @@ void CudaContext::validateMolecules() { ...@@ -925,24 +919,28 @@ void CudaContext::validateMolecules() {
findMoleculeGroups(); findMoleculeGroups();
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute(); reorderListeners[i]->execute();
reorderAtoms();
} }
void CudaContext::reorderAtoms(bool enforcePeriodic) { void CudaContext::reorderAtoms() {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) atomsWereReordered = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff() || stepsSinceReorder < 100) {
stepsSinceReorder++;
return; return;
if (moleculesInvalid) }
validateMolecules();
atomsWereReordered = true; atomsWereReordered = true;
stepsSinceReorder = 0;
if (useDoublePrecision) if (useDoublePrecision)
reorderAtomsImpl<double, double4, double, double4>(enforcePeriodic); reorderAtomsImpl<double, double4, double, double4>();
else if (useMixedPrecision) else if (useMixedPrecision)
reorderAtomsImpl<float, float4, double, double4>(enforcePeriodic); reorderAtomsImpl<float, float4, double, double4>();
else else
reorderAtomsImpl<float, float4, float, float4>(enforcePeriodic); reorderAtomsImpl<float, float4, float, float4>();
nonbonded->updateNeighborListSize();
} }
template <class Real, class Real4, class Mixed, class Mixed4> template <class Real, class Real4, class Mixed, class Mixed4>
void CudaContext::reorderAtomsImpl(bool enforcePeriodic) { void CudaContext::reorderAtomsImpl() {
// Find the range of positions and the number of bins along each axis. // Find the range of positions and the number of bins along each axis.
Real4 padding = {0, 0, 0, 0}; Real4 padding = {0, 0, 0, 0};
...@@ -1019,7 +1017,6 @@ void CudaContext::reorderAtomsImpl(bool enforcePeriodic) { ...@@ -1019,7 +1017,6 @@ void CudaContext::reorderAtomsImpl(bool enforcePeriodic) {
molPos[i].x -= dx; molPos[i].x -= dx;
molPos[i].y -= dy; molPos[i].y -= dy;
molPos[i].z -= dz; molPos[i].z -= dz;
if (enforcePeriodic) {
for (int j = 0; j < (int) atoms.size(); j++) { for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i]; int atom = atoms[j]+mol.offsets[i];
Real4 p = oldPosq[atom]; Real4 p = oldPosq[atom];
...@@ -1034,7 +1031,6 @@ void CudaContext::reorderAtomsImpl(bool enforcePeriodic) { ...@@ -1034,7 +1031,6 @@ void CudaContext::reorderAtomsImpl(bool enforcePeriodic) {
} }
} }
} }
}
// Select a bin for each molecule, then sort them by bin. // Select a bin for each molecule, then sort them by bin.
......
...@@ -298,6 +298,18 @@ public: ...@@ -298,6 +298,18 @@ public:
void setComputeForceCount(int count) { void setComputeForceCount(int count) {
computeForceCount = count; computeForceCount = count;
} }
/**
* Get the number of time steps since the atoms were reordered.
*/
int getStepsSinceReorder() const {
return stepsSinceReorder;
}
/**
* Set the number of time steps since the atoms were reordered.
*/
void setStepsSinceReorder(int steps) {
stepsSinceReorder = steps;
}
/** /**
* Get the number of atoms. * Get the number of atoms.
*/ */
...@@ -429,10 +441,8 @@ public: ...@@ -429,10 +441,8 @@ public:
/** /**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close * Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays. * together in the arrays.
*
* @param enforcePeriodic if true, the atom positions may be altered to enforce periodic boundary conditions
*/ */
void reorderAtoms(bool enforcePeriodic); void reorderAtoms();
/** /**
* Add a listener that should be called whenever atoms get reordered. The CudaContext * Add a listener that should be called whenever atoms get reordered. The CudaContext
* assumes ownership of the object, and deletes it when the context itself is deleted. * assumes ownership of the object, and deletes it when the context itself is deleted.
...@@ -447,15 +457,9 @@ public: ...@@ -447,15 +457,9 @@ public:
/** /**
* Mark that the current molecule definitions (and hence the atom order) may be invalid. * Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions * This should be called whenever force field parameters change. It will cause the definitions
* and order to be revalidated the next to reorderAtoms() is called. * and order to be revalidated.
*/ */
void invalidateMolecules(); void invalidateMolecules();
/**
* Get whether the current molecule definitions are valid.
*/
bool getMoleculesAreInvalid() {
return moleculesInvalid;
}
private: private:
struct Molecule; struct Molecule;
struct MoleculeGroup; struct MoleculeGroup;
...@@ -472,7 +476,7 @@ private: ...@@ -472,7 +476,7 @@ private:
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use. * This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/ */
template <class Real, class Real4, class Mixed, class Mixed4> template <class Real, class Real4, class Mixed, class Mixed4>
void reorderAtomsImpl(bool enforcePeriodic); void reorderAtomsImpl();
static bool hasInitializedCuda; static bool hasInitializedCuda;
const System& system; const System& system;
double time, computeCapability; double time, computeCapability;
...@@ -481,11 +485,12 @@ private: ...@@ -481,11 +485,12 @@ private:
int contextIndex; int contextIndex;
int stepCount; int stepCount;
int computeForceCount; int computeForceCount;
int stepsSinceReorder;
int numAtoms; int numAtoms;
int paddedNumAtoms; int paddedNumAtoms;
int numAtomBlocks; int numAtomBlocks;
int numThreadBlocks; int numThreadBlocks;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, moleculesInvalid; bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered;
std::string compiler, tempDir, gpuArchitecture; std::string compiler, tempDir, gpuArchitecture;
float4 periodicBoxSizeFloat, invPeriodicBoxSizeFloat; float4 periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxSize, invPeriodicBoxSize; double4 periodicBoxSize, invPeriodicBoxSize;
......
...@@ -86,11 +86,6 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool ...@@ -86,11 +86,6 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
cu.setAsCurrent(); cu.setAsCurrent();
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities(); CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0); bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0);
cu.setAtomsWereReordered(false);
if (nb.getUseCutoff() && includeNonbonded && (cu.getMoleculesAreInvalid() || cu.getComputeForceCount()%100 == 0)) {
cu.reorderAtoms(!cu.getMoleculesAreInvalid());
nb.updateNeighborListSize();
}
cu.setComputeForceCount(cu.getComputeForceCount()+1); cu.setComputeForceCount(cu.getComputeForceCount()+1);
cu.clearAutoclearBuffers(); cu.clearAutoclearBuffers();
if (includeNonbonded) if (includeNonbonded)
...@@ -220,6 +215,7 @@ void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector< ...@@ -220,6 +215,7 @@ void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<
} }
for (int i = 0; i < (int) cu.getPosCellOffsets().size(); i++) for (int i = 0; i < (int) cu.getPosCellOffsets().size(); i++)
cu.getPosCellOffsets()[i] = make_int4(0, 0, 0, 0); cu.getPosCellOffsets()[i] = make_int4(0, 0, 0, 0);
cu.reorderAtoms();
} }
void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) { void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
...@@ -317,8 +313,8 @@ void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& ...@@ -317,8 +313,8 @@ void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream&
stream.write((char*) &time, sizeof(double)); stream.write((char*) &time, sizeof(double));
int stepCount = cu.getStepCount(); int stepCount = cu.getStepCount();
stream.write((char*) &stepCount, sizeof(int)); stream.write((char*) &stepCount, sizeof(int));
int computeForceCount = cu.getComputeForceCount(); int stepsSinceReorder = cu.getStepsSinceReorder();
stream.write((char*) &computeForceCount, sizeof(int)); stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cu.getPinnedBuffer(); char* buffer = (char*) cu.getPinnedBuffer();
cu.getPosq().download(buffer); cu.getPosq().download(buffer);
stream.write(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize()); stream.write(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize());
...@@ -349,14 +345,14 @@ void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& st ...@@ -349,14 +345,14 @@ void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& st
throw OpenMMException("Checkpoint was created with a different numeric precision"); throw OpenMMException("Checkpoint was created with a different numeric precision");
double time; double time;
stream.read((char*) &time, sizeof(double)); stream.read((char*) &time, sizeof(double));
int stepCount, computeForceCount; int stepCount, stepsSinceReorder;
stream.read((char*) &stepCount, sizeof(int)); stream.read((char*) &stepCount, sizeof(int));
stream.read((char*) &computeForceCount, sizeof(int)); stream.read((char*) &stepsSinceReorder, sizeof(int));
vector<CudaContext*>& contexts = cu.getPlatformData().contexts; vector<CudaContext*>& contexts = cu.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++) { for (int i = 0; i < (int) contexts.size(); i++) {
contexts[i]->setTime(time); contexts[i]->setTime(time);
contexts[i]->setStepCount(stepCount); contexts[i]->setStepCount(stepCount);
contexts[i]->setComputeForceCount(computeForceCount); contexts[i]->setStepsSinceReorder(stepsSinceReorder);
} }
char* buffer = (char*) cu.getPinnedBuffer(); char* buffer = (char*) cu.getPinnedBuffer();
stream.read(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize()); stream.read(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize());
...@@ -4134,6 +4130,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn ...@@ -4134,6 +4130,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
cu.setTime(cu.getTime()+dt); cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateVerletStepKernel::computeKineticEnergy(ContextImpl& context, const VerletIntegrator& integrator) { double CudaIntegrateVerletStepKernel::computeKineticEnergy(ContextImpl& context, const VerletIntegrator& integrator) {
...@@ -4221,6 +4218,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev ...@@ -4221,6 +4218,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
cu.setTime(cu.getTime()+stepSize); cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const LangevinIntegrator& integrator) { double CudaIntegrateLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const LangevinIntegrator& integrator) {
...@@ -4283,6 +4281,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni ...@@ -4283,6 +4281,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
cu.setTime(cu.getTime()+stepSize); cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateBrownianStepKernel::computeKineticEnergy(ContextImpl& context, const BrownianIntegrator& integrator) { double CudaIntegrateBrownianStepKernel::computeKineticEnergy(ContextImpl& context, const BrownianIntegrator& integrator) {
...@@ -4363,6 +4362,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons ...@@ -4363,6 +4362,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
} }
cu.setTime(time); cu.setTime(time);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
return dt; return dt;
} }
...@@ -4457,6 +4457,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co ...@@ -4457,6 +4457,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
} }
cu.setTime(time); cu.setTime(time);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
return dt; return dt;
} }
...@@ -5129,6 +5130,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -5129,6 +5130,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu.setTime(cu.getTime()+integrator.getStepSize()); cu.setTime(cu.getTime()+integrator.getStepSize());
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) { double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
...@@ -5359,33 +5361,6 @@ void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, d ...@@ -5359,33 +5361,6 @@ void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, d
void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) { void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
cu.setAsCurrent(); cu.setAsCurrent();
if (cu.getAtomsWereReordered()) {
// The atoms were reordered since we saved the positions, so we need to fix them.
const vector<int> atomOrder = cu.getAtomIndex();
int numAtoms = cu.getNumAtoms();
if (cu.getUseDoublePrecision()) {
double4* pos = (double4*) cu.getPinnedBuffer();
savedPositions->download(pos);
vector<double4> fixedPos(cu.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cu.getPosq().upload(pos);
}
else {
float4* pos = (float4*) cu.getPinnedBuffer();
savedPositions->download(pos);
vector<float4> fixedPos(cu.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cu.getPosq().upload(pos);
}
}
else {
int bytesToCopy = cu.getPosq().getSize()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4)); int bytesToCopy = cu.getPosq().getSize()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
CUresult result = cuMemcpyDtoD(cu.getPosq().getDevicePointer(), savedPositions->getDevicePointer(), bytesToCopy); CUresult result = cuMemcpyDtoD(cu.getPosq().getDevicePointer(), savedPositions->getDevicePointer(), bytesToCopy);
if (result != CUDA_SUCCESS) { if (result != CUDA_SUCCESS) {
...@@ -5393,7 +5368,6 @@ void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) ...@@ -5393,7 +5368,6 @@ void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context)
m<<"Error restoring positions for MC barostat: "<<cu.getErrorString(result)<<" ("<<result<<")"; m<<"Error restoring positions for MC barostat: "<<cu.getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(m.str()); throw OpenMMException(m.str());
} }
}
} }
CudaRemoveCMMotionKernel::~CudaRemoveCMMotionKernel() { CudaRemoveCMMotionKernel::~CudaRemoveCMMotionKernel() {
......
...@@ -739,12 +739,6 @@ void testChangingParameters() { ...@@ -739,12 +739,6 @@ void testChangingParameters() {
nonbonded->getParticleParameters(i, charge, sigma, epsilon); nonbonded->getParticleParameters(i, charge, sigma, epsilon);
nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon); nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon);
} }
double total = 0;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
nonbonded->getParticleParameters(i, charge, sigma, epsilon);
total += charge;
}
nonbonded->updateParametersInContext(cuContext); nonbonded->updateParametersInContext(cuContext);
nonbonded->updateParametersInContext(referenceContext); nonbonded->updateParametersInContext(referenceContext);
cuState = cuContext.getState(State::Forces | State::Energy); cuState = cuContext.getState(State::Forces | State::Energy);
......
...@@ -66,7 +66,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -66,7 +66,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
} }
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) : OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), atomsWereReordered(false), posq(NULL), system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL), posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") { if (precision == "single") {
...@@ -407,7 +407,6 @@ void OpenCLContext::initialize() { ...@@ -407,7 +407,6 @@ void OpenCLContext::initialize() {
atomIndex[i] = i; atomIndex[i] = i;
atomIndexDevice->upload(atomIndex); atomIndexDevice->upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
moleculesInvalid = false;
nonbonded->initialize(system); nonbonded->initialize(system);
} }
...@@ -826,11 +825,6 @@ void OpenCLContext::findMoleculeGroups() { ...@@ -826,11 +825,6 @@ void OpenCLContext::findMoleculeGroups() {
} }
void OpenCLContext::invalidateMolecules() { void OpenCLContext::invalidateMolecules() {
moleculesInvalid = true;
}
void OpenCLContext::validateMolecules() {
moleculesInvalid = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return; return;
bool valid = true; bool valid = true;
...@@ -930,24 +924,28 @@ void OpenCLContext::validateMolecules() { ...@@ -930,24 +924,28 @@ void OpenCLContext::validateMolecules() {
findMoleculeGroups(); findMoleculeGroups();
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute(); reorderListeners[i]->execute();
reorderAtoms();
} }
void OpenCLContext::reorderAtoms(bool enforcePeriodic) { void OpenCLContext::reorderAtoms() {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) atomsWereReordered = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff() || stepsSinceReorder < 100) {
stepsSinceReorder++;
return; return;
if (moleculesInvalid) }
validateMolecules();
atomsWereReordered = true; atomsWereReordered = true;
stepsSinceReorder = 0;
if (useDoublePrecision) if (useDoublePrecision)
reorderAtomsImpl<cl_double, mm_double4, cl_double, mm_double4>(enforcePeriodic); reorderAtomsImpl<cl_double, mm_double4, cl_double, mm_double4>();
else if (useMixedPrecision) else if (useMixedPrecision)
reorderAtomsImpl<cl_float, mm_float4, cl_double, mm_double4>(enforcePeriodic); reorderAtomsImpl<cl_float, mm_float4, cl_double, mm_double4>();
else else
reorderAtomsImpl<cl_float, mm_float4, cl_float, mm_float4>(enforcePeriodic); reorderAtomsImpl<cl_float, mm_float4, cl_float, mm_float4>();
nonbonded->updateNeighborListSize();
} }
template <class Real, class Real4, class Mixed, class Mixed4> template <class Real, class Real4, class Mixed, class Mixed4>
void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) { void OpenCLContext::reorderAtomsImpl() {
// Find the range of positions and the number of bins along each axis. // Find the range of positions and the number of bins along each axis.
...@@ -1023,7 +1021,6 @@ void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) { ...@@ -1023,7 +1021,6 @@ void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) {
molPos[i].x -= dx; molPos[i].x -= dx;
molPos[i].y -= dy; molPos[i].y -= dy;
molPos[i].z -= dz; molPos[i].z -= dz;
if (enforcePeriodic) {
for (int j = 0; j < (int) atoms.size(); j++) { for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i]; int atom = atoms[j]+mol.offsets[i];
Real4 p = oldPosq[atom]; Real4 p = oldPosq[atom];
...@@ -1038,7 +1035,6 @@ void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) { ...@@ -1038,7 +1035,6 @@ void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) {
} }
} }
} }
}
// Select a bin for each molecule, then sort them by bin. // Select a bin for each molecule, then sort them by bin.
......
...@@ -381,6 +381,18 @@ public: ...@@ -381,6 +381,18 @@ public:
void setComputeForceCount(int count) { void setComputeForceCount(int count) {
computeForceCount = count; computeForceCount = count;
} }
/**
* Get the number of time steps since the atoms were reordered.
*/
int getStepsSinceReorder() const {
return stepsSinceReorder;
}
/**
* Set the number of time steps since the atoms were reordered.
*/
void setStepsSinceReorder(int steps) {
stepsSinceReorder = steps;
}
/** /**
* Get the number of atoms. * Get the number of atoms.
*/ */
...@@ -529,10 +541,8 @@ public: ...@@ -529,10 +541,8 @@ public:
/** /**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close * Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays. * together in the arrays.
*
* @param enforcePeriodic if true, the atom positions may be altered to enforce periodic boundary conditions
*/ */
void reorderAtoms(bool enforcePeriodic); void reorderAtoms();
/** /**
* Add a listener that should be called whenever atoms get reordered. The OpenCLContext * Add a listener that should be called whenever atoms get reordered. The OpenCLContext
* assumes ownership of the object, and deletes it when the context itself is deleted. * assumes ownership of the object, and deletes it when the context itself is deleted.
...@@ -547,15 +557,9 @@ public: ...@@ -547,15 +557,9 @@ public:
/** /**
* Mark that the current molecule definitions (and hence the atom order) may be invalid. * Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions * This should be called whenever force field parameters change. It will cause the definitions
* and order to be revalidated the next to reorderAtoms() is called. * and order to be revalidated.
*/ */
void invalidateMolecules(); void invalidateMolecules();
/**
* Get whether the current molecule definitions are valid.
*/
bool getMoleculesAreInvalid() {
return moleculesInvalid;
}
private: private:
struct Molecule; struct Molecule;
struct MoleculeGroup; struct MoleculeGroup;
...@@ -572,7 +576,7 @@ private: ...@@ -572,7 +576,7 @@ private:
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use. * This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/ */
template <class Real, class Real4, class Mixed, class Mixed4> template <class Real, class Real4, class Mixed, class Mixed4>
void reorderAtomsImpl(bool enforcePeriodic); void reorderAtomsImpl();
const System& system; const System& system;
double time; double time;
OpenCLPlatform::PlatformData& platformData; OpenCLPlatform::PlatformData& platformData;
...@@ -580,13 +584,14 @@ private: ...@@ -580,13 +584,14 @@ private:
int contextIndex; int contextIndex;
int stepCount; int stepCount;
int computeForceCount; int computeForceCount;
int stepsSinceReorder;
int numAtoms; int numAtoms;
int paddedNumAtoms; int paddedNumAtoms;
int numAtomBlocks; int numAtomBlocks;
int numThreadBlocks; int numThreadBlocks;
int numForceBuffers; int numForceBuffers;
int simdWidth; int simdWidth;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, moleculesInvalid; bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered;
mm_float4 periodicBoxSize, invPeriodicBoxSize; mm_float4 periodicBoxSize, invPeriodicBoxSize;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble; mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble;
std::string defaultOptimizationOptions; std::string defaultOptimizationOptions;
......
...@@ -106,11 +106,6 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -106,11 +106,6 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) { void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities(); OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0); bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0);
cl.setAtomsWereReordered(false);
if (nb.getUseCutoff() && includeNonbonded && (cl.getMoleculesAreInvalid() || cl.getComputeForceCount()%100 == 0)) {
cl.reorderAtoms(!cl.getMoleculesAreInvalid());
nb.updateNeighborListSize();
}
cl.setComputeForceCount(cl.getComputeForceCount()+1); cl.setComputeForceCount(cl.getComputeForceCount()+1);
cl.clearAutoclearBuffers(); cl.clearAutoclearBuffers();
if (includeNonbonded) if (includeNonbonded)
...@@ -239,6 +234,7 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vecto ...@@ -239,6 +234,7 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vecto
} }
for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++) for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++)
cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0); cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0);
cl.reorderAtoms();
} }
void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) { void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
...@@ -342,8 +338,8 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream ...@@ -342,8 +338,8 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream
stream.write((char*) &time, sizeof(double)); stream.write((char*) &time, sizeof(double));
int stepCount = cl.getStepCount(); int stepCount = cl.getStepCount();
stream.write((char*) &stepCount, sizeof(int)); stream.write((char*) &stepCount, sizeof(int));
int computeForceCount = cl.getComputeForceCount(); int stepsSinceReorder = cl.getStepsSinceReorder();
stream.write((char*) &computeForceCount, sizeof(int)); stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cl.getPinnedBuffer(); char* buffer = (char*) cl.getPinnedBuffer();
cl.getPosq().download(buffer); cl.getPosq().download(buffer);
stream.write(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize()); stream.write(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize());
...@@ -373,14 +369,14 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& ...@@ -373,14 +369,14 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream&
throw OpenMMException("Checkpoint was created with a different numeric precision"); throw OpenMMException("Checkpoint was created with a different numeric precision");
double time; double time;
stream.read((char*) &time, sizeof(double)); stream.read((char*) &time, sizeof(double));
int stepCount, computeForceCount; int stepCount, stepsSinceReorder;
stream.read((char*) &stepCount, sizeof(int)); stream.read((char*) &stepCount, sizeof(int));
stream.read((char*) &computeForceCount, sizeof(int)); stream.read((char*) &stepsSinceReorder, sizeof(int));
vector<OpenCLContext*>& contexts = cl.getPlatformData().contexts; vector<OpenCLContext*>& contexts = cl.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++) { for (int i = 0; i < (int) contexts.size(); i++) {
contexts[i]->setTime(time); contexts[i]->setTime(time);
contexts[i]->setStepCount(stepCount); contexts[i]->setStepCount(stepCount);
contexts[i]->setComputeForceCount(computeForceCount); contexts[i]->setStepsSinceReorder(stepsSinceReorder);
} }
char* buffer = (char*) cl.getPinnedBuffer(); char* buffer = (char*) cl.getPinnedBuffer();
stream.read(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize()); stream.read(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize());
...@@ -4296,6 +4292,7 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet ...@@ -4296,6 +4292,7 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet
cl.setTime(cl.getTime()+dt); cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag. // Reduce UI lag.
...@@ -4395,6 +4392,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang ...@@ -4395,6 +4392,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
cl.setTime(cl.getTime()+stepSize); cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag. // Reduce UI lag.
...@@ -4473,6 +4471,7 @@ void OpenCLIntegrateBrownianStepKernel::execute(ContextImpl& context, const Brow ...@@ -4473,6 +4471,7 @@ void OpenCLIntegrateBrownianStepKernel::execute(ContextImpl& context, const Brow
cl.setTime(cl.getTime()+stepSize); cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag. // Reduce UI lag.
...@@ -4578,6 +4577,7 @@ double OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, co ...@@ -4578,6 +4577,7 @@ double OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, co
} }
cl.setTime(time); cl.setTime(time);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
return dt; return dt;
} }
...@@ -4691,6 +4691,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, ...@@ -4691,6 +4691,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context,
} }
cl.setTime(time); cl.setTime(time);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
return dt; return dt;
} }
...@@ -5354,6 +5355,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -5354,6 +5355,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
cl.setTime(cl.getTime()+integrator.getStepSize()); cl.setTime(cl.getTime()+integrator.getStepSize());
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag. // Reduce UI lag.
...@@ -5580,33 +5582,6 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -5580,33 +5582,6 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
} }
void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) { void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
if (cl.getAtomsWereReordered()) {
// The atoms were reordered since we saved the positions, so we need to fix them.
const vector<int> atomOrder = cl.getAtomIndex();
int numAtoms = cl.getNumAtoms();
if (cl.getUseDoublePrecision()) {
mm_double4* pos = (mm_double4*) cl.getPinnedBuffer();
savedPositions->download(pos);
vector<mm_double4> fixedPos(cl.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cl.getPosq().upload(pos);
}
else {
mm_float4* pos = (mm_float4*) cl.getPinnedBuffer();
savedPositions->download(pos);
vector<mm_float4> fixedPos(cl.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cl.getPosq().upload(pos);
}
}
else
cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, cl.getPosq().getSize()*sizeof(mm_float4)); cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, cl.getPosq().getSize()*sizeof(mm_float4));
} }
......
...@@ -742,12 +742,6 @@ void testChangingParameters() { ...@@ -742,12 +742,6 @@ void testChangingParameters() {
nonbonded->getParticleParameters(i, charge, sigma, epsilon); nonbonded->getParticleParameters(i, charge, sigma, epsilon);
nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon); nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon);
} }
double total = 0;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
nonbonded->getParticleParameters(i, charge, sigma, epsilon);
total += charge;
}
nonbonded->updateParametersInContext(clContext); nonbonded->updateParametersInContext(clContext);
nonbonded->updateParametersInContext(referenceContext); nonbonded->updateParametersInContext(referenceContext);
clState = clContext.getState(State::Forces | State::Energy); clState = clContext.getState(State::Forces | State::Energy);
......
...@@ -358,33 +358,13 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D ...@@ -358,33 +358,13 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
cu.setTime(cu.getTime()+stepSize); cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) { double CudaIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize()); return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
} }
class CudaIntegrateDrudeSCFStepKernel::ReorderListener : public CudaContext::ReorderListener {
public:
ReorderListener(CudaContext& cu, const vector<int>& drudeParticles, vector<int>& reorderedDrudeParticles) :
cu(cu), drudeParticles(drudeParticles), reorderedDrudeParticles(reorderedDrudeParticles) {
}
void execute() {
const vector<int>& order = cu.getAtomIndex();
int numParticles = order.size();
vector<int> inverseOrder(numParticles);
for (int i = 0; i < numParticles; i++)
inverseOrder[order[i]] = i;
int numDrudeParticles = drudeParticles.size();
for (int i = 0; i < numDrudeParticles; i++)
reorderedDrudeParticles[i] = inverseOrder[drudeParticles[i]];
}
private:
CudaContext& cu;
const vector<int>& drudeParticles;
vector<int>& reorderedDrudeParticles;
};
CudaIntegrateDrudeSCFStepKernel::~CudaIntegrateDrudeSCFStepKernel() { CudaIntegrateDrudeSCFStepKernel::~CudaIntegrateDrudeSCFStepKernel() {
if (minimizerPos != NULL) if (minimizerPos != NULL)
lbfgs_free(minimizerPos); lbfgs_free(minimizerPos);
...@@ -401,9 +381,7 @@ void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const Dru ...@@ -401,9 +381,7 @@ void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const Dru
double charge, polarizability, aniso12, aniso34; double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34); force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
drudeParticles.push_back(p); drudeParticles.push_back(p);
reorderedDrudeParticles.push_back(p);
} }
cu.addReorderListener(new ReorderListener(cu, drudeParticles, reorderedDrudeParticles));
// Initialize the energy minimizer. // Initialize the energy minimizer.
...@@ -469,6 +447,7 @@ void CudaIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeS ...@@ -469,6 +447,7 @@ void CudaIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeS
cu.setTime(cu.getTime()+dt); cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
} }
double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) { double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
...@@ -478,39 +457,36 @@ double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& contex ...@@ -478,39 +457,36 @@ double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& contex
struct MinimizerData { struct MinimizerData {
ContextImpl& context; ContextImpl& context;
CudaContext& cu; CudaContext& cu;
vector<int>& reorderedDrudeParticles; vector<int>& drudeParticles;
MinimizerData(ContextImpl& context, CudaContext& cu, vector<int>& reorderedDrudeParticles) : context(context), cu(cu), reorderedDrudeParticles(reorderedDrudeParticles) {} MinimizerData(ContextImpl& context, CudaContext& cu, vector<int>& drudeParticles) : context(context), cu(cu), drudeParticles(drudeParticles) {}
}; };
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) { static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
MinimizerData* data = reinterpret_cast<MinimizerData*>(instance); MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
ContextImpl& context = data->context; ContextImpl& context = data->context;
CudaContext& cu = data->cu; CudaContext& cu = data->cu;
vector<int>& reorderedDrudeParticles = data->reorderedDrudeParticles; vector<int>& drudeParticles = data->drudeParticles;
int numDrudeParticles = reorderedDrudeParticles.size(); int numDrudeParticles = drudeParticles.size();
// Set the particle positions. // Set the particle positions.
cu.getPosq().download(cu.getPinnedBuffer()); cu.getPosq().download(cu.getPinnedBuffer());
double4 periodicBoxSize = cu.getPeriodicBoxSize();
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer(); double4* posq = (double4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
double4& p = posq[reorderedDrudeParticles[i]]; double4& p = posq[drudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]]; p.x = x[3*i];
p.x = x[3*i]+offset.x*periodicBoxSize.x; p.y = x[3*i+1];
p.y = x[3*i+1]+offset.y*periodicBoxSize.y; p.z = x[3*i+2];
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
} }
} }
else { else {
float4* posq = (float4*) cu.getPinnedBuffer(); float4* posq = (float4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
float4& p = posq[reorderedDrudeParticles[i]]; float4& p = posq[drudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]]; p.x = x[3*i];
p.x = x[3*i]+offset.x*periodicBoxSize.x; p.y = x[3*i+1];
p.y = x[3*i+1]+offset.y*periodicBoxSize.y; p.z = x[3*i+2];
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
} }
} }
cu.getPosq().upload(cu.getPinnedBuffer()); cu.getPosq().upload(cu.getPinnedBuffer());
...@@ -523,7 +499,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf ...@@ -523,7 +499,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
double forceScale = -1.0/0x100000000; double forceScale = -1.0/0x100000000;
int paddedNumAtoms = cu.getPaddedNumAtoms(); int paddedNumAtoms = cu.getPaddedNumAtoms();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
int index = reorderedDrudeParticles[i]; int index = drudeParticles[i];
g[3*i] = forceScale*force[index]; g[3*i] = forceScale*force[index];
g[3*i+1] = forceScale*force[index+paddedNumAtoms]; g[3*i+1] = forceScale*force[index+paddedNumAtoms];
g[3*i+2] = forceScale*force[index+paddedNumAtoms*2]; g[3*i+2] = forceScale*force[index+paddedNumAtoms*2];
...@@ -534,27 +510,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf ...@@ -534,27 +510,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) { void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
// Record the initial positions. // Record the initial positions.
int numDrudeParticles = reorderedDrudeParticles.size(); int numDrudeParticles = drudeParticles.size();
cu.getPosq().download(cu.getPinnedBuffer()); cu.getPosq().download(cu.getPinnedBuffer());
double4 periodicBoxSize = cu.getPeriodicBoxSize();
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer(); double4* posq = (double4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
double4 p = posq[reorderedDrudeParticles[i]]; double4 p = posq[drudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]]; minimizerPos[3*i] = p.x;
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x; minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y; minimizerPos[3*i+2] = p.z;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
} }
} }
else { else {
float4* posq = (float4*) cu.getPinnedBuffer(); float4* posq = (float4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
float4 p = posq[reorderedDrudeParticles[i]]; float4 p = posq[drudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]]; minimizerPos[3*i] = p.x;
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x; minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y; minimizerPos[3*i+2] = p.z;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
} }
minimizerParams.xtol = 1e-7; minimizerParams.xtol = 1e-7;
} }
...@@ -571,6 +544,6 @@ void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tole ...@@ -571,6 +544,6 @@ void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tole
// Perform the minimization. // Perform the minimization.
lbfgsfloatval_t fx; lbfgsfloatval_t fx;
MinimizerData data(context, cu, reorderedDrudeParticles); MinimizerData data(context, cu, drudeParticles);
lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams); lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
} }
\ No newline at end of file
...@@ -148,12 +148,10 @@ public: ...@@ -148,12 +148,10 @@ public:
*/ */
double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator); double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
private: private:
class ReorderListener;
void minimize(ContextImpl& context, double tolerance); void minimize(ContextImpl& context, double tolerance);
CudaContext& cu; CudaContext& cu;
double prevStepSize; double prevStepSize;
std::vector<int> drudeParticles; std::vector<int> drudeParticles;
std::vector<int> reorderedDrudeParticles;
lbfgsfloatval_t *minimizerPos; lbfgsfloatval_t *minimizerPos;
lbfgs_parameter_t minimizerParams; lbfgs_parameter_t minimizerParams;
CUfunction kernel1, kernel2; CUfunction kernel1, kernel2;
......
...@@ -364,33 +364,13 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const ...@@ -364,33 +364,13 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
cl.setTime(cl.getTime()+stepSize); cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
} }
double OpenCLIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) { double OpenCLIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize()); return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
} }
class OpenCLIntegrateDrudeSCFStepKernel::ReorderListener : public OpenCLContext::ReorderListener {
public:
ReorderListener(OpenCLContext& cl, const vector<int>& drudeParticles, vector<int>& reorderedDrudeParticles) :
cl(cl), drudeParticles(drudeParticles), reorderedDrudeParticles(reorderedDrudeParticles) {
}
void execute() {
const vector<int>& order = cl.getAtomIndex();
int numParticles = order.size();
vector<int> inverseOrder(numParticles);
for (int i = 0; i < numParticles; i++)
inverseOrder[order[i]] = i;
int numDrudeParticles = drudeParticles.size();
for (int i = 0; i < numDrudeParticles; i++)
reorderedDrudeParticles[i] = inverseOrder[drudeParticles[i]];
}
private:
OpenCLContext& cl;
const vector<int>& drudeParticles;
vector<int>& reorderedDrudeParticles;
};
OpenCLIntegrateDrudeSCFStepKernel::~OpenCLIntegrateDrudeSCFStepKernel() { OpenCLIntegrateDrudeSCFStepKernel::~OpenCLIntegrateDrudeSCFStepKernel() {
if (minimizerPos != NULL) if (minimizerPos != NULL)
lbfgs_free(minimizerPos); lbfgs_free(minimizerPos);
...@@ -406,9 +386,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const D ...@@ -406,9 +386,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const D
double charge, polarizability, aniso12, aniso34; double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34); force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
drudeParticles.push_back(p); drudeParticles.push_back(p);
reorderedDrudeParticles.push_back(p);
} }
cl.addReorderListener(new ReorderListener(cl, drudeParticles, reorderedDrudeParticles));
// Initialize the energy minimizer. // Initialize the energy minimizer.
...@@ -481,6 +459,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud ...@@ -481,6 +459,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud
cl.setTime(cl.getTime()+dt); cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag. // Reduce UI lag.
...@@ -496,39 +475,36 @@ double OpenCLIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& cont ...@@ -496,39 +475,36 @@ double OpenCLIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& cont
struct MinimizerData { struct MinimizerData {
ContextImpl& context; ContextImpl& context;
OpenCLContext& cl; OpenCLContext& cl;
vector<int>& reorderedDrudeParticles; vector<int>& drudeParticles;
MinimizerData(ContextImpl& context, OpenCLContext& cl, vector<int>& reorderedDrudeParticles) : context(context), cl(cl), reorderedDrudeParticles(reorderedDrudeParticles) {} MinimizerData(ContextImpl& context, OpenCLContext& cl, vector<int>& drudeParticles) : context(context), cl(cl), drudeParticles(drudeParticles) {}
}; };
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) { static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
MinimizerData* data = reinterpret_cast<MinimizerData*>(instance); MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
ContextImpl& context = data->context; ContextImpl& context = data->context;
OpenCLContext& cl = data->cl; OpenCLContext& cl = data->cl;
vector<int>& reorderedDrudeParticles = data->reorderedDrudeParticles; vector<int>& drudeParticles = data->drudeParticles;
int numDrudeParticles = reorderedDrudeParticles.size(); int numDrudeParticles = drudeParticles.size();
// Set the particle positions. // Set the particle positions.
cl.getPosq().download(cl.getPinnedBuffer()); cl.getPosq().download(cl.getPinnedBuffer());
mm_double4 periodicBoxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer(); mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
mm_double4& p = posq[reorderedDrudeParticles[i]]; mm_double4& p = posq[drudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]]; p.x = x[3*i];
p.x = x[3*i]+offset.x*periodicBoxSize.x; p.y = x[3*i+1];
p.y = x[3*i+1]+offset.y*periodicBoxSize.y; p.z = x[3*i+2];
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
} }
} }
else { else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer(); mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
mm_float4& p = posq[reorderedDrudeParticles[i]]; mm_float4& p = posq[drudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]]; p.x = x[3*i];
p.x = x[3*i]+offset.x*periodicBoxSize.x; p.y = x[3*i+1];
p.y = x[3*i+1]+offset.y*periodicBoxSize.y; p.z = x[3*i+2];
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
} }
} }
cl.getPosq().upload(cl.getPinnedBuffer()); cl.getPosq().upload(cl.getPinnedBuffer());
...@@ -540,7 +516,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf ...@@ -540,7 +516,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
mm_double4* force = (mm_double4*) cl.getPinnedBuffer(); mm_double4* force = (mm_double4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
int index = reorderedDrudeParticles[i]; int index = drudeParticles[i];
g[3*i] = -force[index].x; g[3*i] = -force[index].x;
g[3*i+1] = -force[index].y; g[3*i+1] = -force[index].y;
g[3*i+2] = -force[index].z; g[3*i+2] = -force[index].z;
...@@ -549,7 +525,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf ...@@ -549,7 +525,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
else { else {
mm_float4* force = (mm_float4*) cl.getPinnedBuffer(); mm_float4* force = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
int index = reorderedDrudeParticles[i]; int index = drudeParticles[i];
g[3*i] = -force[index].x; g[3*i] = -force[index].x;
g[3*i+1] = -force[index].y; g[3*i+1] = -force[index].y;
g[3*i+2] = -force[index].z; g[3*i+2] = -force[index].z;
...@@ -561,27 +537,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf ...@@ -561,27 +537,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) { void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
// Record the initial positions. // Record the initial positions.
int numDrudeParticles = reorderedDrudeParticles.size(); int numDrudeParticles = drudeParticles.size();
cl.getPosq().download(cl.getPinnedBuffer()); cl.getPosq().download(cl.getPinnedBuffer());
mm_double4 periodicBoxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer(); mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
mm_double4 p = posq[reorderedDrudeParticles[i]]; mm_double4 p = posq[drudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]]; minimizerPos[3*i] = p.x;
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x; minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y; minimizerPos[3*i+2] = p.z;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
} }
} }
else { else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer(); mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) { for (int i = 0; i < numDrudeParticles; ++i) {
mm_float4 p = posq[reorderedDrudeParticles[i]]; mm_float4 p = posq[drudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]]; minimizerPos[3*i] = p.x;
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x; minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y; minimizerPos[3*i+2] = p.z;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
} }
minimizerParams.xtol = 1e-7; minimizerParams.xtol = 1e-7;
} }
...@@ -598,6 +571,6 @@ void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double to ...@@ -598,6 +571,6 @@ void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double to
// Perform the minimization. // Perform the minimization.
lbfgsfloatval_t fx; lbfgsfloatval_t fx;
MinimizerData data(context, cl, reorderedDrudeParticles); MinimizerData data(context, cl, drudeParticles);
lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams); lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
} }
\ No newline at end of file
...@@ -149,13 +149,11 @@ public: ...@@ -149,13 +149,11 @@ public:
*/ */
double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator); double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
private: private:
class ReorderListener;
void minimize(ContextImpl& context, double tolerance); void minimize(ContextImpl& context, double tolerance);
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
double prevStepSize; double prevStepSize;
std::vector<int> drudeParticles; std::vector<int> drudeParticles;
std::vector<int> reorderedDrudeParticles;
lbfgsfloatval_t *minimizerPos; lbfgsfloatval_t *minimizerPos;
lbfgs_parameter_t minimizerParams; lbfgs_parameter_t minimizerParams;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
......
...@@ -132,6 +132,7 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt ...@@ -132,6 +132,7 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
} }
void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) { void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// Loop over copies and compute the force on each one. // Loop over copies and compute the force on each one.
...@@ -178,6 +179,15 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr ...@@ -178,6 +179,15 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
cu.setTime(cu.getTime()+dt); cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
if (cu.getAtomsWereReordered() && cu.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
int i = numCopies-1;
void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(translateKernel, args, cu.getNumAtoms());
}
} }
void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
...@@ -188,13 +198,6 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -188,13 +198,6 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
context.computeVirtualSites(); context.computeVirtualSites();
context.updateContextState(); context.updateContextState();
context.calcForcesAndEnergy(true, false); context.calcForcesAndEnergy(true, false);
if (cu.getAtomsWereReordered() && cu.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(translateKernel, args, cu.getNumAtoms());
}
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(), &cu.getVelm().getDevicePointer(), void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i}; &velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms()); cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
......
...@@ -190,6 +190,14 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte ...@@ -190,6 +190,14 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
cl.setTime(cl.getTime()+dt); cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1); cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
if (cl.getAtomsWereReordered() && cl.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
translateKernel.setArg<cl_int>(3, numCopies-1);
cl.executeKernel(translateKernel, cl.getNumAtoms());
}
} }
void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
...@@ -199,13 +207,6 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -199,13 +207,6 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
context.computeVirtualSites(); context.computeVirtualSites();
context.updateContextState(); context.updateContextState();
context.calcForcesAndEnergy(true, false); context.calcForcesAndEnergy(true, false);
if (cl.getAtomsWereReordered() && cl.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
translateKernel.setArg<cl_int>(3, i);
cl.executeKernel(translateKernel, cl.getNumAtoms());
}
copyFromContextKernel.setArg<cl_int>(7, i); copyFromContextKernel.setArg<cl_int>(7, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms()); cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment