Commit c589f1cc authored by Yutong Zhao's avatar Yutong Zhao
Browse files

Merge pull request #11 from peastman/master

Improved infrastructure for integrators
parents 6943ef5b 18f78c1e
......@@ -67,7 +67,7 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL),
posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (!hasInitializedCuda) {
CHECK_RESULT2(cuInit(0), "Error initializing CUDA");
......@@ -282,7 +282,6 @@ void CudaContext::initialize() {
atomIndex[i] = i;
atomIndexDevice->upload(atomIndex);
findMoleculeGroups();
moleculesInvalid = false;
nonbonded->initialize(system);
}
......@@ -821,11 +820,6 @@ void CudaContext::findMoleculeGroups() {
}
void CudaContext::invalidateMolecules() {
moleculesInvalid = true;
}
void CudaContext::validateMolecules() {
moleculesInvalid = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return;
bool valid = true;
......@@ -925,24 +919,28 @@ void CudaContext::validateMolecules() {
findMoleculeGroups();
for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute();
reorderAtoms();
}
void CudaContext::reorderAtoms(bool enforcePeriodic) {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
void CudaContext::reorderAtoms() {
atomsWereReordered = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff() || stepsSinceReorder < 100) {
stepsSinceReorder++;
return;
if (moleculesInvalid)
validateMolecules();
}
atomsWereReordered = true;
stepsSinceReorder = 0;
if (useDoublePrecision)
reorderAtomsImpl<double, double4, double, double4>(enforcePeriodic);
reorderAtomsImpl<double, double4, double, double4>();
else if (useMixedPrecision)
reorderAtomsImpl<float, float4, double, double4>(enforcePeriodic);
reorderAtomsImpl<float, float4, double, double4>();
else
reorderAtomsImpl<float, float4, float, float4>(enforcePeriodic);
reorderAtomsImpl<float, float4, float, float4>();
nonbonded->updateNeighborListSize();
}
template <class Real, class Real4, class Mixed, class Mixed4>
void CudaContext::reorderAtomsImpl(bool enforcePeriodic) {
void CudaContext::reorderAtomsImpl() {
// Find the range of positions and the number of bins along each axis.
Real4 padding = {0, 0, 0, 0};
......@@ -1019,18 +1017,16 @@ void CudaContext::reorderAtomsImpl(bool enforcePeriodic) {
molPos[i].x -= dx;
molPos[i].y -= dy;
molPos[i].z -= dz;
if (enforcePeriodic) {
for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i];
Real4 p = oldPosq[atom];
p.x -= dx;
p.y -= dy;
p.z -= dz;
oldPosq[atom] = p;
posCellOffsets[atom].x -= xcell;
posCellOffsets[atom].y -= ycell;
posCellOffsets[atom].z -= zcell;
}
for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i];
Real4 p = oldPosq[atom];
p.x -= dx;
p.y -= dy;
p.z -= dz;
oldPosq[atom] = p;
posCellOffsets[atom].x -= xcell;
posCellOffsets[atom].y -= ycell;
posCellOffsets[atom].z -= zcell;
}
}
}
......
......@@ -298,6 +298,18 @@ public:
void setComputeForceCount(int count) {
computeForceCount = count;
}
/**
* Get the number of time steps since the atoms were reordered.
*/
int getStepsSinceReorder() const {
return stepsSinceReorder;
}
/**
* Set the number of time steps since the atoms were reordered.
*/
void setStepsSinceReorder(int steps) {
stepsSinceReorder = steps;
}
/**
* Get the number of atoms.
*/
......@@ -429,10 +441,8 @@ public:
/**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays.
*
* @param enforcePeriodic if true, the atom positions may be altered to enforce periodic boundary conditions
*/
void reorderAtoms(bool enforcePeriodic);
void reorderAtoms();
/**
* Add a listener that should be called whenever atoms get reordered. The CudaContext
* assumes ownership of the object, and deletes it when the context itself is deleted.
......@@ -447,15 +457,9 @@ public:
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
* and order to be revalidated the next to reorderAtoms() is called.
* and order to be revalidated.
*/
void invalidateMolecules();
/**
* Get whether the current molecule definitions are valid.
*/
bool getMoleculesAreInvalid() {
return moleculesInvalid;
}
private:
struct Molecule;
struct MoleculeGroup;
......@@ -472,7 +476,7 @@ private:
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/
template <class Real, class Real4, class Mixed, class Mixed4>
void reorderAtomsImpl(bool enforcePeriodic);
void reorderAtomsImpl();
static bool hasInitializedCuda;
const System& system;
double time, computeCapability;
......@@ -481,11 +485,12 @@ private:
int contextIndex;
int stepCount;
int computeForceCount;
int stepsSinceReorder;
int numAtoms;
int paddedNumAtoms;
int numAtomBlocks;
int numThreadBlocks;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, moleculesInvalid;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered;
std::string compiler, tempDir, gpuArchitecture;
float4 periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxSize, invPeriodicBoxSize;
......
......@@ -86,11 +86,6 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
cu.setAsCurrent();
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0);
cu.setAtomsWereReordered(false);
if (nb.getUseCutoff() && includeNonbonded && (cu.getMoleculesAreInvalid() || cu.getComputeForceCount()%100 == 0)) {
cu.reorderAtoms(!cu.getMoleculesAreInvalid());
nb.updateNeighborListSize();
}
cu.setComputeForceCount(cu.getComputeForceCount()+1);
cu.clearAutoclearBuffers();
if (includeNonbonded)
......@@ -220,6 +215,7 @@ void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<
}
for (int i = 0; i < (int) cu.getPosCellOffsets().size(); i++)
cu.getPosCellOffsets()[i] = make_int4(0, 0, 0, 0);
cu.reorderAtoms();
}
void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
......@@ -317,8 +313,8 @@ void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream&
stream.write((char*) &time, sizeof(double));
int stepCount = cu.getStepCount();
stream.write((char*) &stepCount, sizeof(int));
int computeForceCount = cu.getComputeForceCount();
stream.write((char*) &computeForceCount, sizeof(int));
int stepsSinceReorder = cu.getStepsSinceReorder();
stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cu.getPinnedBuffer();
cu.getPosq().download(buffer);
stream.write(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize());
......@@ -349,14 +345,14 @@ void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& st
throw OpenMMException("Checkpoint was created with a different numeric precision");
double time;
stream.read((char*) &time, sizeof(double));
int stepCount, computeForceCount;
int stepCount, stepsSinceReorder;
stream.read((char*) &stepCount, sizeof(int));
stream.read((char*) &computeForceCount, sizeof(int));
stream.read((char*) &stepsSinceReorder, sizeof(int));
vector<CudaContext*>& contexts = cu.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++) {
contexts[i]->setTime(time);
contexts[i]->setStepCount(stepCount);
contexts[i]->setComputeForceCount(computeForceCount);
contexts[i]->setStepsSinceReorder(stepsSinceReorder);
}
char* buffer = (char*) cu.getPinnedBuffer();
stream.read(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize());
......@@ -4134,6 +4130,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
}
double CudaIntegrateVerletStepKernel::computeKineticEnergy(ContextImpl& context, const VerletIntegrator& integrator) {
......@@ -4221,6 +4218,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
}
double CudaIntegrateLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const LangevinIntegrator& integrator) {
......@@ -4283,6 +4281,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
}
double CudaIntegrateBrownianStepKernel::computeKineticEnergy(ContextImpl& context, const BrownianIntegrator& integrator) {
......@@ -4363,6 +4362,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
}
cu.setTime(time);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
return dt;
}
......@@ -4457,6 +4457,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
}
cu.setTime(time);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
return dt;
}
......@@ -5129,6 +5130,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu.setTime(cu.getTime()+integrator.getStepSize());
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
}
double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
......@@ -5359,40 +5361,12 @@ void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, d
void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
cu.setAsCurrent();
if (cu.getAtomsWereReordered()) {
// The atoms were reordered since we saved the positions, so we need to fix them.
const vector<int> atomOrder = cu.getAtomIndex();
int numAtoms = cu.getNumAtoms();
if (cu.getUseDoublePrecision()) {
double4* pos = (double4*) cu.getPinnedBuffer();
savedPositions->download(pos);
vector<double4> fixedPos(cu.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cu.getPosq().upload(pos);
}
else {
float4* pos = (float4*) cu.getPinnedBuffer();
savedPositions->download(pos);
vector<float4> fixedPos(cu.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cu.getPosq().upload(pos);
}
}
else {
int bytesToCopy = cu.getPosq().getSize()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
CUresult result = cuMemcpyDtoD(cu.getPosq().getDevicePointer(), savedPositions->getDevicePointer(), bytesToCopy);
if (result != CUDA_SUCCESS) {
std::stringstream m;
m<<"Error restoring positions for MC barostat: "<<cu.getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(m.str());
}
int bytesToCopy = cu.getPosq().getSize()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
CUresult result = cuMemcpyDtoD(cu.getPosq().getDevicePointer(), savedPositions->getDevicePointer(), bytesToCopy);
if (result != CUDA_SUCCESS) {
std::stringstream m;
m<<"Error restoring positions for MC barostat: "<<cu.getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(m.str());
}
}
......
......@@ -739,12 +739,6 @@ void testChangingParameters() {
nonbonded->getParticleParameters(i, charge, sigma, epsilon);
nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon);
}
double total = 0;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
nonbonded->getParticleParameters(i, charge, sigma, epsilon);
total += charge;
}
nonbonded->updateParametersInContext(cuContext);
nonbonded->updateParametersInContext(referenceContext);
cuState = cuContext.getState(State::Forces | State::Energy);
......
......@@ -66,7 +66,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
}
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), atomsWereReordered(false), posq(NULL),
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") {
......@@ -407,7 +407,6 @@ void OpenCLContext::initialize() {
atomIndex[i] = i;
atomIndexDevice->upload(atomIndex);
findMoleculeGroups();
moleculesInvalid = false;
nonbonded->initialize(system);
}
......@@ -826,11 +825,6 @@ void OpenCLContext::findMoleculeGroups() {
}
void OpenCLContext::invalidateMolecules() {
moleculesInvalid = true;
}
void OpenCLContext::validateMolecules() {
moleculesInvalid = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return;
bool valid = true;
......@@ -930,24 +924,28 @@ void OpenCLContext::validateMolecules() {
findMoleculeGroups();
for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute();
reorderAtoms();
}
void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
void OpenCLContext::reorderAtoms() {
atomsWereReordered = false;
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff() || stepsSinceReorder < 100) {
stepsSinceReorder++;
return;
if (moleculesInvalid)
validateMolecules();
}
atomsWereReordered = true;
stepsSinceReorder = 0;
if (useDoublePrecision)
reorderAtomsImpl<cl_double, mm_double4, cl_double, mm_double4>(enforcePeriodic);
reorderAtomsImpl<cl_double, mm_double4, cl_double, mm_double4>();
else if (useMixedPrecision)
reorderAtomsImpl<cl_float, mm_float4, cl_double, mm_double4>(enforcePeriodic);
reorderAtomsImpl<cl_float, mm_float4, cl_double, mm_double4>();
else
reorderAtomsImpl<cl_float, mm_float4, cl_float, mm_float4>(enforcePeriodic);
reorderAtomsImpl<cl_float, mm_float4, cl_float, mm_float4>();
nonbonded->updateNeighborListSize();
}
template <class Real, class Real4, class Mixed, class Mixed4>
void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) {
void OpenCLContext::reorderAtomsImpl() {
// Find the range of positions and the number of bins along each axis.
......@@ -1023,18 +1021,16 @@ void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) {
molPos[i].x -= dx;
molPos[i].y -= dy;
molPos[i].z -= dz;
if (enforcePeriodic) {
for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i];
Real4 p = oldPosq[atom];
p.x -= dx;
p.y -= dy;
p.z -= dz;
oldPosq[atom] = p;
posCellOffsets[atom].x -= xcell;
posCellOffsets[atom].y -= ycell;
posCellOffsets[atom].z -= zcell;
}
for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i];
Real4 p = oldPosq[atom];
p.x -= dx;
p.y -= dy;
p.z -= dz;
oldPosq[atom] = p;
posCellOffsets[atom].x -= xcell;
posCellOffsets[atom].y -= ycell;
posCellOffsets[atom].z -= zcell;
}
}
}
......
......@@ -381,6 +381,18 @@ public:
void setComputeForceCount(int count) {
computeForceCount = count;
}
/**
* Get the number of time steps since the atoms were reordered.
*/
int getStepsSinceReorder() const {
return stepsSinceReorder;
}
/**
* Set the number of time steps since the atoms were reordered.
*/
void setStepsSinceReorder(int steps) {
stepsSinceReorder = steps;
}
/**
* Get the number of atoms.
*/
......@@ -529,10 +541,8 @@ public:
/**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays.
*
* @param enforcePeriodic if true, the atom positions may be altered to enforce periodic boundary conditions
*/
void reorderAtoms(bool enforcePeriodic);
void reorderAtoms();
/**
* Add a listener that should be called whenever atoms get reordered. The OpenCLContext
* assumes ownership of the object, and deletes it when the context itself is deleted.
......@@ -547,15 +557,9 @@ public:
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
* and order to be revalidated the next to reorderAtoms() is called.
* and order to be revalidated.
*/
void invalidateMolecules();
/**
* Get whether the current molecule definitions are valid.
*/
bool getMoleculesAreInvalid() {
return moleculesInvalid;
}
private:
struct Molecule;
struct MoleculeGroup;
......@@ -572,7 +576,7 @@ private:
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/
template <class Real, class Real4, class Mixed, class Mixed4>
void reorderAtomsImpl(bool enforcePeriodic);
void reorderAtomsImpl();
const System& system;
double time;
OpenCLPlatform::PlatformData& platformData;
......@@ -580,13 +584,14 @@ private:
int contextIndex;
int stepCount;
int computeForceCount;
int stepsSinceReorder;
int numAtoms;
int paddedNumAtoms;
int numAtomBlocks;
int numThreadBlocks;
int numForceBuffers;
int simdWidth;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, moleculesInvalid;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered;
mm_float4 periodicBoxSize, invPeriodicBoxSize;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble;
std::string defaultOptimizationOptions;
......
......@@ -106,11 +106,6 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0);
cl.setAtomsWereReordered(false);
if (nb.getUseCutoff() && includeNonbonded && (cl.getMoleculesAreInvalid() || cl.getComputeForceCount()%100 == 0)) {
cl.reorderAtoms(!cl.getMoleculesAreInvalid());
nb.updateNeighborListSize();
}
cl.setComputeForceCount(cl.getComputeForceCount()+1);
cl.clearAutoclearBuffers();
if (includeNonbonded)
......@@ -239,6 +234,7 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vecto
}
for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++)
cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0);
cl.reorderAtoms();
}
void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
......@@ -342,8 +338,8 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream
stream.write((char*) &time, sizeof(double));
int stepCount = cl.getStepCount();
stream.write((char*) &stepCount, sizeof(int));
int computeForceCount = cl.getComputeForceCount();
stream.write((char*) &computeForceCount, sizeof(int));
int stepsSinceReorder = cl.getStepsSinceReorder();
stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cl.getPinnedBuffer();
cl.getPosq().download(buffer);
stream.write(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize());
......@@ -373,14 +369,14 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream&
throw OpenMMException("Checkpoint was created with a different numeric precision");
double time;
stream.read((char*) &time, sizeof(double));
int stepCount, computeForceCount;
int stepCount, stepsSinceReorder;
stream.read((char*) &stepCount, sizeof(int));
stream.read((char*) &computeForceCount, sizeof(int));
stream.read((char*) &stepsSinceReorder, sizeof(int));
vector<OpenCLContext*>& contexts = cl.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++) {
contexts[i]->setTime(time);
contexts[i]->setStepCount(stepCount);
contexts[i]->setComputeForceCount(computeForceCount);
contexts[i]->setStepsSinceReorder(stepsSinceReorder);
}
char* buffer = (char*) cl.getPinnedBuffer();
stream.read(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize());
......@@ -4296,6 +4292,7 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet
cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag.
......@@ -4395,6 +4392,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag.
......@@ -4473,6 +4471,7 @@ void OpenCLIntegrateBrownianStepKernel::execute(ContextImpl& context, const Brow
cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag.
......@@ -4578,6 +4577,7 @@ double OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, co
}
cl.setTime(time);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
return dt;
}
......@@ -4691,6 +4691,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context,
}
cl.setTime(time);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
return dt;
}
......@@ -5354,6 +5355,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
cl.setTime(cl.getTime()+integrator.getStepSize());
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag.
......@@ -5580,34 +5582,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
}
void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
if (cl.getAtomsWereReordered()) {
// The atoms were reordered since we saved the positions, so we need to fix them.
const vector<int> atomOrder = cl.getAtomIndex();
int numAtoms = cl.getNumAtoms();
if (cl.getUseDoublePrecision()) {
mm_double4* pos = (mm_double4*) cl.getPinnedBuffer();
savedPositions->download(pos);
vector<mm_double4> fixedPos(cl.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cl.getPosq().upload(pos);
}
else {
mm_float4* pos = (mm_float4*) cl.getPinnedBuffer();
savedPositions->download(pos);
vector<mm_float4> fixedPos(cl.getPaddedNumAtoms());
for (int i = 0; i < numAtoms; i++)
fixedPos[lastAtomOrder[i]] = pos[i];
for (int i = 0; i < numAtoms; i++)
pos[i] = fixedPos[atomOrder[i]];
cl.getPosq().upload(pos);
}
}
else
cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, cl.getPosq().getSize()*sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, cl.getPosq().getSize()*sizeof(mm_float4));
}
OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() {
......
......@@ -742,12 +742,6 @@ void testChangingParameters() {
nonbonded->getParticleParameters(i, charge, sigma, epsilon);
nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon);
}
double total = 0;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
nonbonded->getParticleParameters(i, charge, sigma, epsilon);
total += charge;
}
nonbonded->updateParametersInContext(clContext);
nonbonded->updateParametersInContext(referenceContext);
clState = clContext.getState(State::Forces | State::Energy);
......
......@@ -358,33 +358,13 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
}
double CudaIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
class CudaIntegrateDrudeSCFStepKernel::ReorderListener : public CudaContext::ReorderListener {
public:
ReorderListener(CudaContext& cu, const vector<int>& drudeParticles, vector<int>& reorderedDrudeParticles) :
cu(cu), drudeParticles(drudeParticles), reorderedDrudeParticles(reorderedDrudeParticles) {
}
void execute() {
const vector<int>& order = cu.getAtomIndex();
int numParticles = order.size();
vector<int> inverseOrder(numParticles);
for (int i = 0; i < numParticles; i++)
inverseOrder[order[i]] = i;
int numDrudeParticles = drudeParticles.size();
for (int i = 0; i < numDrudeParticles; i++)
reorderedDrudeParticles[i] = inverseOrder[drudeParticles[i]];
}
private:
CudaContext& cu;
const vector<int>& drudeParticles;
vector<int>& reorderedDrudeParticles;
};
CudaIntegrateDrudeSCFStepKernel::~CudaIntegrateDrudeSCFStepKernel() {
if (minimizerPos != NULL)
lbfgs_free(minimizerPos);
......@@ -401,9 +381,7 @@ void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const Dru
double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
drudeParticles.push_back(p);
reorderedDrudeParticles.push_back(p);
}
cu.addReorderListener(new ReorderListener(cu, drudeParticles, reorderedDrudeParticles));
// Initialize the energy minimizer.
......@@ -469,6 +447,7 @@ void CudaIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeS
cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
}
double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
......@@ -478,39 +457,36 @@ double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& contex
struct MinimizerData {
ContextImpl& context;
CudaContext& cu;
vector<int>& reorderedDrudeParticles;
MinimizerData(ContextImpl& context, CudaContext& cu, vector<int>& reorderedDrudeParticles) : context(context), cu(cu), reorderedDrudeParticles(reorderedDrudeParticles) {}
vector<int>& drudeParticles;
MinimizerData(ContextImpl& context, CudaContext& cu, vector<int>& drudeParticles) : context(context), cu(cu), drudeParticles(drudeParticles) {}
};
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
ContextImpl& context = data->context;
CudaContext& cu = data->cu;
vector<int>& reorderedDrudeParticles = data->reorderedDrudeParticles;
int numDrudeParticles = reorderedDrudeParticles.size();
vector<int>& drudeParticles = data->drudeParticles;
int numDrudeParticles = drudeParticles.size();
// Set the particle positions.
cu.getPosq().download(cu.getPinnedBuffer());
double4 periodicBoxSize = cu.getPeriodicBoxSize();
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
double4& p = posq[reorderedDrudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
p.x = x[3*i]+offset.x*periodicBoxSize.x;
p.y = x[3*i+1]+offset.y*periodicBoxSize.y;
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
double4& p = posq[drudeParticles[i]];
p.x = x[3*i];
p.y = x[3*i+1];
p.z = x[3*i+2];
}
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
float4& p = posq[reorderedDrudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
p.x = x[3*i]+offset.x*periodicBoxSize.x;
p.y = x[3*i+1]+offset.y*periodicBoxSize.y;
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
float4& p = posq[drudeParticles[i]];
p.x = x[3*i];
p.y = x[3*i+1];
p.z = x[3*i+2];
}
}
cu.getPosq().upload(cu.getPinnedBuffer());
......@@ -523,7 +499,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
double forceScale = -1.0/0x100000000;
int paddedNumAtoms = cu.getPaddedNumAtoms();
for (int i = 0; i < numDrudeParticles; ++i) {
int index = reorderedDrudeParticles[i];
int index = drudeParticles[i];
g[3*i] = forceScale*force[index];
g[3*i+1] = forceScale*force[index+paddedNumAtoms];
g[3*i+2] = forceScale*force[index+paddedNumAtoms*2];
......@@ -534,27 +510,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
// Record the initial positions.
int numDrudeParticles = reorderedDrudeParticles.size();
int numDrudeParticles = drudeParticles.size();
cu.getPosq().download(cu.getPinnedBuffer());
double4 periodicBoxSize = cu.getPeriodicBoxSize();
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
double4 p = posq[reorderedDrudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
double4 p = posq[drudeParticles[i]];
minimizerPos[3*i] = p.x;
minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+2] = p.z;
}
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
float4 p = posq[reorderedDrudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
float4 p = posq[drudeParticles[i]];
minimizerPos[3*i] = p.x;
minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+2] = p.z;
}
minimizerParams.xtol = 1e-7;
}
......@@ -571,6 +544,6 @@ void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tole
// Perform the minimization.
lbfgsfloatval_t fx;
MinimizerData data(context, cu, reorderedDrudeParticles);
MinimizerData data(context, cu, drudeParticles);
lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
}
\ No newline at end of file
......@@ -148,12 +148,10 @@ public:
*/
double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
private:
class ReorderListener;
void minimize(ContextImpl& context, double tolerance);
CudaContext& cu;
double prevStepSize;
std::vector<int> drudeParticles;
std::vector<int> reorderedDrudeParticles;
lbfgsfloatval_t *minimizerPos;
lbfgs_parameter_t minimizerParams;
CUfunction kernel1, kernel2;
......
......@@ -364,33 +364,13 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
}
double OpenCLIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
class OpenCLIntegrateDrudeSCFStepKernel::ReorderListener : public OpenCLContext::ReorderListener {
public:
ReorderListener(OpenCLContext& cl, const vector<int>& drudeParticles, vector<int>& reorderedDrudeParticles) :
cl(cl), drudeParticles(drudeParticles), reorderedDrudeParticles(reorderedDrudeParticles) {
}
void execute() {
const vector<int>& order = cl.getAtomIndex();
int numParticles = order.size();
vector<int> inverseOrder(numParticles);
for (int i = 0; i < numParticles; i++)
inverseOrder[order[i]] = i;
int numDrudeParticles = drudeParticles.size();
for (int i = 0; i < numDrudeParticles; i++)
reorderedDrudeParticles[i] = inverseOrder[drudeParticles[i]];
}
private:
OpenCLContext& cl;
const vector<int>& drudeParticles;
vector<int>& reorderedDrudeParticles;
};
OpenCLIntegrateDrudeSCFStepKernel::~OpenCLIntegrateDrudeSCFStepKernel() {
if (minimizerPos != NULL)
lbfgs_free(minimizerPos);
......@@ -406,9 +386,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const D
double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
drudeParticles.push_back(p);
reorderedDrudeParticles.push_back(p);
}
cl.addReorderListener(new ReorderListener(cl, drudeParticles, reorderedDrudeParticles));
// Initialize the energy minimizer.
......@@ -481,6 +459,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud
cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag.
......@@ -496,39 +475,36 @@ double OpenCLIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& cont
struct MinimizerData {
ContextImpl& context;
OpenCLContext& cl;
vector<int>& reorderedDrudeParticles;
MinimizerData(ContextImpl& context, OpenCLContext& cl, vector<int>& reorderedDrudeParticles) : context(context), cl(cl), reorderedDrudeParticles(reorderedDrudeParticles) {}
vector<int>& drudeParticles;
MinimizerData(ContextImpl& context, OpenCLContext& cl, vector<int>& drudeParticles) : context(context), cl(cl), drudeParticles(drudeParticles) {}
};
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
ContextImpl& context = data->context;
OpenCLContext& cl = data->cl;
vector<int>& reorderedDrudeParticles = data->reorderedDrudeParticles;
int numDrudeParticles = reorderedDrudeParticles.size();
vector<int>& drudeParticles = data->drudeParticles;
int numDrudeParticles = drudeParticles.size();
// Set the particle positions.
cl.getPosq().download(cl.getPinnedBuffer());
mm_double4 periodicBoxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_double4& p = posq[reorderedDrudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]];
p.x = x[3*i]+offset.x*periodicBoxSize.x;
p.y = x[3*i+1]+offset.y*periodicBoxSize.y;
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
mm_double4& p = posq[drudeParticles[i]];
p.x = x[3*i];
p.y = x[3*i+1];
p.z = x[3*i+2];
}
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_float4& p = posq[reorderedDrudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]];
p.x = x[3*i]+offset.x*periodicBoxSize.x;
p.y = x[3*i+1]+offset.y*periodicBoxSize.y;
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
mm_float4& p = posq[drudeParticles[i]];
p.x = x[3*i];
p.y = x[3*i+1];
p.z = x[3*i+2];
}
}
cl.getPosq().upload(cl.getPinnedBuffer());
......@@ -540,7 +516,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
if (cl.getUseDoublePrecision()) {
mm_double4* force = (mm_double4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
int index = reorderedDrudeParticles[i];
int index = drudeParticles[i];
g[3*i] = -force[index].x;
g[3*i+1] = -force[index].y;
g[3*i+2] = -force[index].z;
......@@ -549,7 +525,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
else {
mm_float4* force = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
int index = reorderedDrudeParticles[i];
int index = drudeParticles[i];
g[3*i] = -force[index].x;
g[3*i+1] = -force[index].y;
g[3*i+2] = -force[index].z;
......@@ -561,27 +537,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
// Record the initial positions.
int numDrudeParticles = reorderedDrudeParticles.size();
int numDrudeParticles = drudeParticles.size();
cl.getPosq().download(cl.getPinnedBuffer());
mm_double4 periodicBoxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_double4 p = posq[reorderedDrudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]];
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
mm_double4 p = posq[drudeParticles[i]];
minimizerPos[3*i] = p.x;
minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+2] = p.z;
}
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_float4 p = posq[reorderedDrudeParticles[i]];
mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]];
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
mm_float4 p = posq[drudeParticles[i]];
minimizerPos[3*i] = p.x;
minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+2] = p.z;
}
minimizerParams.xtol = 1e-7;
}
......@@ -598,6 +571,6 @@ void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double to
// Perform the minimization.
lbfgsfloatval_t fx;
MinimizerData data(context, cl, reorderedDrudeParticles);
MinimizerData data(context, cl, drudeParticles);
lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
}
\ No newline at end of file
......@@ -149,13 +149,11 @@ public:
*/
double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
private:
class ReorderListener;
void minimize(ContextImpl& context, double tolerance);
OpenCLContext& cl;
bool hasInitializedKernels;
double prevStepSize;
std::vector<int> drudeParticles;
std::vector<int> reorderedDrudeParticles;
lbfgsfloatval_t *minimizerPos;
lbfgs_parameter_t minimizerParams;
cl::Kernel kernel1, kernel2;
......
......@@ -132,6 +132,7 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
}
void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// Loop over copies and compute the force on each one.
......@@ -178,6 +179,15 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
if (cu.getAtomsWereReordered() && cu.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
int i = numCopies-1;
void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(translateKernel, args, cu.getNumAtoms());
}
}
void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
......@@ -188,13 +198,6 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
context.computeVirtualSites();
context.updateContextState();
context.calcForcesAndEnergy(true, false);
if (cu.getAtomsWereReordered() && cu.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(translateKernel, args, cu.getNumAtoms());
}
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
......
......@@ -190,6 +190,14 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
if (cl.getAtomsWereReordered() && cl.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
translateKernel.setArg<cl_int>(3, numCopies-1);
cl.executeKernel(translateKernel, cl.getNumAtoms());
}
}
void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
......@@ -199,13 +207,6 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
context.computeVirtualSites();
context.updateContextState();
context.calcForcesAndEnergy(true, false);
if (cl.getAtomsWereReordered() && cl.getNonbondedUtilities().getUsePeriodic()) {
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
translateKernel.setArg<cl_int>(3, i);
cl.executeKernel(translateKernel, cl.getNumAtoms());
}
copyFromContextKernel.setArg<cl_int>(7, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment