Commit 1945dd6c authored by Andy Simmonett's avatar Andy Simmonett
Browse files

Merge branch 'master' of github.com:pandegroup/openmm into dpme

parents 203e5407 9963e51a
/* Portions copyright (c) 2009-2016 Stanford University and Simbios. /* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -33,16 +33,6 @@ ...@@ -33,16 +33,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuCustomGBForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomGBForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomGBForce& owner;
};
CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threadIndex, CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threadIndex,
const vector<Lepton::CompiledExpression>& valueExpressions, const vector<Lepton::CompiledExpression>& valueExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions, const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions,
...@@ -206,7 +196,7 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM** ...@@ -206,7 +196,7 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM**
// Calculate the first computed value. // Calculate the first computed value.
ComputeForceTask task(*this); auto task = [&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); };
gmx_atomic_set(&counter, 0); gmx_atomic_set(&counter, 0);
threads.execute(task); threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
......
/* Portions copyright (c) 2009-2014 Stanford University and Simbios. /* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -37,16 +37,6 @@ ...@@ -37,16 +37,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuCustomManyParticleForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomManyParticleForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomManyParticleForce& owner;
};
CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) : CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) :
threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) { threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) {
numParticles = force.getNumParticles(); numParticles = force.getNumParticles();
...@@ -141,8 +131,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpe ...@@ -141,8 +131,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpe
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeForceTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Combine the energies from all the threads. // Combine the energies from all the threads.
......
/* Portions copyright (c) 2009-2016 Stanford University and Simbios. /* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -33,16 +33,6 @@ ...@@ -33,16 +33,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuCustomNonbondedForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomNonbondedForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomNonbondedForce& owner;
};
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression, CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) : const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) :
energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) { energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) {
...@@ -150,8 +140,7 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v ...@@ -150,8 +140,7 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeForceTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Combine the energies from all the threads. // Combine the energies from all the threads.
......
/* Portions copyright (c) 2006-2016 Stanford University and Simbios. /* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -36,16 +36,6 @@ const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096; ...@@ -36,16 +36,6 @@ const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096;
const float CpuGBSAOBCForce::TABLE_MIN = 0.25f; const float CpuGBSAOBCForce::TABLE_MIN = 0.25f;
const float CpuGBSAOBCForce::TABLE_MAX = 1.5f; const float CpuGBSAOBCForce::TABLE_MAX = 1.5f;
class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuGBSAOBCForce& owner;
};
CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) { CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) {
logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS; logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS;
logDXInv = 1.0f/logDX; logDXInv = 1.0f/logDX;
...@@ -110,9 +100,8 @@ void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<Align ...@@ -110,9 +100,8 @@ void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<Align
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeTask task(*this);
gmx_atomic_set(&counter, 0); gmx_atomic_set(&counter, 0);
threads.execute(task); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.waitForThreads(); // Compute Born radii threads.waitForThreads(); // Compute Born radii
gmx_atomic_set(&counter, 0); gmx_atomic_set(&counter, 0);
threads.resumeThreads(); threads.resumeThreads();
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2016 Stanford University and the Authors. * * Portions copyright (c) 2016-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -44,17 +44,6 @@ ...@@ -44,17 +44,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuGayBerneForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGayBerneForce& owner, CpuNeighborList* neighborList) : owner(owner), neighborList(neighborList) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex, neighborList);
}
CpuGayBerneForce& owner;
CpuNeighborList* neighborList;
};
CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) { CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) {
// Record the force parameters. // Record the force parameters.
...@@ -137,8 +126,7 @@ RealOpenMM CpuGayBerneForce::calculateForce(const vector<RealVec>& positions, st ...@@ -137,8 +126,7 @@ RealOpenMM CpuGayBerneForce::calculateForce(const vector<RealVec>& positions, st
// Signal the threads to compute the pairwise interactions. // Signal the threads to compute the pairwise interactions.
ComputeTask task(*this, data.neighborList); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex, data.neighborList); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Signal the threads to compute exceptions. // Signal the threads to compute exceptions.
......
...@@ -138,35 +138,27 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>& ...@@ -138,35 +138,27 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>&
return 0.5*energy; return 0.5*energy;
} }
class CpuCalcForcesAndEnergyKernel::SumForceTask : public ThreadPool::Task { CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
public: CalcForcesAndEnergyKernel(name, platform), data(data) {
SumForceTask(int numParticles, vector<RealVec>& forceData, CpuPlatform::PlatformData& data) : numParticles(numParticles), forceData(forceData), data(data) { // Create a Reference platform version of this kernel.
}
void execute(ThreadPool& threads, int threadIndex) { ReferenceKernelFactory referenceFactory;
// Sum the contributions to forces that have been calculated by different threads. referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
}
int numParticles;
vector<RealVec>& forceData;
CpuPlatform::PlatformData& data;
};
class CpuCalcForcesAndEnergyKernel::InitForceTask : public ThreadPool::Task { void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
public: referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
InitForceTask(int numParticles, ContextImpl& context, CpuPlatform::PlatformData& data) : numParticles(numParticles), positionsValid(true), context(context), data(data) { lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
} }
void execute(ThreadPool& threads, int threadIndex) {
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
bool positionsValid = true;
data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Convert the positions to single precision and apply periodic boundary conditions // Convert the positions to single precision and apply periodic boundary conditions
AlignedArray<float>& posq = data.posq; AlignedArray<float>& posq = data.posq;
...@@ -219,36 +211,9 @@ public: ...@@ -219,36 +211,9 @@ public:
fvec4 zero(0.0f); fvec4 zero(0.0f);
for (int j = 0; j < numParticles; j++) for (int j = 0; j < numParticles; j++)
zero.store(&data.threadForce[threadIndex][j*4]); zero.store(&data.threadForce[threadIndex][j*4]);
} });
int numParticles;
bool positionsValid;
ContextImpl& context;
CpuPlatform::PlatformData& data;
};
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel.
ReferenceKernelFactory referenceFactory;
referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
InitForceTask task(numParticles, context, data);
data.threads.execute(task);
data.threads.waitForThreads(); data.threads.waitForThreads();
if (!task.positionsValid) if (!positionsValid)
throw OpenMMException("Particle coordinate is nan"); throw OpenMMException("Particle coordinate is nan");
// Determine whether we need to recompute the neighbor list. // Determine whether we need to recompute the neighbor list.
...@@ -303,8 +268,23 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i ...@@ -303,8 +268,23 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) { double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
// Sum the forces from all the threads. // Sum the forces from all the threads.
SumForceTask task(context.getSystem().getNumParticles(), extractForces(context), data); data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
data.threads.execute(task); // Sum the contributions to forces that have been calculated by different threads.
int numParticles = context.getSystem().getNumParticles();
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
vector<RealVec>& forceData = extractForces(context);
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
});
data.threads.waitForThreads(); data.threads.waitForThreads();
return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid); return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid);
} }
......
/* Portions copyright (c) 2006-2016 Stanford University and Simbios. /* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Authors: Peter Eastman * Authors: Peter Eastman
* Contributors: * Contributors:
* *
...@@ -29,36 +29,6 @@ ...@@ -29,36 +29,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuLangevinDynamics::Update1Task : public ThreadPool::Task {
public:
Update1Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate1(threadIndex);
}
CpuLangevinDynamics& owner;
};
class CpuLangevinDynamics::Update2Task : public ThreadPool::Task {
public:
Update2Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate2(threadIndex);
}
CpuLangevinDynamics& owner;
};
class CpuLangevinDynamics::Update3Task : public ThreadPool::Task {
public:
Update3Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate3(threadIndex);
}
CpuLangevinDynamics& owner;
};
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM friction, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) : CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM friction, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) :
ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) { ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) {
} }
...@@ -79,8 +49,7 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCo ...@@ -79,8 +49,7 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
Update1Task task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate1(threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
} }
...@@ -97,8 +66,7 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCo ...@@ -97,8 +66,7 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
Update2Task task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate2(threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
} }
...@@ -114,8 +82,7 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo ...@@ -114,8 +82,7 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
Update3Task task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate3(threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. * * Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -409,16 +409,6 @@ private: ...@@ -409,16 +409,6 @@ private:
vector<vector<vector<pair<float, int> > > > bins; vector<vector<vector<pair<float, int> > > > bins;
}; };
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
public:
ThreadTask(CpuNeighborList& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeNeighborList(threads, threadIndex);
}
CpuNeighborList& owner;
};
CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) { CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
} }
...@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float ...@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Sort the atoms based on a Hilbert curve. // Sort the atoms based on a Hilbert curve.
atomBins.resize(numAtoms); atomBins.resize(numAtoms);
ThreadTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeNeighborList(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
sort(atomBins.begin(), atomBins.end()); sort(atomBins.begin(), atomBins.end());
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios. /* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -42,16 +42,6 @@ using namespace OpenMM; ...@@ -42,16 +42,6 @@ using namespace OpenMM;
const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M)); const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048; const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048;
class CpuNonbondedForce::ComputeDirectTask : public ThreadPool::Task {
public:
ComputeDirectTask(CpuNonbondedForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeDirect(threads, threadIndex);
}
CpuNonbondedForce& owner;
};
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor CpuNonbondedForce constructor
...@@ -405,8 +395,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -405,8 +395,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeDirectTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeDirect(threads, threadIndex); });
threads.execute(task); // ACS calls threadcomputedirect
threads.waitForThreads(); threads.waitForThreads();
// Signal the threads to subtract the exclusions. // Signal the threads to subtract the exclusions.
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. * * Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -35,52 +35,6 @@ ...@@ -35,52 +35,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuSETTLE::ApplyToPositionsTask : public ThreadPool::Task {
public:
ApplyToPositionsTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses,
RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) : atomCoordinates(atomCoordinates), atomCoordinatesP(atomCoordinatesP),
inverseMasses(inverseMasses), tolerance(tolerance), threadSettle(threadSettle) {
gmx_atomic_set(&atomicCounter, 0);
}
void execute(ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
}
}
vector<OpenMM::RealVec>& atomCoordinates;
vector<OpenMM::RealVec>& atomCoordinatesP;
vector<RealOpenMM>& inverseMasses;
RealOpenMM tolerance;
vector<ReferenceSETTLEAlgorithm*>& threadSettle;
gmx_atomic_t atomicCounter;
};
class CpuSETTLE::ApplyToVelocitiesTask : public ThreadPool::Task {
public:
ApplyToVelocitiesTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses,
RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) : atomCoordinates(atomCoordinates), velocities(velocities),
inverseMasses(inverseMasses), tolerance(tolerance), threadSettle(threadSettle) {
gmx_atomic_set(&atomicCounter, 0);
}
void execute(ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
}
}
vector<OpenMM::RealVec>& atomCoordinates;
vector<OpenMM::RealVec>& velocities;
vector<RealOpenMM>& inverseMasses;
RealOpenMM tolerance;
vector<ReferenceSETTLEAlgorithm*>& threadSettle;
gmx_atomic_t atomicCounter;
};
CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads) : threads(threads) { CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads) : threads(threads) {
int numBlocks = 10*threads.getNumThreads(); int numBlocks = 10*threads.getNumThreads();
int numClusters = settle.getNumClusters(); int numClusters = settle.getNumClusters();
...@@ -107,13 +61,29 @@ CpuSETTLE::~CpuSETTLE() { ...@@ -107,13 +61,29 @@ CpuSETTLE::~CpuSETTLE() {
} }
void CpuSETTLE::apply(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses, RealOpenMM tolerance) { void CpuSETTLE::apply(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses, RealOpenMM tolerance) {
ApplyToPositionsTask task(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance, threadSettle); gmx_atomic_t atomicCounter;
threads.execute(task); gmx_atomic_set(&atomicCounter, 0);
threads.execute([&] (ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
}
});
threads.waitForThreads(); threads.waitForThreads();
} }
void CpuSETTLE::applyToVelocities(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses, RealOpenMM tolerance) { void CpuSETTLE::applyToVelocities(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses, RealOpenMM tolerance) {
ApplyToVelocitiesTask task(atomCoordinates, velocities, inverseMasses, tolerance, threadSettle); gmx_atomic_t atomicCounter;
threads.execute(task); gmx_atomic_set(&atomicCounter, 0);
threads.execute([&] (ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
}
});
threads.waitForThreads(); threads.waitForThreads();
} }
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. * * Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -494,6 +494,10 @@ public: ...@@ -494,6 +494,10 @@ public:
CudaNonbondedUtilities& getNonbondedUtilities() { CudaNonbondedUtilities& getNonbondedUtilities() {
return *nonbonded; return *nonbonded;
} }
/**
* Set the particle charges. These are packed into the fourth element of the posq array.
*/
void setCharges(const std::vector<double>& charges);
/** /**
* Get the thread used by this context for executing parallel computations. * Get the thread used by this context for executing parallel computations.
*/ */
...@@ -577,6 +581,12 @@ public: ...@@ -577,6 +581,12 @@ public:
* and order to be revalidated. * and order to be revalidated.
*/ */
void invalidateMolecules(); void invalidateMolecules();
/**
* Mark that the current molecule definitions from one particular force (and hence the atom order)
* may be invalid. This should be called whenever force field parameters change. It will cause the
* definitions and order to be revalidated.
*/
bool invalidateMolecules(CudaForceInfo* force);
private: private:
/** /**
* Compute a sorted list of device indices in decreasing order of desirability * Compute a sorted list of device indices in decreasing order of desirability
...@@ -626,6 +636,7 @@ private: ...@@ -626,6 +636,7 @@ private:
CUfunction clearFourBuffersKernel; CUfunction clearFourBuffersKernel;
CUfunction clearFiveBuffersKernel; CUfunction clearFiveBuffersKernel;
CUfunction clearSixBuffersKernel; CUfunction clearSixBuffersKernel;
CUfunction setChargesKernel;
std::vector<CudaForceInfo*> forces; std::vector<CudaForceInfo*> forces;
std::vector<Molecule> molecules; std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups; std::vector<MoleculeGroup> moleculeGroups;
...@@ -638,6 +649,7 @@ private: ...@@ -638,6 +649,7 @@ private:
CudaArray* energyBuffer; CudaArray* energyBuffer;
CudaArray* energyParamDerivBuffer; CudaArray* energyParamDerivBuffer;
CudaArray* atomIndexDevice; CudaArray* atomIndexDevice;
CudaArray* chargeBuffer;
std::vector<std::string> energyParamDerivNames; std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace; std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex; std::vector<int> atomIndex;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. * * Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -198,7 +198,6 @@ public: ...@@ -198,7 +198,6 @@ public:
*/ */
void loadCheckpoint(ContextImpl& context, std::istream& stream); void loadCheckpoint(ContextImpl& context, std::istream& stream);
private: private:
class GetPositionsTask;
CudaContext& cu; CudaContext& cu;
}; };
...@@ -292,9 +291,11 @@ public: ...@@ -292,9 +291,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force); void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
private: private:
class ForceInfo;
int numBonds; int numBonds;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaArray* params; CudaArray* params;
}; };
...@@ -332,9 +333,11 @@ public: ...@@ -332,9 +333,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomBondForce& force); void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
private: private:
class ForceInfo;
int numBonds; int numBonds;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
...@@ -375,9 +378,11 @@ public: ...@@ -375,9 +378,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force); void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
private: private:
class ForceInfo;
int numAngles; int numAngles;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaArray* params; CudaArray* params;
}; };
...@@ -415,9 +420,11 @@ public: ...@@ -415,9 +420,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force); void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
private: private:
class ForceInfo;
int numAngles; int numAngles;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
...@@ -458,9 +465,11 @@ public: ...@@ -458,9 +465,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force); void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaArray* params; CudaArray* params;
}; };
...@@ -498,9 +507,11 @@ public: ...@@ -498,9 +507,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force); void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaArray* params1; CudaArray* params1;
CudaArray* params2; CudaArray* params2;
...@@ -539,9 +550,11 @@ public: ...@@ -539,9 +550,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force); void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
std::vector<int2> mapPositionsVec; std::vector<int2> mapPositionsVec;
CudaArray* coefficients; CudaArray* coefficients;
...@@ -582,9 +595,11 @@ public: ...@@ -582,9 +595,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force); void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
...@@ -657,12 +672,14 @@ private: ...@@ -657,12 +672,14 @@ private:
const char* getMaxValue() const {return "make_int2(2147483647, 2147483647)";} const char* getMaxValue() const {return "make_int2(2147483647, 2147483647)";}
const char* getSortKey() const {return "value.y";} const char* getSortKey() const {return "value.y";}
}; };
class ForceInfo;
class PmeIO; class PmeIO;
class PmePreComputation; class PmePreComputation;
class PmePostComputation; class PmePostComputation;
class SyncStreamPreComputation; class SyncStreamPreComputation;
class SyncStreamPostComputation; class SyncStreamPostComputation;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
bool hasInitializedFFT; bool hasInitializedFFT;
CudaArray* sigmaEpsilon; CudaArray* sigmaEpsilon;
CudaArray* exceptionParams; CudaArray* exceptionParams;
...@@ -746,8 +763,10 @@ public: ...@@ -746,8 +763,10 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force); void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private: private:
class ForceInfo;
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes); void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes);
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
CudaArray* interactionGroupData; CudaArray* interactionGroupData;
...@@ -797,10 +816,12 @@ public: ...@@ -797,10 +816,12 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force); void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private: private:
class ForceInfo;
double prefactor, surfaceAreaFactor, cutoff; double prefactor, surfaceAreaFactor, cutoff;
bool hasCreatedKernels; bool hasCreatedKernels;
int maxTiles; int maxTiles;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaArray* params; CudaArray* params;
CudaArray* bornSum; CudaArray* bornSum;
CudaArray* bornRadii; CudaArray* bornRadii;
...@@ -847,10 +868,12 @@ public: ...@@ -847,10 +868,12 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force); void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private: private:
class ForceInfo;
double cutoff; double cutoff;
bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs; bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues; int maxTiles, numComputedValues;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaParameterSet* computedValues; CudaParameterSet* computedValues;
CudaParameterSet* energyDerivs; CudaParameterSet* energyDerivs;
...@@ -904,9 +927,11 @@ public: ...@@ -904,9 +927,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force); void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
private: private:
class ForceInfo;
int numParticles; int numParticles;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
...@@ -948,9 +973,11 @@ public: ...@@ -948,9 +973,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force); void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private: private:
class ForceInfo;
int numDonors, numAcceptors; int numDonors, numAcceptors;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaParameterSet* donorParams; CudaParameterSet* donorParams;
CudaParameterSet* acceptorParams; CudaParameterSet* acceptorParams;
CudaArray* globals; CudaArray* globals;
...@@ -1000,9 +1027,11 @@ public: ...@@ -1000,9 +1027,11 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force); void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force);
private: private:
class ForceInfo;
int numGroups, numBonds; int numGroups, numBonds;
bool needEnergyParamDerivs; bool needEnergyParamDerivs;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
CudaArray* groupParticles; CudaArray* groupParticles;
...@@ -1053,8 +1082,10 @@ public: ...@@ -1053,8 +1082,10 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force); void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force);
private: private:
class ForceInfo;
int numBonds; int numBonds;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
...@@ -1099,7 +1130,9 @@ public: ...@@ -1099,7 +1130,9 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomManyParticleForce& force); void copyParametersToContext(ContextImpl& context, const CustomManyParticleForce& force);
private: private:
class ForceInfo;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
bool hasInitializedKernel; bool hasInitializedKernel;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize; int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
...@@ -1161,9 +1194,11 @@ public: ...@@ -1161,9 +1194,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const GayBerneForce& force); void copyParametersToContext(ContextImpl& context, const GayBerneForce& force);
private: private:
class ForceInfo;
class ReorderListener; class ReorderListener;
void sortAtoms(); void sortAtoms();
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
bool hasInitializedKernels; bool hasInitializedKernels;
int numRealParticles, numExceptions, maxNeighborBlocks; int numRealParticles, numExceptions, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod; GayBerneForce::NonbondedMethod nonbondedMethod;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. * * Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -108,7 +108,8 @@ static int executeInWindows(const string &command) { ...@@ -108,7 +108,8 @@ static int executeInWindows(const string &command) {
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0), const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), isNvccAvailable(false), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), isNvccAvailable(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), chargeBuffer(NULL),
integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
// Determine what compiler to use. // Determine what compiler to use.
this->compiler = "\""+compiler+"\""; this->compiler = "\""+compiler+"\"";
...@@ -291,6 +292,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -291,6 +292,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
clearFourBuffersKernel = getKernel(utilities, "clearFourBuffers"); clearFourBuffersKernel = getKernel(utilities, "clearFourBuffers");
clearFiveBuffersKernel = getKernel(utilities, "clearFiveBuffers"); clearFiveBuffersKernel = getKernel(utilities, "clearFiveBuffers");
clearSixBuffersKernel = getKernel(utilities, "clearSixBuffers"); clearSixBuffersKernel = getKernel(utilities, "clearSixBuffers");
setChargesKernel = getKernel(utilities, "setCharges");
// Set defines based on the requested precision. // Set defines based on the requested precision.
...@@ -407,6 +409,8 @@ CudaContext::~CudaContext() { ...@@ -407,6 +409,8 @@ CudaContext::~CudaContext() {
delete energyParamDerivBuffer; delete energyParamDerivBuffer;
if (atomIndexDevice != NULL) if (atomIndexDevice != NULL)
delete atomIndexDevice; delete atomIndexDevice;
if (chargeBuffer != NULL)
delete chargeBuffer;
if (integration != NULL) if (integration != NULL)
delete integration; delete integration;
if (expression != NULL) if (expression != NULL)
...@@ -860,6 +864,25 @@ void CudaContext::clearAutoclearBuffers() { ...@@ -860,6 +864,25 @@ void CudaContext::clearAutoclearBuffers() {
} }
} }
void CudaContext::setCharges(const vector<double>& charges) {
if (chargeBuffer == NULL)
chargeBuffer = new CudaArray(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
if (getUseDoublePrecision()) {
double* c = (double*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
c[i] = charges[i];
chargeBuffer->upload(c);
}
else {
float* c = (float*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
c[i] = (float) charges[i];
chargeBuffer->upload(c);
}
void* args[] = {&chargeBuffer->getDevicePointer(), &posq->getDevicePointer(), &atomIndexDevice->getDevicePointer(), &numAtoms};
executeKernel(setChargesKernel, args, numAtoms);
}
/** /**
* This class ensures that atom reordering doesn't break virtual sites. * This class ensures that atom reordering doesn't break virtual sites.
*/ */
...@@ -1058,9 +1081,19 @@ void CudaContext::findMoleculeGroups() { ...@@ -1058,9 +1081,19 @@ void CudaContext::findMoleculeGroups() {
} }
void CudaContext::invalidateMolecules() { void CudaContext::invalidateMolecules() {
for (int i = 0; i < forces.size(); i++)
if (invalidateMolecules(forces[i]))
return;
}
bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return; return false;
bool valid = true; bool valid = true;
int forceIndex = -1;
for (int i = 0; i < forces.size(); i++)
if (forces[i] == force)
forceIndex = i;
for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) { for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) {
MoleculeGroup& mol = moleculeGroups[group]; MoleculeGroup& mol = moleculeGroups[group];
vector<int>& instances = mol.instances; vector<int>& instances = mol.instances;
...@@ -1075,22 +1108,21 @@ void CudaContext::invalidateMolecules() { ...@@ -1075,22 +1108,21 @@ void CudaContext::invalidateMolecules() {
Molecule& m2 = molecules[instances[j]]; Molecule& m2 = molecules[instances[j]];
int offset2 = offsets[j]; int offset2 = offsets[j];
for (int i = 0; i < (int) atoms.size() && valid; i++) { for (int i = 0; i < (int) atoms.size() && valid; i++) {
for (int k = 0; k < (int) forces.size(); k++) if (!force->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
if (!forces[k]->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2)) valid = false;
valid = false;
} }
// See if the force groups are identical. // See if the force groups are identical.
for (int i = 0; i < (int) forces.size() && valid; i++) { if (valid && forceIndex > -1) {
for (int k = 0; k < (int) m1.groups[i].size() && valid; k++) for (int k = 0; k < (int) m1.groups[forceIndex].size() && valid; k++)
if (!forces[i]->areGroupsIdentical(m1.groups[i][k], m2.groups[i][k])) if (!force->areGroupsIdentical(m1.groups[forceIndex][k], m2.groups[forceIndex][k]))
valid = false; valid = false;
} }
} }
} }
if (valid) if (valid)
return; return false;
// The list of which molecules are identical is no longer valid. We need to restore the // The list of which molecules are identical is no longer valid. We need to restore the
// atoms to their original order, rebuild the list of identical molecules, and sort them // atoms to their original order, rebuild the list of identical molecules, and sort them
...@@ -1158,6 +1190,7 @@ void CudaContext::invalidateMolecules() { ...@@ -1158,6 +1190,7 @@ void CudaContext::invalidateMolecules() {
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute(); reorderListeners[i]->execute();
reorderAtoms(); reorderAtoms();
return true;
} }
void CudaContext::reorderAtoms() { void CudaContext::reorderAtoms() {
......
This diff is collapsed.
...@@ -73,4 +73,11 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res ...@@ -73,4 +73,11 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
clearSingleBuffer(buffer6, size6); clearSingleBuffer(buffer6, size6);
} }
/**
* Record the atomic charges into the posq array.
*/
__global__ void setCharges(real* __restrict__ charges, real4* __restrict__ posq, int* __restrict__ atomOrder, int numAtoms) {
for (int i = blockDim.x*blockIdx.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x)
posq[i].w = charges[atomOrder[i]];
}
} }
\ No newline at end of file
...@@ -609,6 +609,10 @@ public: ...@@ -609,6 +609,10 @@ public:
OpenCLNonbondedUtilities& getNonbondedUtilities() { OpenCLNonbondedUtilities& getNonbondedUtilities() {
return *nonbonded; return *nonbonded;
} }
/**
* Set the particle charges. These are packed into the fourth element of the posq array.
*/
void setCharges(const std::vector<double>& charges);
/** /**
* Get the thread used by this context for executing parallel computations. * Get the thread used by this context for executing parallel computations.
*/ */
...@@ -692,6 +696,12 @@ public: ...@@ -692,6 +696,12 @@ public:
* and order to be revalidated. * and order to be revalidated.
*/ */
void invalidateMolecules(); void invalidateMolecules();
/**
* Mark that the current molecule definitions from one particular force (and hence the atom order)
* may be invalid. This should be called whenever force field parameters change. It will cause the
* definitions and order to be revalidated.
*/
bool invalidateMolecules(OpenCLForceInfo* force);
private: private:
struct Molecule; struct Molecule;
struct MoleculeGroup; struct MoleculeGroup;
...@@ -739,6 +749,7 @@ private: ...@@ -739,6 +749,7 @@ private:
cl::Kernel clearSixBuffersKernel; cl::Kernel clearSixBuffersKernel;
cl::Kernel reduceReal4Kernel; cl::Kernel reduceReal4Kernel;
cl::Kernel reduceForcesKernel; cl::Kernel reduceForcesKernel;
cl::Kernel setChargesKernel;
std::vector<OpenCLForceInfo*> forces; std::vector<OpenCLForceInfo*> forces;
std::vector<Molecule> molecules; std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups; std::vector<MoleculeGroup> moleculeGroups;
...@@ -754,6 +765,7 @@ private: ...@@ -754,6 +765,7 @@ private:
OpenCLArray* energyBuffer; OpenCLArray* energyBuffer;
OpenCLArray* energyParamDerivBuffer; OpenCLArray* energyParamDerivBuffer;
OpenCLArray* atomIndexDevice; OpenCLArray* atomIndexDevice;
OpenCLArray* chargeBuffer;
std::vector<std::string> energyParamDerivNames; std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace; std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex; std::vector<int> atomIndex;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. * * Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -176,7 +176,6 @@ public: ...@@ -176,7 +176,6 @@ public:
*/ */
void loadCheckpoint(ContextImpl& context, std::istream& stream); void loadCheckpoint(ContextImpl& context, std::istream& stream);
private: private:
class GetPositionsTask;
OpenCLContext& cl; OpenCLContext& cl;
}; };
...@@ -270,9 +269,11 @@ public: ...@@ -270,9 +269,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force); void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
private: private:
class ForceInfo;
int numBonds; int numBonds;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
const System& system; const System& system;
OpenCLArray* params; OpenCLArray* params;
}; };
...@@ -310,9 +311,11 @@ public: ...@@ -310,9 +311,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomBondForce& force); void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
private: private:
class ForceInfo;
int numBonds; int numBonds;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
const System& system; const System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray* globals;
...@@ -353,9 +356,11 @@ public: ...@@ -353,9 +356,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force); void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
private: private:
class ForceInfo;
int numAngles; int numAngles;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
const System& system; const System& system;
OpenCLArray* params; OpenCLArray* params;
}; };
...@@ -393,9 +398,11 @@ public: ...@@ -393,9 +398,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force); void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
private: private:
class ForceInfo;
int numAngles; int numAngles;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
const System& system; const System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray* globals;
...@@ -436,9 +443,11 @@ public: ...@@ -436,9 +443,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force); void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
const System& system; const System& system;
OpenCLArray* params; OpenCLArray* params;
}; };
...@@ -476,9 +485,11 @@ public: ...@@ -476,9 +485,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force); void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
const System& system; const System& system;
OpenCLArray* params; OpenCLArray* params;
}; };
...@@ -516,9 +527,11 @@ public: ...@@ -516,9 +527,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force); void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
const System& system; const System& system;
std::vector<mm_int2> mapPositionsVec; std::vector<mm_int2> mapPositionsVec;
OpenCLArray* coefficients; OpenCLArray* coefficients;
...@@ -559,9 +572,11 @@ public: ...@@ -559,9 +572,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force); void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
const System& system; const System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray* globals;
...@@ -635,12 +650,14 @@ private: ...@@ -635,12 +650,14 @@ private:
const char* getMaxValue() const {return "(int2) (INT_MAX, INT_MAX)";} const char* getMaxValue() const {return "(int2) (INT_MAX, INT_MAX)";}
const char* getSortKey() const {return "value.y";} const char* getSortKey() const {return "value.y";}
}; };
class ForceInfo;
class PmeIO; class PmeIO;
class PmePreComputation; class PmePreComputation;
class PmePostComputation; class PmePostComputation;
class SyncQueuePreComputation; class SyncQueuePreComputation;
class SyncQueuePostComputation; class SyncQueuePostComputation;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLArray* sigmaEpsilon; OpenCLArray* sigmaEpsilon;
OpenCLArray* exceptionParams; OpenCLArray* exceptionParams;
...@@ -726,8 +743,10 @@ public: ...@@ -726,8 +743,10 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force); void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private: private:
class ForceInfo;
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes); void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes);
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray* globals;
OpenCLArray* interactionGroupData; OpenCLArray* interactionGroupData;
...@@ -778,10 +797,12 @@ public: ...@@ -778,10 +797,12 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force); void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private: private:
class ForceInfo;
double prefactor, surfaceAreaFactor, cutoff; double prefactor, surfaceAreaFactor, cutoff;
bool hasCreatedKernels; bool hasCreatedKernels;
int maxTiles; int maxTiles;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
OpenCLArray* params; OpenCLArray* params;
OpenCLArray* bornSum; OpenCLArray* bornSum;
OpenCLArray* longBornSum; OpenCLArray* longBornSum;
...@@ -829,10 +850,12 @@ public: ...@@ -829,10 +850,12 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force); void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private: private:
class ForceInfo;
double cutoff; double cutoff;
bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs; bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues; int maxTiles, numComputedValues;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLParameterSet* computedValues; OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs; OpenCLParameterSet* energyDerivs;
...@@ -886,9 +909,11 @@ public: ...@@ -886,9 +909,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force); void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
private: private:
class ForceInfo;
int numParticles; int numParticles;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
const System& system; const System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray* globals;
...@@ -930,9 +955,11 @@ public: ...@@ -930,9 +955,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force); void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private: private:
class ForceInfo;
int numDonors, numAcceptors; int numDonors, numAcceptors;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* donorParams; OpenCLParameterSet* donorParams;
OpenCLParameterSet* acceptorParams; OpenCLParameterSet* acceptorParams;
OpenCLArray* globals; OpenCLArray* globals;
...@@ -983,9 +1010,11 @@ public: ...@@ -983,9 +1010,11 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force); void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force);
private: private:
class ForceInfo;
int numGroups, numBonds; int numGroups, numBonds;
bool needEnergyParamDerivs; bool needEnergyParamDerivs;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray* globals;
OpenCLArray* groupParticles; OpenCLArray* groupParticles;
...@@ -1035,8 +1064,10 @@ public: ...@@ -1035,8 +1064,10 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force); void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force);
private: private:
class ForceInfo;
int numBonds; int numBonds;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray* globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
...@@ -1081,7 +1112,9 @@ public: ...@@ -1081,7 +1112,9 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomManyParticleForce& force); void copyParametersToContext(ContextImpl& context, const CustomManyParticleForce& force);
private: private:
class ForceInfo;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
bool hasInitializedKernel; bool hasInitializedKernel;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize; int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
...@@ -1141,9 +1174,11 @@ public: ...@@ -1141,9 +1174,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const GayBerneForce& force); void copyParametersToContext(ContextImpl& context, const GayBerneForce& force);
private: private:
class ForceInfo;
class ReorderListener; class ReorderListener;
void sortAtoms(); void sortAtoms();
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info;
bool hasInitializedKernels; bool hasInitializedKernels;
int numRealParticles, maxNeighborBlocks; int numRealParticles, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod; GayBerneForce::NonbondedMethod nonbondedMethod;
......
...@@ -69,8 +69,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -69,8 +69,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) : OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL), system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL),
expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { chargeBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") { if (precision == "single") {
useDoublePrecision = false; useDoublePrecision = false;
useMixedPrecision = false; useMixedPrecision = false;
...@@ -309,6 +309,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -309,6 +309,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
reduceReal4Kernel = cl::Kernel(utilities, "reduceReal4Buffer"); reduceReal4Kernel = cl::Kernel(utilities, "reduceReal4Buffer");
if (supports64BitGlobalAtomics) if (supports64BitGlobalAtomics)
reduceForcesKernel = cl::Kernel(utilities, "reduceForces"); reduceForcesKernel = cl::Kernel(utilities, "reduceForces");
setChargesKernel = cl::Kernel(utilities, "setCharges");
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use. // Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
...@@ -439,6 +440,8 @@ OpenCLContext::~OpenCLContext() { ...@@ -439,6 +440,8 @@ OpenCLContext::~OpenCLContext() {
delete energyParamDerivBuffer; delete energyParamDerivBuffer;
if (atomIndexDevice != NULL) if (atomIndexDevice != NULL)
delete atomIndexDevice; delete atomIndexDevice;
if (chargeBuffer != NULL)
delete chargeBuffer;
if (integration != NULL) if (integration != NULL)
delete integration; delete integration;
if (expression != NULL) if (expression != NULL)
...@@ -747,6 +750,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) { ...@@ -747,6 +750,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
executeKernel(reduceReal4Kernel, bufferSize, 128); executeKernel(reduceReal4Kernel, bufferSize, 128);
} }
void OpenCLContext::setCharges(const vector<double>& charges) {
if (chargeBuffer == NULL)
chargeBuffer = new OpenCLArray(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
if (getUseDoublePrecision()) {
double* c = (double*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
c[i] = charges[i];
chargeBuffer->upload(c);
}
else {
float* c = (float*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
c[i] = (float) charges[i];
chargeBuffer->upload(c);
}
setChargesKernel.setArg<cl::Buffer>(0, chargeBuffer->getDeviceBuffer());
setChargesKernel.setArg<cl::Buffer>(1, posq->getDeviceBuffer());
setChargesKernel.setArg<cl::Buffer>(2, atomIndexDevice->getDeviceBuffer());
setChargesKernel.setArg<cl_int>(3, numAtoms);
executeKernel(setChargesKernel, numAtoms);
}
/** /**
* This class ensures that atom reordering doesn't break virtual sites. * This class ensures that atom reordering doesn't break virtual sites.
*/ */
...@@ -945,9 +970,19 @@ void OpenCLContext::findMoleculeGroups() { ...@@ -945,9 +970,19 @@ void OpenCLContext::findMoleculeGroups() {
} }
void OpenCLContext::invalidateMolecules() { void OpenCLContext::invalidateMolecules() {
for (int i = 0; i < forces.size(); i++)
if (invalidateMolecules(forces[i]))
return;
}
bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return; return false;
bool valid = true; bool valid = true;
int forceIndex = -1;
for (int i = 0; i < forces.size(); i++)
if (forces[i] == force)
forceIndex = i;
for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) { for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) {
MoleculeGroup& mol = moleculeGroups[group]; MoleculeGroup& mol = moleculeGroups[group];
vector<int>& instances = mol.instances; vector<int>& instances = mol.instances;
...@@ -962,22 +997,21 @@ void OpenCLContext::invalidateMolecules() { ...@@ -962,22 +997,21 @@ void OpenCLContext::invalidateMolecules() {
Molecule& m2 = molecules[instances[j]]; Molecule& m2 = molecules[instances[j]];
int offset2 = offsets[j]; int offset2 = offsets[j];
for (int i = 0; i < (int) atoms.size() && valid; i++) { for (int i = 0; i < (int) atoms.size() && valid; i++) {
for (int k = 0; k < (int) forces.size(); k++) if (!force->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
if (!forces[k]->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2)) valid = false;
valid = false;
} }
// See if the force groups are identical. // See if the force groups are identical.
for (int i = 0; i < (int) forces.size() && valid; i++) { if (valid && forceIndex > -1) {
for (int k = 0; k < (int) m1.groups[i].size() && valid; k++) for (int k = 0; k < (int) m1.groups[forceIndex].size() && valid; k++)
if (!forces[i]->areGroupsIdentical(m1.groups[i][k], m2.groups[i][k])) if (!force->areGroupsIdentical(m1.groups[forceIndex][k], m2.groups[forceIndex][k]))
valid = false; valid = false;
} }
} }
} }
if (valid) if (valid)
return; return false;
// The list of which molecules are identical is no longer valid. We need to restore the // The list of which molecules are identical is no longer valid. We need to restore the
// atoms to their original order, rebuild the list of identical molecules, and sort them // atoms to their original order, rebuild the list of identical molecules, and sort them
...@@ -1045,6 +1079,7 @@ void OpenCLContext::invalidateMolecules() { ...@@ -1045,6 +1079,7 @@ void OpenCLContext::invalidateMolecules() {
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute(); reorderListeners[i]->execute();
reorderAtoms(); reorderAtoms();
return true;
} }
void OpenCLContext::reorderAtoms() { void OpenCLContext::reorderAtoms() {
......
This diff is collapsed.
...@@ -107,3 +107,11 @@ __kernel void determineNativeAccuracy(__global float8* restrict values, int numV ...@@ -107,3 +107,11 @@ __kernel void determineNativeAccuracy(__global float8* restrict values, int numV
values[i] = (float8) (v, native_sqrt(v), native_rsqrt(v), native_recip(v), native_exp(v), native_log(v), 0.0f, 0.0f); values[i] = (float8) (v, native_sqrt(v), native_rsqrt(v), native_recip(v), native_exp(v), native_log(v), 0.0f, 0.0f);
} }
} }
/**
* Record the atomic charges into the posq array.
*/
__kernel void setCharges(__global real* restrict charges, __global real4* restrict posq, __global int* restrict atomOrder, int numAtoms) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0))
posq[i].w = charges[atomOrder[i]];
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment