Unverified Commit cca3e11a authored by Andy Simmonett's avatar Andy Simmonett
Browse files

Correct CUDA NHC implementation

parent 50efef13
...@@ -121,7 +121,7 @@ int NoseHooverIntegrator::addSubsystemThermostat(System& system, const std::vect ...@@ -121,7 +121,7 @@ int NoseHooverIntegrator::addSubsystemThermostat(System& system, const std::vect
int numForces = system.getNumForces(); int numForces = system.getNumForces();
if (thermostatedPairs.size() == 0){ // remove 3 degrees of freedom from thermostats that act on absolute motions if (thermostatedPairs.size() == 0){ // remove 3 degrees of freedom from thermostats that act on absolute motions
for (int forceNum = 0; forceNum < numForces; ++forceNum) { for (int forceNum = 0; forceNum < numForces; ++forceNum) {
// if (dynamic_cast<CMMotionRemover*>(&system.getForce(forceNum))) nDOF -= 3; if (dynamic_cast<CMMotionRemover*>(&system.getForce(forceNum))) nDOF -= 3;
} }
} }
...@@ -143,11 +143,7 @@ int NoseHooverIntegrator::addSubsystemThermostat(System& system, const std::vect ...@@ -143,11 +143,7 @@ int NoseHooverIntegrator::addSubsystemThermostat(System& system, const std::vect
} }
// create and add new chain // create and add new chain
int chainID = 0; int chainID = noseHooverChains.size();
for(const auto &c : noseHooverChains) {
// We need to account for the fact that a NHC with pairs thermostated has both a relative and an absolute chain
chainID += c.getThermostatedPairs().size() ? 2 : 1;
}
auto nhc = NoseHooverChain(temperature, relativeTemperature, collisionFrequency, relativeCollisionFrequency, auto nhc = NoseHooverChain(temperature, relativeTemperature, collisionFrequency, relativeCollisionFrequency,
nDOF, chainLength, numMTS, numYoshidaSuzuki, chainID, thermostatedParticles, thermostatedPairs); nDOF, chainLength, numMTS, numYoshidaSuzuki, chainID, thermostatedParticles, thermostatedPairs);
noseHooverChains.push_back(nhc); noseHooverChains.push_back(nhc);
...@@ -232,10 +228,7 @@ double NoseHooverIntegrator::computeKineticEnergy() { ...@@ -232,10 +228,7 @@ double NoseHooverIntegrator::computeKineticEnergy() {
double kE = 0.0; double kE = 0.0;
if(noseHooverChains.size()) { if(noseHooverChains.size()) {
for (const auto &nhc: noseHooverChains){ for (const auto &nhc: noseHooverChains){
if (nhc.getThermostatedPairs().size() == 0) { kE += nhcKernel.getAs<NoseHooverChainKernel>().computeMaskedKineticEnergy(*context, nhc, true).first;
auto KEs = nhcKernel.getAs<NoseHooverChainKernel>().computeMaskedKineticEnergy(*context, nhc, true);
kE += std::get<0>(KEs);
}
} }
} else { } else {
kE = vvKernel.getAs<IntegrateVelocityVerletStepKernel>().computeKineticEnergy(*context, *this); kE = vvKernel.getAs<IntegrateVelocityVerletStepKernel>().computeKineticEnergy(*context, *this);
......
...@@ -1731,11 +1731,11 @@ public: ...@@ -1731,11 +1731,11 @@ public:
* *
* @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated. * @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergy the kineticEnergy of the particles being thermostated by this chain. * @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator. * @param timeStep the time step used by the integrator.
* @return the velocity scale factor to apply to the particles associated with this heat bath. * @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/ */
virtual double propagateChain(ContextImpl& context, const NoseHooverChain &nhc, double kineticEnergy, double timeStep); virtual std::pair<double, double> propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep);
/** /**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy. * Execute the kernal that computes the total (kinetic + potential) heat bath energy.
* *
...@@ -1753,26 +1753,29 @@ public: ...@@ -1753,26 +1753,29 @@ public:
* @param downloadValue whether the computed value should be downloaded and returned. * @param downloadValue whether the computed value should be downloaded and returned.
* *
*/ */
virtual double computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue); virtual std::pair<double,double> computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue);
/** /**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain * Execute the kernel that scales the velocities of particles associated with a nose hoover chain
* *
* @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined. * @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactor the multiplicative factor by which velocities are scaled. * @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/ */
virtual void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, double scaleFactor); virtual void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, std::pair<double, double> scaleFactors);
private: private:
int sumWorkGroupSize;
CudaContext& cu; CudaContext& cu;
CudaArray scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy; CudaArray scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
std::map<int, CudaArray> masks; std::map<int, CudaArray> atomlists, pairlists;
std::map<int, CUfunction> propagateKernels; std::map<int, CUfunction> propagateKernels;
CUfunction reduceEnergyKernel; CUfunction reduceEnergyKernel;
CUfunction computeHeatBathEnergyKernel; CUfunction computeHeatBathEnergyKernel;
CUfunction computeMaskedKineticEnergyKernel; CUfunction computeAtomsKineticEnergyKernel;
CUfunction scaleVelocitiesKernel; CUfunction computePairsKineticEnergyKernel;
CUfunction scaleAtomsVelocitiesKernel;
CUfunction scalePairsVelocitiesKernel;
}; };
/** /**
......
...@@ -808,7 +808,6 @@ double CudaIntegrationUtilities::computeKineticEnergy(double timeShift) { ...@@ -808,7 +808,6 @@ double CudaIntegrationUtilities::computeKineticEnergy(double timeShift) {
energy += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w; energy += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
} }
} }
// Restore the velocities. // Restore the velocities.
if (timeShift != 0) if (timeShift != 0)
......
...@@ -8358,6 +8358,8 @@ void CudaNoseHooverChainKernel::initialize() { ...@@ -8358,6 +8358,8 @@ void CudaNoseHooverChainKernel::initialize() {
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision(); bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
map<string, string> defines; map<string, string> defines;
sumWorkGroupSize = 512;
defines["WORK_GROUP_SIZE"] = cu.intToString(sumWorkGroupSize);
defines["MIXEDEXP"] = useDouble ? "exp" : "expf"; defines["MIXEDEXP"] = useDouble ? "exp" : "expf";
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {1}) {"; defines["BEGIN_YS_LOOP"] = "for(const real & ys : {1}) {";
defines["END_YS_LOOP"] = "}"; defines["END_YS_LOOP"] = "}";
...@@ -8369,56 +8371,50 @@ void CudaNoseHooverChainKernel::initialize() { ...@@ -8369,56 +8371,50 @@ void CudaNoseHooverChainKernel::initialize() {
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}){"; defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}){";
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11"); module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
propagateKernels[5] = cu.getKernel(module, "propagateNoseHooverChain"); propagateKernels[5] = cu.getKernel(module, "propagateNoseHooverChain");
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
reduceEnergyKernel = cu.getKernel(module, "reduceEnergyPair");
computeHeatBathEnergyKernel = cu.getKernel(module, "computeHeatBathEnergy"); computeHeatBathEnergyKernel = cu.getKernel(module, "computeHeatBathEnergy");
computeMaskedKineticEnergyKernel = cu.getKernel(module, "computeMaskedKineticEnergy"); computeAtomsKineticEnergyKernel = cu.getKernel(module, "computeAtomsKineticEnergy");
scaleVelocitiesKernel = cu.getKernel(module, "scaleVelocities"); computePairsKineticEnergyKernel = cu.getKernel(module, "computePairsKineticEnergy");
CUmodule utilities = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::utilities); scaleAtomsVelocitiesKernel = cu.getKernel(module, "scaleAtomsVelocities");
reduceEnergyKernel = cu.getKernel(utilities, "reduceEnergy"); scalePairsVelocitiesKernel = cu.getKernel(module, "scalePairsVelocities");
} }
double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, double kineticEnergy, double timeStep) { std::pair<double, double> CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep) {
cu.setAsCurrent(); cu.setAsCurrent();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision(); bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID(); int chainID = nhc.getDefaultChainID();
int nAtoms = nhc.getThermostatedAtoms().size();
int nPairs = nhc.getThermostatedPairs().size();
int chainLength = nhc.getDefaultChainLength(); int chainLength = nhc.getDefaultChainLength();
int numYS = nhc.getDefaultNumYoshidaSuzukiTimeSteps(); int numYS = nhc.getDefaultNumYoshidaSuzukiTimeSteps();
int numMTS = nhc.getDefaultNumMultiTimeSteps(); int numMTS = nhc.getDefaultNumMultiTimeSteps();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom(); int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature(); double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency(); double frequency = nhc.getDefaultCollisionFrequency();
double relativeTemperature = nhc.getDefaultRelativeTemperature();
double relativeFrequency = nhc.getDefaultRelativeCollisionFrequency();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState(); if (numYS != 1 && numYS != 3 && numYS != 5) {
if (!chainState.count(chainID)) { throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
chainState[chainID] = CudaArray();
}
if (chainState.at(chainID).getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState.at(chainID).initialize<double2>(cu, chainLength, "chainState" + std::to_string(chainID));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(chainID).upload(zeros.data());
} else {
chainState.at(chainID).initialize<float2>(cu, chainLength, "chainState" + std::to_string(chainID));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(chainID).upload(zeros.data());
}
} }
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (!scaleFactorBuffer.isInitialized() ||scaleFactorBuffer.getSize() == 0) { if (!scaleFactorBuffer.isInitialized() ||scaleFactorBuffer.getSize() == 0) {
if(useDouble){ if(useDouble){
std::vector<double> one(1); std::vector<double2> zeros{{0,0}};
scaleFactorBuffer.initialize<double>(cu, 1, "scaleFactorBuffer"); scaleFactorBuffer.initialize<double2>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(one); scaleFactorBuffer.upload(zeros);
} else { } else {
std::vector<float> one(1); std::vector<float2> zeros{{0,0}};
scaleFactorBuffer.initialize<float>(cu, 1, "scaleFactorBuffer"); scaleFactorBuffer.initialize<float2>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(one); scaleFactorBuffer.upload(zeros);
} }
} }
std::vector<double> zeros(chainLength,0); std::vector<double> zeros(chainLength,0);
if (!chainForces.isInitialized() || !chainMasses.isInitialized() ){ if (!chainForces.isInitialized() || !chainMasses.isInitialized() ){
if(useDouble){ if(useDouble){
...@@ -8435,28 +8431,73 @@ double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const Nos ...@@ -8435,28 +8431,73 @@ double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const Nos
} }
if (chainForces.getSize() < chainLength) chainMasses.resize(chainLength); if (chainForces.getSize() < chainLength) chainMasses.resize(chainLength);
if (chainMasses.getSize() < chainLength) chainMasses.resize(chainLength); if (chainMasses.getSize() < chainLength) chainMasses.resize(chainLength);
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float timeStepFloat = (float) timeStep; float timeStepFloat = (float) timeStep;
float frequencyFloat = (float) frequency;
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead // N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
void *args[] = { if (nAtoms) {
&chainState[chainID].getDevicePointer(), if (!chainState.count(2*chainID)) chainState[2*chainID] = CudaArray();
if (chainState.at(2*chainID).getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState.at(2*chainID).initialize<double2>(cu, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(2*chainID).upload(zeros.data());
} else {
chainState.at(2*chainID).initialize<float2>(cu, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(2*chainID).upload(zeros.data());
}
}
int chainType = 0;
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void *args[] = {
&chainState[2*chainID].getDevicePointer(),
&kineticEnergyBuffer.getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(),
&scaleFactorBuffer.getDevicePointer(), &scaleFactorBuffer.getDevicePointer(),
&chainMasses.getDevicePointer(), &chainMasses.getDevicePointer(),
&chainForces.getDevicePointer(), &chainForces.getDevicePointer(),
&chainLength, &numMTS, &numDOFs, &chainType, &chainLength, &numMTS, &numDOFs,
&timeStepFloat, &timeStepFloat,
useDouble ? (void*) &kT : (void*) &kTfloat, useDouble ? (void*) &kT : (void*) &kTfloat,
&frequencyFloat &frequencyFloat
}; };
if (numYS == 1 || numYS == 3 || numYS == 5) {
cu.executeKernel(propagateKernels[numYS], args, 1, 1); cu.executeKernel(propagateKernels[numYS], args, 1, 1);
} else {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
} }
return 0; if (nPairs) {
if (!chainState.count(2*chainID+1)) chainState[2*chainID+1] = CudaArray();
if (chainState.at(2*chainID+1).getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState.at(2*chainID+1).initialize<double2>(cu, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(2*chainID+1).upload(zeros.data());
} else {
chainState.at(2*chainID+1).initialize<float2>(cu, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(2*chainID+1).upload(zeros.data());
}
}
int chainType = 1;
double kT = BOLTZ * relativeTemperature;
int ndf = 3*nPairs;
float kTfloat = (float) kT;
float frequencyFloat = (float) relativeFrequency;
void *args[] = {
&chainState[2*chainID+1].getDevicePointer(),
&kineticEnergyBuffer.getDevicePointer(),
&scaleFactorBuffer.getDevicePointer(),
&chainMasses.getDevicePointer(),
&chainForces.getDevicePointer(),
&chainType, &chainLength, &numMTS, &ndf,
&timeStepFloat,
useDouble ? (void*) &kT : (void*) &kTfloat,
&frequencyFloat
};
cu.executeKernel(propagateKernels[numYS], args, 1, 1);
}
return {0, 0};
} }
double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) { double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
...@@ -8466,14 +8507,17 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co ...@@ -8466,14 +8507,17 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
int chainID = nhc.getDefaultChainID(); int chainID = nhc.getDefaultChainID();
int chainLength = nhc.getDefaultChainLength(); int chainLength = nhc.getDefaultChainLength();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState(); auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (chainID >= chainState.size() || !chainState[chainID].isInitialized() || chainState[chainID].getSize() != chainLength) {
return 0.0; bool absChainIsValid = chainState.count(2*chainID) != 0 &&
} chainState[2*chainID].isInitialized() &&
chainState[2*chainID].getSize() == chainLength;
bool relChainIsValid = chainState.count(2*chainID+1) != 0 &&
chainState[2*chainID+1].isInitialized() &&
chainState[2*chainID+1].getSize() == chainLength;
if (!absChainIsValid && !relChainIsValid) return 0.0;
if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) { if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) {
if(useDouble){ if(useDouble){
...@@ -8487,15 +8531,35 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co ...@@ -8487,15 +8531,35 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
} }
} }
double kT = BOLTZ * temperature; cu.clearBuffer(heatBathEnergy);
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency; if (absChainIsValid) {
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs, void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat, useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &chainState[chainID].getDevicePointer()}; &frequencyFloat, &chainState[2*chainID].getDevicePointer()};
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
}
if (relChainIsValid) {
int numDOFs = 3 * nhc.getThermostatedPairs().size();
double temperature = nhc.getDefaultRelativeTemperature();
double frequency = nhc.getDefaultRelativeCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &chainState[2*chainID+1].getDevicePointer()};
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
}
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
void * pinnedBuffer = cu.getPinnedBuffer(); void * pinnedBuffer = cu.getPinnedBuffer();
heatBathEnergy.download(pinnedBuffer); heatBathEnergy.download(pinnedBuffer);
...@@ -8506,78 +8570,94 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co ...@@ -8506,78 +8570,94 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
} }
} }
double CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &nhc, bool downloadValue) { std::pair<double, double> CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &nhc, bool downloadValue) {
cu.setAsCurrent(); cu.setAsCurrent();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision(); bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID(); int chainID = nhc.getDefaultChainID();
const auto & parents = nhc.getThermostatedPairs(); const auto & nhcAtoms = nhc.getThermostatedAtoms();
const auto & thermostatedAtoms = nhc.getThermostatedAtoms(); const auto & nhcPairs = nhc.getThermostatedPairs();
auto nAtoms = cu.getPaddedNumAtoms(); auto nAtoms = nhcAtoms.size();
if (!masks.count(chainID)) { auto nPairs = nhcPairs.size();
// We need to upload the CUDA array if (nAtoms) {
std::vector<int> maskVec(nAtoms); if (!atomlists.count(chainID)) {
for (int i = 0; i < nAtoms; ++i) maskVec[i] = i; // We need to upload the CUDA array
atomlists[chainID] = CudaArray();
if (parents.size() ) { atomlists[chainID].initialize<int>(cu, nAtoms, "atomlist" + std::to_string(chainID));
assert(parents.size() == thermostatedAtoms.size()); atomlists[chainID].upload(nhcAtoms);
for ( size_t i = 0; i < parents.size(); ++i) { }
int atom = thermostatedAtoms[i]; if (atomlists[chainID].getSize() != nAtoms) {
maskVec[atom] = parents[i]; throw OpenMMException("Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat.");
} }
} else { }
for ( const auto & atom : thermostatedAtoms) { if (nPairs) {
maskVec[atom] = -1L; if (!pairlists.count(chainID)) {
} // We need to upload the CUDA array
pairlists[chainID] = CudaArray();
pairlists[chainID].initialize<int2>(cu, nPairs, "pairlist" + std::to_string(chainID));
std::vector<int2> int2vec;
for(const auto &p : nhcPairs) int2vec.push_back(make_int2(p.first, p.second));
pairlists[chainID].upload(int2vec);
}
if (pairlists[chainID].getSize() != nPairs) {
throw OpenMMException("Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat.");
} }
masks[chainID] = CudaArray();
masks[chainID].initialize<int>(cu, nAtoms, "mask" + std::to_string(chainID));
masks[chainID].upload(maskVec);
}
if (masks[chainID].getSize() != nAtoms) {
throw OpenMMException("Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat.");
} }
if (!kineticEnergyBuffer.isInitialized() || kineticEnergyBuffer.getSize() == 0) { if (!kineticEnergyBuffer.isInitialized() || kineticEnergyBuffer.getSize() == 0) {
if(useDouble){ if(useDouble){
std::vector<double> one(1); std::vector<double2> zeros{{0,0}};
kineticEnergyBuffer.initialize<double>(cu, 1, "kineticEnergyBuffer"); kineticEnergyBuffer.initialize<double2>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(one); kineticEnergyBuffer.upload(zeros);
} else { } else {
std::vector<float> one(1); std::vector<float2> zeros{{0,0}};
kineticEnergyBuffer.initialize<float>(cu, 1, "kineticEnergyBuffer"); kineticEnergyBuffer.initialize<float2>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(one); kineticEnergyBuffer.upload(zeros);
} }
} }
cu.clearBuffer(cu.getEnergyBuffer()); cu.clearBuffer(cu.getEnergyBuffer());
void *args[] = {&cu.getEnergyBuffer().getDevicePointer(),&nAtoms, &cu.getVelm().getDevicePointer(), &masks[chainID].getDevicePointer()}; if (nAtoms) {
cu.executeKernel(computeMaskedKineticEnergyKernel, args, nAtoms); void *args[] = {&cu.getEnergyBuffer().getDevicePointer(),&nAtoms, &cu.getVelm().getDevicePointer(), &atomlists[chainID].getDevicePointer()};
cu.executeKernel(computeAtomsKineticEnergyKernel, args, nAtoms);
}
if (nPairs) {
void *args[] = {&cu.getEnergyBuffer().getDevicePointer(),&nPairs, &cu.getVelm().getDevicePointer(), &pairlists[chainID].getDevicePointer()};
cu.executeKernel(computePairsKineticEnergyKernel, args, nPairs);
}
//taken from CudaContext::reduceEnergy(); the final kinetic energy will live in the kineticEnergy buffer //taken from CudaContext::reduceEnergy(); the final kinetic energy will live in the kineticEnergy buffer
int bufferSize = cu.getEnergyBuffer().getSize(); int bufferSize = cu.getEnergyBuffer().getSize() / 2; // Halve it to account for the fact that we're storing mixed2 instead of mixed in there
int workGroupSize = 512; void* args2[] = {&cu.getEnergyBuffer().getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &sumWorkGroupSize};
void* args2[] = {&cu.getEnergyBuffer().getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &workGroupSize}; cu.executeKernel(reduceEnergyKernel, args2, sumWorkGroupSize, sumWorkGroupSize, 2*sumWorkGroupSize*cu.getEnergyBuffer().getElementSize());
cu.executeKernel(reduceEnergyKernel, args2, workGroupSize, workGroupSize, workGroupSize*cu.getEnergyBuffer().getElementSize());
double value = 0; std::pair<double, double> KEs = {0, 0};
if (downloadValue) { if (downloadValue) {
void * pinnedBuffer = cu.getPinnedBuffer(); void * pinnedBuffer = cu.getPinnedBuffer();
kineticEnergyBuffer.download(pinnedBuffer); kineticEnergyBuffer.download(pinnedBuffer);
value = useDouble ? *((double*) pinnedBuffer) : *((float*) pinnedBuffer); KEs.first = useDouble ? *((double*) pinnedBuffer) : *((float*) pinnedBuffer);
KEs.second = useDouble ? *((double*) pinnedBuffer + 1) : *((float*) pinnedBuffer + 1);
} }
return value; return KEs;
} }
void CudaNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &nhc, double scaleFactor) { void CudaNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> scaleFactor) {
// For now we assume that the mask info is valid, because computeMaskedKineticEnergy must have been // For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here. // called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
cu.setAsCurrent(); cu.setAsCurrent();
auto nAtoms = cu.getPaddedNumAtoms();
int chainID = nhc.getDefaultChainID(); int chainID = nhc.getDefaultChainID();
void *args[] = {&scaleFactorBuffer.getDevicePointer(), auto nAtoms = nhc.getThermostatedAtoms().size();
&nAtoms, &cu.getVelm().getDevicePointer(), &masks[chainID].getDevicePointer()}; auto nPairs = nhc.getThermostatedPairs().size();
cu.executeKernel(scaleVelocitiesKernel, args, nAtoms); if(nAtoms) {
void *args[] = {&scaleFactorBuffer.getDevicePointer(),
&nAtoms, &cu.getVelm().getDevicePointer(), &atomlists[chainID].getDevicePointer()};
cu.executeKernel(scaleAtomsVelocitiesKernel, args, nAtoms);
}
if(nPairs) {
void *args[] = {&scaleFactorBuffer.getDevicePointer(),
&nPairs, &cu.getVelm().getDevicePointer(), &pairlists[chainID].getDevicePointer()};
cu.executeKernel(scalePairsVelocitiesKernel, args, nPairs);
}
} }
void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) { void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
......
#include <initializer_list> #include <initializer_list>
extern "C" __global__ void propagateNoseHooverChain(mixed2* __restrict__ chainData, const mixed * __restrict__ energySum, mixed* __restrict__ scaleFactor, extern "C" __global__ void propagateNoseHooverChain(mixed2* __restrict__ chainData, const mixed2 * __restrict__ energySum, mixed2* __restrict__ scaleFactor,
mixed* __restrict__ chainMasses, mixed* __restrict__ chainForces, mixed* __restrict__ chainMasses, mixed* __restrict__ chainForces,
int chainLength, int numMTS, int numDOFs, float timeStep, int chainType, int chainLength, int numMTS, int numDOFs, float timeStep,
mixed kT, float frequency){ mixed kT, float frequency){
mixed &scale = scaleFactor[0]; const mixed & kineticEnergy = chainType ? energySum[0].y : energySum[0].x;
mixed &scale = chainType ? scaleFactor[0].y : scaleFactor[0].x;
scale = (mixed) 1; scale = (mixed) 1;
const mixed & kineticEnergy = energySum[0];
if(kineticEnergy < 1e-8) return; if(kineticEnergy < 1e-8) return;
for (int bead = 0; bead < chainLength; ++bead) chainMasses[bead] = kT / (frequency * frequency); for (int bead = 0; bead < chainLength; ++bead) chainMasses[bead] = kT / (frequency * frequency);
chainMasses[0] *= numDOFs; chainMasses[0] *= numDOFs;
...@@ -49,11 +49,10 @@ extern "C" __global__ void propagateNoseHooverChain(mixed2* __restrict__ chainDa ...@@ -49,11 +49,10 @@ extern "C" __global__ void propagateNoseHooverChain(mixed2* __restrict__ chainDa
/** /**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads * Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/ */
extern "C" __global__ void computeHeatBathEnergy(mixed* __restrict__ heatBathEnergy, int chainLength, int numDOFs, extern "C" __global__ void computeHeatBathEnergy(mixed* __restrict__ heatBathEnergy, int chainLength, int numDOFs,
mixed kT, float frequency, const mixed2* __restrict__ chainData){ mixed kT, float frequency, const mixed2* __restrict__ chainData){
// Note that this is always incremented; make sure it's zeroed properly before the first call
mixed &energy = heatBathEnergy[0]; mixed &energy = heatBathEnergy[0];
energy = (mixed) 0;
for(int i = 0; i < chainLength; ++i) { for(int i = 0; i < chainLength; ++i) {
mixed prefac = i ? 1 : numDOFs; mixed prefac = i ? 1 : numDOFs;
...@@ -67,35 +66,105 @@ extern "C" __global__ void computeHeatBathEnergy(mixed* __restrict__ heatBathEne ...@@ -67,35 +66,105 @@ extern "C" __global__ void computeHeatBathEnergy(mixed* __restrict__ heatBathEne
} }
} }
extern "C" __global__ void computeMaskedKineticEnergy(mixed * __restrict__ energyBuffer, int paddedNumAtoms, extern "C" __global__ void computeAtomsKineticEnergy(mixed2 * __restrict__ energyBuffer, int numAtoms,
const mixed4* __restrict__ velm, const int *__restrict__ mask){ const mixed4* __restrict__ velm, const int *__restrict__ atoms){
mixed energy = 0; mixed2 energy = make_mixed2(0,0);
//energy = 1; return; //energy = 1; return;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < paddedNumAtoms; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
mixed4 v = velm[index]; int atom = atoms[index];
mixed4 v = velm[atom];
mixed mass = v.w == 0 ? 0 : 1 / v.w; mixed mass = v.w == 0 ? 0 : 1 / v.w;
if (mask[index] >= 0){ energy.x += 0.5f * mass * (v.x*v.x + v.y*v.y + v.z*v.z);
const mixed4& vparent = velm[mask[index]];
mixed massp = vparent.w == 0 ? 0 : 1/vparent.w;
mass = (massp + mass) == 0 ? 0 : (massp * mass) / ( massp + mass );
v.x -= vparent.x;
v.y -= vparent.y;
v.z -= vparent.z;
}
energy += 0.5f * mass * (v.x*v.x + v.y*v.y + v.z*v.z);
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] = energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] = energy;
} }
extern "C" __global__ void scaleVelocities(mixed * __restrict__ scaleFactor, int paddedNumAtoms, extern "C" __global__ void computePairsKineticEnergy(mixed2 * __restrict__ energyBuffer, int numPairs,
mixed4* __restrict__ velm, const int *__restrict__ mask){ const mixed4* __restrict__ velm, const int2 *__restrict__ pairs){
const mixed &scale = scaleFactor[0]; mixed2 energy = make_mixed2(0,0);
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < paddedNumAtoms; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numPairs; index += blockDim.x*gridDim.x) {
mixed4 &v = velm[index]; int2 pair = pairs[index];
mixed4 vparent = mask[index] >= 0 ? velm[mask[index]] : make_mixed4(0.0f, 0.0f, 0.0f, 0.0f); int atom1 = pair.x;
mixed maskedScale = mask[index] == index ? 1 : scale; int atom2 = pair.y;
v.x = vparent.x + maskedScale * (v.x - vparent.x); mixed4 v1 = velm[atom1];
v.y = vparent.y + maskedScale * (v.y - vparent.y); mixed4 v2 = velm[atom2];
v.z = vparent.z + maskedScale * (v.z - vparent.z); mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
mixed4 cv;
cv.x = (m1*v1.x + m2*v2.x) / (m1 + m2);
cv.y = (m1*v1.y + m2*v2.y) / (m1 + m2);
cv.z = (m1*v1.z + m2*v2.z) / (m1 + m2);
mixed4 rv;
rv.x = v2.x - v1.x;
rv.y = v2.y - v1.y;
rv.z = v2.z - v1.z;
energy.x += 0.5f * (m1 + m2) * (cv.x*cv.x + cv.y*cv.y + cv.z*cv.z);
energy.y += 0.5f * (m1 * m2 / (m1 + m2)) * (rv.x*rv.x + rv.y*rv.y + rv.z*rv.z);
}
// The atoms version of this has been called already, so accumulate instead of assigning here
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x].x += energy.x;
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x].y += energy.y;
}
extern "C" __global__ void scaleAtomsVelocities(mixed2* __restrict__ scaleFactor, int numAtoms,
mixed4* __restrict__ velm, const int *__restrict__ atoms){
const mixed &scale = scaleFactor[0].x;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
int atom = atoms[index];
mixed4 &v = velm[atom];
v.x *= scale;
v.y *= scale;
v.z *= scale;
}
}
extern "C" __global__ void scalePairsVelocities(mixed2 * __restrict__ scaleFactor, int numPairs,
mixed4* __restrict__ velm, const int2 *__restrict__ pairs){
const mixed &absScale = scaleFactor[0].x;
const mixed &relScale = scaleFactor[0].y;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numPairs; index += blockDim.x*gridDim.x) {
int atom1 = pairs[index].x;
int atom2 = pairs[index].y;
mixed4 &v1 = velm[atom1];
mixed4 &v2 = velm[atom2];
mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
mixed4 cv;
cv.x = (m1*v1.x + m2*v2.x) / (m1 + m2);
cv.y = (m1*v1.y + m2*v2.y) / (m1 + m2);
cv.z = (m1*v1.z + m2*v2.z) / (m1 + m2);
mixed4 rv;
rv.x = v2.x - v1.x;
rv.y = v2.y - v1.y;
rv.z = v2.z - v1.z;
v1.x = absScale * cv.x - relScale * rv.x * m2 / (m1 + m2);
v1.y = absScale * cv.y - relScale * rv.y * m2 / (m1 + m2);
v1.z = absScale * cv.z - relScale * rv.z * m2 / (m1 + m2);
v2.x = absScale * cv.x + relScale * rv.x * m1 / (m1 + m2);
v2.y = absScale * cv.y + relScale * rv.y * m1 / (m1 + m2);
v2.z = absScale * cv.z + relScale * rv.z * m1 / (m1 + m2);
}
}
/**
* Sum the energy buffer containing a pair of energies stored as mixed2. This is copied from utilities.cu with small modifications
*/
extern "C" __global__ void reduceEnergyPair(const mixed2* __restrict__ energyBuffer, mixed2* __restrict__ result, int bufferSize, int workGroupSize) {
__shared__ mixed2 tempBuffer[WORK_GROUP_SIZE];
const unsigned int thread = threadIdx.x;
mixed2 sum = make_mixed2(0,0);
for (unsigned int idx = thread; idx < bufferSize; idx += blockDim.x) {
sum.x += energyBuffer[idx].x;
sum.y += energyBuffer[idx].y;
}
tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) {
__syncthreads();
if (thread%(i*2) == 0 && thread+i < workGroupSize) {
tempBuffer[thread].x += tempBuffer[thread+i].x;
tempBuffer[thread].y += tempBuffer[thread+i].y;
}
} }
if (thread == 0)
*result = tempBuffer[0];
} }
...@@ -99,4 +99,4 @@ __global__ void setCharges(real* __restrict__ charges, real4* __restrict__ posq, ...@@ -99,4 +99,4 @@ __global__ void setCharges(real* __restrict__ charges, real4* __restrict__ posq,
for (int i = blockDim.x*blockIdx.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x) for (int i = blockDim.x*blockIdx.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x)
posq[i].w = charges[atomOrder[i]]; posq[i].w = charges[atomOrder[i]];
} }
} }
\ No newline at end of file
...@@ -2477,54 +2477,58 @@ std::pair<double, double> ReferenceNoseHooverChainKernel::propagateChain(Context ...@@ -2477,54 +2477,58 @@ std::pair<double, double> ReferenceNoseHooverChainKernel::propagateChain(Context
// Get the variables describing the NHC // Get the variables describing the NHC
int chainLength = nhc.getDefaultChainLength(); int chainLength = nhc.getDefaultChainLength();
int chainID = nhc.getDefaultChainID(); int chainID = nhc.getDefaultChainID();
double temperature = nhc.getDefaultTemperature();
double collisionFrequency = nhc.getDefaultCollisionFrequency();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom(); int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
int numMTS = nhc.getDefaultNumMultiTimeSteps(); int numMTS = nhc.getDefaultNumMultiTimeSteps();
// Get the state of the NHC from the context // Get the state of the NHC from the context
auto& allChainPositions = extractNoseHooverPositions(context); auto& allChainPositions = extractNoseHooverPositions(context);
auto& allChainVelocities = extractNoseHooverVelocities(context); auto& allChainVelocities = extractNoseHooverVelocities(context);
if (allChainPositions.size() < chainID+1){
allChainPositions.resize(chainID+1); int nAtoms = nhc.getThermostatedAtoms().size();
} double absScale = 0;
if (allChainVelocities.size() < chainID+1){ if (nAtoms) {
allChainVelocities.resize(chainID+1); if (allChainPositions.size() < 2*chainID+1){
} allChainPositions.resize(2*chainID+1);
auto& chainPositions = allChainPositions.at(chainID); }
auto& chainVelocities = allChainVelocities.at(chainID); if (allChainVelocities.size() < 2*chainID+1){
if (chainPositions.size() < chainLength){ allChainVelocities.resize(2*chainID+1);
chainPositions.resize(chainLength, 0); }
} auto& chainPositions = allChainPositions.at(2*chainID);
if (chainVelocities.size() < chainLength){ auto& chainVelocities = allChainVelocities.at(2*chainID);
chainVelocities.resize(chainLength, 0); if (chainPositions.size() < chainLength){
chainPositions.resize(chainLength, 0);
}
if (chainVelocities.size() < chainLength){
chainVelocities.resize(chainLength, 0);
}
double temperature = nhc.getDefaultTemperature();
double collisionFrequency = nhc.getDefaultCollisionFrequency();
absScale = chainPropagator->propagate(absKE, chainVelocities, chainPositions, numDOFs,
temperature, collisionFrequency, timeStep,
numMTS, nhc.getDefaultYoshidaSuzukiWeights());
} }
double absScale = chainPropagator->propagate(absKE, chainVelocities, chainPositions, numDOFs,
temperature, collisionFrequency, timeStep,
numMTS, nhc.getDefaultYoshidaSuzukiWeights());
double relScale = 0; double relScale = 0;
int numPairs = nhc.getThermostatedPairs().size(); int nPairs = nhc.getThermostatedPairs().size();
if(numPairs){ if (nPairs) {
double relativeTemperature = nhc.getDefaultRelativeTemperature(); if (allChainPositions.size() < 2*chainID+2){
double relativeCollisionFrequency = nhc.getDefaultRelativeCollisionFrequency(); allChainPositions.resize(2*chainID+2);
if (allChainPositions.size() < chainID+2){
allChainPositions.resize(chainID+2);
} }
if (allChainVelocities.size() < chainID+2){ if (allChainVelocities.size() < 2*chainID+2){
allChainVelocities.resize(chainID+2); allChainVelocities.resize(2*chainID+2);
} }
auto& chainPositions = allChainPositions.at(chainID+1); auto& chainPositions = allChainPositions.at(2*chainID+1);
auto& chainVelocities = allChainVelocities.at(chainID+1); auto& chainVelocities = allChainVelocities.at(2*chainID+1);
if (chainPositions.size() < chainLength){ if (chainPositions.size() < chainLength){
chainPositions.resize(chainLength, 0); chainPositions.resize(chainLength, 0);
} }
if (chainVelocities.size() < chainLength){ if (chainVelocities.size() < chainLength){
chainVelocities.resize(chainLength, 0); chainVelocities.resize(chainLength, 0);
} }
relScale = chainPropagator->propagate(relKE, chainVelocities, chainPositions, 3*numPairs, double temperature = nhc.getDefaultRelativeTemperature();
relativeTemperature, relativeCollisionFrequency, timeStep, double collisionFrequency = nhc.getDefaultRelativeCollisionFrequency();
relScale = chainPropagator->propagate(relKE, chainVelocities, chainPositions, 3*nPairs,
temperature, collisionFrequency, timeStep,
numMTS, nhc.getDefaultYoshidaSuzukiWeights()); numMTS, nhc.getDefaultYoshidaSuzukiWeights());
} }
return {absScale, relScale}; return {absScale, relScale};
} }
...@@ -2534,21 +2538,41 @@ double ReferenceNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& contex ...@@ -2534,21 +2538,41 @@ double ReferenceNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& contex
double kineticEnergy = 0; double kineticEnergy = 0;
int chainLength = nhc.getDefaultChainLength(); int chainLength = nhc.getDefaultChainLength();
int chainID = nhc.getDefaultChainID(); int chainID = nhc.getDefaultChainID();
double temperature = nhc.getDefaultTemperature(); int nAtoms = nhc.getThermostatedAtoms().size();
double collisionFrequency = nhc.getDefaultCollisionFrequency(); int nPairs = nhc.getThermostatedPairs().size();
double kT = temperature * BOLTZ;
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
auto& nhcPositions = extractNoseHooverPositions(context); auto& nhcPositions = extractNoseHooverPositions(context);
auto& nhcVelocities = extractNoseHooverVelocities(context); auto& nhcVelocities = extractNoseHooverVelocities(context);
for(int i = 0; i < chainLength; ++i) { if (nAtoms) {
double prefac = i ? 1 : numDOFs; double temperature = nhc.getDefaultTemperature();
double mass = prefac * kT / (collisionFrequency * collisionFrequency); double collisionFrequency = nhc.getDefaultCollisionFrequency();
double velocity = nhcVelocities[chainID][i]; double kT = temperature * BOLTZ;
// The kinetic energy of this bead int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
kineticEnergy += 0.5 * mass * velocity * velocity; for(int i = 0; i < chainLength; ++i) {
// The potential energy of this bead double prefac = i ? 1 : numDOFs;
double position = nhcPositions[chainID][i]; double mass = prefac * kT / (collisionFrequency * collisionFrequency);
potentialEnergy += prefac * kT * position; double velocity = nhcVelocities[2*chainID][i];
// The kinetic energy of this bead
kineticEnergy += 0.5 * mass * velocity * velocity;
// The potential energy of this bead
double position = nhcPositions[2*chainID][i];
potentialEnergy += prefac * kT * position;
}
}
if (nPairs) {
double temperature = nhc.getDefaultRelativeTemperature();
double collisionFrequency = nhc.getDefaultRelativeCollisionFrequency();
double kT = temperature * BOLTZ;
int numDOFs = 3 * nPairs;
for(int i = 0; i < chainLength; ++i) {
double prefac = i ? 1 : numDOFs;
double mass = prefac * kT / (collisionFrequency * collisionFrequency);
double velocity = nhcVelocities[2*chainID+1][i];
// The kinetic energy of this bead
kineticEnergy += 0.5 * mass * velocity * velocity;
// The potential energy of this bead
double position = nhcPositions[2*chainID+1][i];
potentialEnergy += prefac * kT * position;
}
} }
return kineticEnergy + potentialEnergy; return kineticEnergy + potentialEnergy;
} }
......
...@@ -117,8 +117,8 @@ void testWaterBox(Platform& platform){ ...@@ -117,8 +117,8 @@ void testWaterBox(Platform& platform){
int chainLength = 4; int chainLength = 4;
int numMTS = 3; int numMTS = 3;
int numYS = 3; int numYS = 3;
double frequency = 50.0; double frequency = 100.0;
double frequencyDrude = 25.0; double frequencyDrude = 80.0;
int randomSeed = 100; int randomSeed = 100;
DrudeNoseHooverIntegrator integ(0.0005, system, temperature, temperatureDrude, DrudeNoseHooverIntegrator integ(0.0005, system, temperature, temperatureDrude,
frequency, frequencyDrude, chainLength, numMTS, numYS);; frequency, frequencyDrude, chainLength, numMTS, numYS);;
...@@ -138,12 +138,12 @@ void testWaterBox(Platform& platform){ ...@@ -138,12 +138,12 @@ void testWaterBox(Platform& platform){
context.applyConstraints(1e-6); context.applyConstraints(1e-6);
// Equilibrate. // Equilibrate.
integ.step(5000); integ.step(800);
// Compute the internal and center of mass temperatures. // Compute the internal and center of mass temperatures.
double totalKE = 0; double totalKE = 0;
int numSteps = 100000; const int numSteps = 800;
double meanTemp = 0.0; double meanTemp = 0.0;
double meanDrudeTemp = 0.0; double meanDrudeTemp = 0.0;
double meanConserved = 0.0; double meanConserved = 0.0;
...@@ -154,7 +154,6 @@ void testWaterBox(Platform& platform){ ...@@ -154,7 +154,6 @@ void testWaterBox(Platform& platform){
double PE = state.getPotentialEnergy(); double PE = state.getPotentialEnergy();
double fullKE = integ.computeTotalKineticEnergy(); double fullKE = integ.computeTotalKineticEnergy();
double drudeKE = integ.computeDrudeKineticEnergy(); double drudeKE = integ.computeDrudeKineticEnergy();
KE = fullKE - drudeKE;
double temp = KE/(0.5*numStandardDof*BOLTZ); double temp = KE/(0.5*numStandardDof*BOLTZ);
double drudeTemp = drudeKE/(0.5*numDrudeDof*BOLTZ); double drudeTemp = drudeKE/(0.5*numDrudeDof*BOLTZ);
meanTemp = (i*meanTemp + temp)/(i+1); meanTemp = (i*meanTemp + temp)/(i+1);
...@@ -162,7 +161,7 @@ void testWaterBox(Platform& platform){ ...@@ -162,7 +161,7 @@ void testWaterBox(Platform& platform){
double heatBathEnergy = integ.computeHeatBathEnergy(); double heatBathEnergy = integ.computeHeatBathEnergy();
double conserved = PE + fullKE + heatBathEnergy; double conserved = PE + fullKE + heatBathEnergy;
meanConserved = (i*meanConserved + conserved)/(i+1); meanConserved = (i*meanConserved + conserved)/(i+1);
#define DEBUG 1 #define DEBUG 0
#if DEBUG #if DEBUG
if(i%10 == 0) if(i%10 == 0)
std::cout << std::setw(6) << i std::cout << std::setw(6) << i
...@@ -170,15 +169,17 @@ void testWaterBox(Platform& platform){ ...@@ -170,15 +169,17 @@ void testWaterBox(Platform& platform){
<< std::setprecision(8) << std::setw(16) << drudeKE << std::setprecision(8) << std::setw(16) << drudeKE
<< std::setprecision(8) << std::setw(16) << meanTemp << std::setprecision(8) << std::setw(16) << meanTemp
<< std::setprecision(8) << std::setw(16) << meanDrudeTemp << std::setprecision(8) << std::setw(16) << meanDrudeTemp
<< std::setprecision(8) << std::setw(16) << heatBathEnergy
<< std::setprecision(8) << std::setw(16) << fullKE
<< std::setprecision(8) << std::setw(16) << conserved << std::setprecision(8) << std::setw(16) << conserved
<< std::endl; << std::endl;
#endif #endif
totalKE += KE; totalKE += KE;
// ASSERT(fabs(meanConserved - conserved) < 0.5); ASSERT(fabs(meanConserved - conserved) < 0.2);
} }
totalKE /= numSteps; totalKE /= numSteps;
// ASSERT_USUALLY_EQUAL_TOL(temperature, meanTemp, 0.02); ASSERT_USUALLY_EQUAL_TOL(temperature, meanTemp, 0.03);
// ASSERT_USUALLY_EQUAL_TOL(temperatureDrude, meanDrudeTemp, 0.10); ASSERT_USUALLY_EQUAL_TOL(temperatureDrude, meanDrudeTemp, 0.03);
} }
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment