Unverified Commit cca3e11a authored by Andy Simmonett's avatar Andy Simmonett
Browse files

Correct CUDA NHC implementation

parent 50efef13
......@@ -121,7 +121,7 @@ int NoseHooverIntegrator::addSubsystemThermostat(System& system, const std::vect
int numForces = system.getNumForces();
if (thermostatedPairs.size() == 0){ // remove 3 degrees of freedom from thermostats that act on absolute motions
for (int forceNum = 0; forceNum < numForces; ++forceNum) {
// if (dynamic_cast<CMMotionRemover*>(&system.getForce(forceNum))) nDOF -= 3;
if (dynamic_cast<CMMotionRemover*>(&system.getForce(forceNum))) nDOF -= 3;
}
}
......@@ -143,11 +143,7 @@ int NoseHooverIntegrator::addSubsystemThermostat(System& system, const std::vect
}
// create and add new chain
int chainID = 0;
for(const auto &c : noseHooverChains) {
// We need to account for the fact that a NHC with pairs thermostated has both a relative and an absolute chain
chainID += c.getThermostatedPairs().size() ? 2 : 1;
}
int chainID = noseHooverChains.size();
auto nhc = NoseHooverChain(temperature, relativeTemperature, collisionFrequency, relativeCollisionFrequency,
nDOF, chainLength, numMTS, numYoshidaSuzuki, chainID, thermostatedParticles, thermostatedPairs);
noseHooverChains.push_back(nhc);
......@@ -232,10 +228,7 @@ double NoseHooverIntegrator::computeKineticEnergy() {
double kE = 0.0;
if(noseHooverChains.size()) {
for (const auto &nhc: noseHooverChains){
if (nhc.getThermostatedPairs().size() == 0) {
auto KEs = nhcKernel.getAs<NoseHooverChainKernel>().computeMaskedKineticEnergy(*context, nhc, true);
kE += std::get<0>(KEs);
}
kE += nhcKernel.getAs<NoseHooverChainKernel>().computeMaskedKineticEnergy(*context, nhc, true).first;
}
} else {
kE = vvKernel.getAs<IntegrateVelocityVerletStepKernel>().computeKineticEnergy(*context, *this);
......
......@@ -1731,11 +1731,11 @@ public:
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergy the kineticEnergy of the particles being thermostated by this chain.
* @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the velocity scale factor to apply to the particles associated with this heat bath.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/
virtual double propagateChain(ContextImpl& context, const NoseHooverChain &nhc, double kineticEnergy, double timeStep);
virtual std::pair<double, double> propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
......@@ -1753,26 +1753,29 @@ public:
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
virtual double computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue);
virtual std::pair<double,double> computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactor the multiplicative factor by which velocities are scaled.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/
virtual void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, double scaleFactor);
virtual void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, std::pair<double, double> scaleFactors);
private:
int sumWorkGroupSize;
CudaContext& cu;
CudaArray scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
std::map<int, CudaArray> masks;
std::map<int, CudaArray> atomlists, pairlists;
std::map<int, CUfunction> propagateKernels;
CUfunction reduceEnergyKernel;
CUfunction computeHeatBathEnergyKernel;
CUfunction computeMaskedKineticEnergyKernel;
CUfunction scaleVelocitiesKernel;
CUfunction computeAtomsKineticEnergyKernel;
CUfunction computePairsKineticEnergyKernel;
CUfunction scaleAtomsVelocitiesKernel;
CUfunction scalePairsVelocitiesKernel;
};
/**
......
......@@ -808,7 +808,6 @@ double CudaIntegrationUtilities::computeKineticEnergy(double timeShift) {
energy += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
}
}
// Restore the velocities.
if (timeShift != 0)
......
......@@ -8358,6 +8358,8 @@ void CudaNoseHooverChainKernel::initialize() {
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
map<string, string> defines;
sumWorkGroupSize = 512;
defines["WORK_GROUP_SIZE"] = cu.intToString(sumWorkGroupSize);
defines["MIXEDEXP"] = useDouble ? "exp" : "expf";
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {1}) {";
defines["END_YS_LOOP"] = "}";
......@@ -8369,56 +8371,50 @@ void CudaNoseHooverChainKernel::initialize() {
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}){";
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
propagateKernels[5] = cu.getKernel(module, "propagateNoseHooverChain");
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
reduceEnergyKernel = cu.getKernel(module, "reduceEnergyPair");
computeHeatBathEnergyKernel = cu.getKernel(module, "computeHeatBathEnergy");
computeMaskedKineticEnergyKernel = cu.getKernel(module, "computeMaskedKineticEnergy");
scaleVelocitiesKernel = cu.getKernel(module, "scaleVelocities");
CUmodule utilities = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::utilities);
reduceEnergyKernel = cu.getKernel(utilities, "reduceEnergy");
computeAtomsKineticEnergyKernel = cu.getKernel(module, "computeAtomsKineticEnergy");
computePairsKineticEnergyKernel = cu.getKernel(module, "computePairsKineticEnergy");
scaleAtomsVelocitiesKernel = cu.getKernel(module, "scaleAtomsVelocities");
scalePairsVelocitiesKernel = cu.getKernel(module, "scalePairsVelocities");
}
double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, double kineticEnergy, double timeStep) {
std::pair<double, double> CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep) {
cu.setAsCurrent();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
int nAtoms = nhc.getThermostatedAtoms().size();
int nPairs = nhc.getThermostatedPairs().size();
int chainLength = nhc.getDefaultChainLength();
int numYS = nhc.getDefaultNumYoshidaSuzukiTimeSteps();
int numMTS = nhc.getDefaultNumMultiTimeSteps();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
double relativeTemperature = nhc.getDefaultRelativeTemperature();
double relativeFrequency = nhc.getDefaultRelativeCollisionFrequency();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (!chainState.count(chainID)) {
chainState[chainID] = CudaArray();
}
if (chainState.at(chainID).getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState.at(chainID).initialize<double2>(cu, chainLength, "chainState" + std::to_string(chainID));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(chainID).upload(zeros.data());
} else {
chainState.at(chainID).initialize<float2>(cu, chainLength, "chainState" + std::to_string(chainID));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(chainID).upload(zeros.data());
}
if (numYS != 1 && numYS != 3 && numYS != 5) {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
}
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (!scaleFactorBuffer.isInitialized() ||scaleFactorBuffer.getSize() == 0) {
if(useDouble){
std::vector<double> one(1);
scaleFactorBuffer.initialize<double>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(one);
std::vector<double2> zeros{{0,0}};
scaleFactorBuffer.initialize<double2>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(zeros);
} else {
std::vector<float> one(1);
scaleFactorBuffer.initialize<float>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(one);
std::vector<float2> zeros{{0,0}};
scaleFactorBuffer.initialize<float2>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(zeros);
}
}
std::vector<double> zeros(chainLength,0);
if (!chainForces.isInitialized() || !chainMasses.isInitialized() ){
if(useDouble){
......@@ -8435,28 +8431,73 @@ double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const Nos
}
if (chainForces.getSize() < chainLength) chainMasses.resize(chainLength);
if (chainMasses.getSize() < chainLength) chainMasses.resize(chainLength);
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float timeStepFloat = (float) timeStep;
float frequencyFloat = (float) frequency;
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
void *args[] = {
&chainState[chainID].getDevicePointer(),
if (nAtoms) {
if (!chainState.count(2*chainID)) chainState[2*chainID] = CudaArray();
if (chainState.at(2*chainID).getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState.at(2*chainID).initialize<double2>(cu, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(2*chainID).upload(zeros.data());
} else {
chainState.at(2*chainID).initialize<float2>(cu, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(2*chainID).upload(zeros.data());
}
}
int chainType = 0;
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void *args[] = {
&chainState[2*chainID].getDevicePointer(),
&kineticEnergyBuffer.getDevicePointer(),
&scaleFactorBuffer.getDevicePointer(),
&chainMasses.getDevicePointer(),
&chainForces.getDevicePointer(),
&chainLength, &numMTS, &numDOFs,
&chainType, &chainLength, &numMTS, &numDOFs,
&timeStepFloat,
useDouble ? (void*) &kT : (void*) &kTfloat,
&frequencyFloat
};
if (numYS == 1 || numYS == 3 || numYS == 5) {
cu.executeKernel(propagateKernels[numYS], args, 1, 1);
} else {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
}
return 0;
if (nPairs) {
if (!chainState.count(2*chainID+1)) chainState[2*chainID+1] = CudaArray();
if (chainState.at(2*chainID+1).getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState.at(2*chainID+1).initialize<double2>(cu, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(2*chainID+1).upload(zeros.data());
} else {
chainState.at(2*chainID+1).initialize<float2>(cu, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(2*chainID+1).upload(zeros.data());
}
}
int chainType = 1;
double kT = BOLTZ * relativeTemperature;
int ndf = 3*nPairs;
float kTfloat = (float) kT;
float frequencyFloat = (float) relativeFrequency;
void *args[] = {
&chainState[2*chainID+1].getDevicePointer(),
&kineticEnergyBuffer.getDevicePointer(),
&scaleFactorBuffer.getDevicePointer(),
&chainMasses.getDevicePointer(),
&chainForces.getDevicePointer(),
&chainType, &chainLength, &numMTS, &ndf,
&timeStepFloat,
useDouble ? (void*) &kT : (void*) &kTfloat,
&frequencyFloat
};
cu.executeKernel(propagateKernels[numYS], args, 1, 1);
}
return {0, 0};
}
double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
......@@ -8466,14 +8507,17 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
int chainID = nhc.getDefaultChainID();
int chainLength = nhc.getDefaultChainLength();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (chainID >= chainState.size() || !chainState[chainID].isInitialized() || chainState[chainID].getSize() != chainLength) {
return 0.0;
}
bool absChainIsValid = chainState.count(2*chainID) != 0 &&
chainState[2*chainID].isInitialized() &&
chainState[2*chainID].getSize() == chainLength;
bool relChainIsValid = chainState.count(2*chainID+1) != 0 &&
chainState[2*chainID+1].isInitialized() &&
chainState[2*chainID+1].getSize() == chainLength;
if (!absChainIsValid && !relChainIsValid) return 0.0;
if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) {
if(useDouble){
......@@ -8487,15 +8531,35 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
}
}
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
cu.clearBuffer(heatBathEnergy);
if (absChainIsValid) {
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &chainState[chainID].getDevicePointer()};
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &chainState[2*chainID].getDevicePointer()};
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
}
if (relChainIsValid) {
int numDOFs = 3 * nhc.getThermostatedPairs().size();
double temperature = nhc.getDefaultRelativeTemperature();
double frequency = nhc.getDefaultRelativeCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &chainState[2*chainID+1].getDevicePointer()};
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
}
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
void * pinnedBuffer = cu.getPinnedBuffer();
heatBathEnergy.download(pinnedBuffer);
......@@ -8506,78 +8570,94 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
}
}
double CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &nhc, bool downloadValue) {
std::pair<double, double> CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &nhc, bool downloadValue) {
cu.setAsCurrent();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
const auto & parents = nhc.getThermostatedPairs();
const auto & thermostatedAtoms = nhc.getThermostatedAtoms();
auto nAtoms = cu.getPaddedNumAtoms();
if (!masks.count(chainID)) {
// We need to upload the CUDA array
std::vector<int> maskVec(nAtoms);
for (int i = 0; i < nAtoms; ++i) maskVec[i] = i;
if (parents.size() ) {
assert(parents.size() == thermostatedAtoms.size());
for ( size_t i = 0; i < parents.size(); ++i) {
int atom = thermostatedAtoms[i];
maskVec[atom] = parents[i];
}
} else {
for ( const auto & atom : thermostatedAtoms) {
maskVec[atom] = -1L;
}
const auto & nhcAtoms = nhc.getThermostatedAtoms();
const auto & nhcPairs = nhc.getThermostatedPairs();
auto nAtoms = nhcAtoms.size();
auto nPairs = nhcPairs.size();
if (nAtoms) {
if (!atomlists.count(chainID)) {
// We need to upload the CUDA array
atomlists[chainID] = CudaArray();
atomlists[chainID].initialize<int>(cu, nAtoms, "atomlist" + std::to_string(chainID));
atomlists[chainID].upload(nhcAtoms);
}
if (atomlists[chainID].getSize() != nAtoms) {
throw OpenMMException("Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
if (nPairs) {
if (!pairlists.count(chainID)) {
// We need to upload the CUDA array
pairlists[chainID] = CudaArray();
pairlists[chainID].initialize<int2>(cu, nPairs, "pairlist" + std::to_string(chainID));
std::vector<int2> int2vec;
for(const auto &p : nhcPairs) int2vec.push_back(make_int2(p.first, p.second));
pairlists[chainID].upload(int2vec);
}
if (pairlists[chainID].getSize() != nPairs) {
throw OpenMMException("Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
masks[chainID] = CudaArray();
masks[chainID].initialize<int>(cu, nAtoms, "mask" + std::to_string(chainID));
masks[chainID].upload(maskVec);
}
if (masks[chainID].getSize() != nAtoms) {
throw OpenMMException("Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
if (!kineticEnergyBuffer.isInitialized() || kineticEnergyBuffer.getSize() == 0) {
if(useDouble){
std::vector<double> one(1);
kineticEnergyBuffer.initialize<double>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(one);
std::vector<double2> zeros{{0,0}};
kineticEnergyBuffer.initialize<double2>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(zeros);
} else {
std::vector<float> one(1);
kineticEnergyBuffer.initialize<float>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(one);
std::vector<float2> zeros{{0,0}};
kineticEnergyBuffer.initialize<float2>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(zeros);
}
}
cu.clearBuffer(cu.getEnergyBuffer());
void *args[] = {&cu.getEnergyBuffer().getDevicePointer(),&nAtoms, &cu.getVelm().getDevicePointer(), &masks[chainID].getDevicePointer()};
cu.executeKernel(computeMaskedKineticEnergyKernel, args, nAtoms);
if (nAtoms) {
void *args[] = {&cu.getEnergyBuffer().getDevicePointer(),&nAtoms, &cu.getVelm().getDevicePointer(), &atomlists[chainID].getDevicePointer()};
cu.executeKernel(computeAtomsKineticEnergyKernel, args, nAtoms);
}
if (nPairs) {
void *args[] = {&cu.getEnergyBuffer().getDevicePointer(),&nPairs, &cu.getVelm().getDevicePointer(), &pairlists[chainID].getDevicePointer()};
cu.executeKernel(computePairsKineticEnergyKernel, args, nPairs);
}
//taken from CudaContext::reduceEnergy(); the final kinetic energy will live in the kineticEnergy buffer
int bufferSize = cu.getEnergyBuffer().getSize();
int workGroupSize = 512;
void* args2[] = {&cu.getEnergyBuffer().getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &workGroupSize};
cu.executeKernel(reduceEnergyKernel, args2, workGroupSize, workGroupSize, workGroupSize*cu.getEnergyBuffer().getElementSize());
int bufferSize = cu.getEnergyBuffer().getSize() / 2; // Halve it to account for the fact that we're storing mixed2 instead of mixed in there
void* args2[] = {&cu.getEnergyBuffer().getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &sumWorkGroupSize};
cu.executeKernel(reduceEnergyKernel, args2, sumWorkGroupSize, sumWorkGroupSize, 2*sumWorkGroupSize*cu.getEnergyBuffer().getElementSize());
double value = 0;
std::pair<double, double> KEs = {0, 0};
if (downloadValue) {
void * pinnedBuffer = cu.getPinnedBuffer();
kineticEnergyBuffer.download(pinnedBuffer);
value = useDouble ? *((double*) pinnedBuffer) : *((float*) pinnedBuffer);
KEs.first = useDouble ? *((double*) pinnedBuffer) : *((float*) pinnedBuffer);
KEs.second = useDouble ? *((double*) pinnedBuffer + 1) : *((float*) pinnedBuffer + 1);
}
return value;
return KEs;
}
void CudaNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &nhc, double scaleFactor) {
// For now we assume that the mask info is valid, because computeMaskedKineticEnergy must have been
void CudaNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> scaleFactor) {
// For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
cu.setAsCurrent();
auto nAtoms = cu.getPaddedNumAtoms();
int chainID = nhc.getDefaultChainID();
void *args[] = {&scaleFactorBuffer.getDevicePointer(),
&nAtoms, &cu.getVelm().getDevicePointer(), &masks[chainID].getDevicePointer()};
cu.executeKernel(scaleVelocitiesKernel, args, nAtoms);
auto nAtoms = nhc.getThermostatedAtoms().size();
auto nPairs = nhc.getThermostatedPairs().size();
if(nAtoms) {
void *args[] = {&scaleFactorBuffer.getDevicePointer(),
&nAtoms, &cu.getVelm().getDevicePointer(), &atomlists[chainID].getDevicePointer()};
cu.executeKernel(scaleAtomsVelocitiesKernel, args, nAtoms);
}
if(nPairs) {
void *args[] = {&scaleFactorBuffer.getDevicePointer(),
&nPairs, &cu.getVelm().getDevicePointer(), &pairlists[chainID].getDevicePointer()};
cu.executeKernel(scalePairsVelocitiesKernel, args, nPairs);
}
}
void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
......
#include <initializer_list>
extern "C" __global__ void propagateNoseHooverChain(mixed2* __restrict__ chainData, const mixed * __restrict__ energySum, mixed* __restrict__ scaleFactor,
extern "C" __global__ void propagateNoseHooverChain(mixed2* __restrict__ chainData, const mixed2 * __restrict__ energySum, mixed2* __restrict__ scaleFactor,
mixed* __restrict__ chainMasses, mixed* __restrict__ chainForces,
int chainLength, int numMTS, int numDOFs, float timeStep,
int chainType, int chainLength, int numMTS, int numDOFs, float timeStep,
mixed kT, float frequency){
mixed &scale = scaleFactor[0];
const mixed & kineticEnergy = chainType ? energySum[0].y : energySum[0].x;
mixed &scale = chainType ? scaleFactor[0].y : scaleFactor[0].x;
scale = (mixed) 1;
const mixed & kineticEnergy = energySum[0];
if(kineticEnergy < 1e-8) return;
for (int bead = 0; bead < chainLength; ++bead) chainMasses[bead] = kT / (frequency * frequency);
chainMasses[0] *= numDOFs;
......@@ -49,11 +49,10 @@ extern "C" __global__ void propagateNoseHooverChain(mixed2* __restrict__ chainDa
/**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/
extern "C" __global__ void computeHeatBathEnergy(mixed* __restrict__ heatBathEnergy, int chainLength, int numDOFs,
extern "C" __global__ void computeHeatBathEnergy(mixed* __restrict__ heatBathEnergy, int chainLength, int numDOFs,
mixed kT, float frequency, const mixed2* __restrict__ chainData){
// Note that this is always incremented; make sure it's zeroed properly before the first call
mixed &energy = heatBathEnergy[0];
energy = (mixed) 0;
for(int i = 0; i < chainLength; ++i) {
mixed prefac = i ? 1 : numDOFs;
......@@ -67,35 +66,105 @@ extern "C" __global__ void computeHeatBathEnergy(mixed* __restrict__ heatBathEne
}
}
extern "C" __global__ void computeMaskedKineticEnergy(mixed * __restrict__ energyBuffer, int paddedNumAtoms,
const mixed4* __restrict__ velm, const int *__restrict__ mask){
mixed energy = 0;
extern "C" __global__ void computeAtomsKineticEnergy(mixed2 * __restrict__ energyBuffer, int numAtoms,
const mixed4* __restrict__ velm, const int *__restrict__ atoms){
mixed2 energy = make_mixed2(0,0);
//energy = 1; return;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < paddedNumAtoms; index += blockDim.x*gridDim.x) {
mixed4 v = velm[index];
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
int atom = atoms[index];
mixed4 v = velm[atom];
mixed mass = v.w == 0 ? 0 : 1 / v.w;
if (mask[index] >= 0){
const mixed4& vparent = velm[mask[index]];
mixed massp = vparent.w == 0 ? 0 : 1/vparent.w;
mass = (massp + mass) == 0 ? 0 : (massp * mass) / ( massp + mass );
v.x -= vparent.x;
v.y -= vparent.y;
v.z -= vparent.z;
}
energy += 0.5f * mass * (v.x*v.x + v.y*v.y + v.z*v.z);
energy.x += 0.5f * mass * (v.x*v.x + v.y*v.y + v.z*v.z);
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] = energy;
}
extern "C" __global__ void scaleVelocities(mixed * __restrict__ scaleFactor, int paddedNumAtoms,
mixed4* __restrict__ velm, const int *__restrict__ mask){
const mixed &scale = scaleFactor[0];
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < paddedNumAtoms; index += blockDim.x*gridDim.x) {
mixed4 &v = velm[index];
mixed4 vparent = mask[index] >= 0 ? velm[mask[index]] : make_mixed4(0.0f, 0.0f, 0.0f, 0.0f);
mixed maskedScale = mask[index] == index ? 1 : scale;
v.x = vparent.x + maskedScale * (v.x - vparent.x);
v.y = vparent.y + maskedScale * (v.y - vparent.y);
v.z = vparent.z + maskedScale * (v.z - vparent.z);
extern "C" __global__ void computePairsKineticEnergy(mixed2 * __restrict__ energyBuffer, int numPairs,
const mixed4* __restrict__ velm, const int2 *__restrict__ pairs){
mixed2 energy = make_mixed2(0,0);
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numPairs; index += blockDim.x*gridDim.x) {
int2 pair = pairs[index];
int atom1 = pair.x;
int atom2 = pair.y;
mixed4 v1 = velm[atom1];
mixed4 v2 = velm[atom2];
mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
mixed4 cv;
cv.x = (m1*v1.x + m2*v2.x) / (m1 + m2);
cv.y = (m1*v1.y + m2*v2.y) / (m1 + m2);
cv.z = (m1*v1.z + m2*v2.z) / (m1 + m2);
mixed4 rv;
rv.x = v2.x - v1.x;
rv.y = v2.y - v1.y;
rv.z = v2.z - v1.z;
energy.x += 0.5f * (m1 + m2) * (cv.x*cv.x + cv.y*cv.y + cv.z*cv.z);
energy.y += 0.5f * (m1 * m2 / (m1 + m2)) * (rv.x*rv.x + rv.y*rv.y + rv.z*rv.z);
}
// The atoms version of this has been called already, so accumulate instead of assigning here
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x].x += energy.x;
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x].y += energy.y;
}
extern "C" __global__ void scaleAtomsVelocities(mixed2* __restrict__ scaleFactor, int numAtoms,
mixed4* __restrict__ velm, const int *__restrict__ atoms){
const mixed &scale = scaleFactor[0].x;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
int atom = atoms[index];
mixed4 &v = velm[atom];
v.x *= scale;
v.y *= scale;
v.z *= scale;
}
}
extern "C" __global__ void scalePairsVelocities(mixed2 * __restrict__ scaleFactor, int numPairs,
mixed4* __restrict__ velm, const int2 *__restrict__ pairs){
const mixed &absScale = scaleFactor[0].x;
const mixed &relScale = scaleFactor[0].y;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numPairs; index += blockDim.x*gridDim.x) {
int atom1 = pairs[index].x;
int atom2 = pairs[index].y;
mixed4 &v1 = velm[atom1];
mixed4 &v2 = velm[atom2];
mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
mixed4 cv;
cv.x = (m1*v1.x + m2*v2.x) / (m1 + m2);
cv.y = (m1*v1.y + m2*v2.y) / (m1 + m2);
cv.z = (m1*v1.z + m2*v2.z) / (m1 + m2);
mixed4 rv;
rv.x = v2.x - v1.x;
rv.y = v2.y - v1.y;
rv.z = v2.z - v1.z;
v1.x = absScale * cv.x - relScale * rv.x * m2 / (m1 + m2);
v1.y = absScale * cv.y - relScale * rv.y * m2 / (m1 + m2);
v1.z = absScale * cv.z - relScale * rv.z * m2 / (m1 + m2);
v2.x = absScale * cv.x + relScale * rv.x * m1 / (m1 + m2);
v2.y = absScale * cv.y + relScale * rv.y * m1 / (m1 + m2);
v2.z = absScale * cv.z + relScale * rv.z * m1 / (m1 + m2);
}
}
/**
* Sum the energy buffer containing a pair of energies stored as mixed2. This is copied from utilities.cu with small modifications
*/
extern "C" __global__ void reduceEnergyPair(const mixed2* __restrict__ energyBuffer, mixed2* __restrict__ result, int bufferSize, int workGroupSize) {
__shared__ mixed2 tempBuffer[WORK_GROUP_SIZE];
const unsigned int thread = threadIdx.x;
mixed2 sum = make_mixed2(0,0);
for (unsigned int idx = thread; idx < bufferSize; idx += blockDim.x) {
sum.x += energyBuffer[idx].x;
sum.y += energyBuffer[idx].y;
}
tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) {
__syncthreads();
if (thread%(i*2) == 0 && thread+i < workGroupSize) {
tempBuffer[thread].x += tempBuffer[thread+i].x;
tempBuffer[thread].y += tempBuffer[thread+i].y;
}
}
if (thread == 0)
*result = tempBuffer[0];
}
......@@ -99,4 +99,4 @@ __global__ void setCharges(real* __restrict__ charges, real4* __restrict__ posq,
for (int i = blockDim.x*blockIdx.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x)
posq[i].w = charges[atomOrder[i]];
}
}
\ No newline at end of file
}
......@@ -2477,54 +2477,58 @@ std::pair<double, double> ReferenceNoseHooverChainKernel::propagateChain(Context
// Get the variables describing the NHC
int chainLength = nhc.getDefaultChainLength();
int chainID = nhc.getDefaultChainID();
double temperature = nhc.getDefaultTemperature();
double collisionFrequency = nhc.getDefaultCollisionFrequency();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
int numMTS = nhc.getDefaultNumMultiTimeSteps();
// Get the state of the NHC from the context
auto& allChainPositions = extractNoseHooverPositions(context);
auto& allChainVelocities = extractNoseHooverVelocities(context);
if (allChainPositions.size() < chainID+1){
allChainPositions.resize(chainID+1);
}
if (allChainVelocities.size() < chainID+1){
allChainVelocities.resize(chainID+1);
}
auto& chainPositions = allChainPositions.at(chainID);
auto& chainVelocities = allChainVelocities.at(chainID);
if (chainPositions.size() < chainLength){
chainPositions.resize(chainLength, 0);
}
if (chainVelocities.size() < chainLength){
chainVelocities.resize(chainLength, 0);
int nAtoms = nhc.getThermostatedAtoms().size();
double absScale = 0;
if (nAtoms) {
if (allChainPositions.size() < 2*chainID+1){
allChainPositions.resize(2*chainID+1);
}
if (allChainVelocities.size() < 2*chainID+1){
allChainVelocities.resize(2*chainID+1);
}
auto& chainPositions = allChainPositions.at(2*chainID);
auto& chainVelocities = allChainVelocities.at(2*chainID);
if (chainPositions.size() < chainLength){
chainPositions.resize(chainLength, 0);
}
if (chainVelocities.size() < chainLength){
chainVelocities.resize(chainLength, 0);
}
double temperature = nhc.getDefaultTemperature();
double collisionFrequency = nhc.getDefaultCollisionFrequency();
absScale = chainPropagator->propagate(absKE, chainVelocities, chainPositions, numDOFs,
temperature, collisionFrequency, timeStep,
numMTS, nhc.getDefaultYoshidaSuzukiWeights());
}
double absScale = chainPropagator->propagate(absKE, chainVelocities, chainPositions, numDOFs,
temperature, collisionFrequency, timeStep,
numMTS, nhc.getDefaultYoshidaSuzukiWeights());
double relScale = 0;
int numPairs = nhc.getThermostatedPairs().size();
if(numPairs){
double relativeTemperature = nhc.getDefaultRelativeTemperature();
double relativeCollisionFrequency = nhc.getDefaultRelativeCollisionFrequency();
if (allChainPositions.size() < chainID+2){
allChainPositions.resize(chainID+2);
int nPairs = nhc.getThermostatedPairs().size();
if (nPairs) {
if (allChainPositions.size() < 2*chainID+2){
allChainPositions.resize(2*chainID+2);
}
if (allChainVelocities.size() < chainID+2){
allChainVelocities.resize(chainID+2);
if (allChainVelocities.size() < 2*chainID+2){
allChainVelocities.resize(2*chainID+2);
}
auto& chainPositions = allChainPositions.at(chainID+1);
auto& chainVelocities = allChainVelocities.at(chainID+1);
auto& chainPositions = allChainPositions.at(2*chainID+1);
auto& chainVelocities = allChainVelocities.at(2*chainID+1);
if (chainPositions.size() < chainLength){
chainPositions.resize(chainLength, 0);
}
if (chainVelocities.size() < chainLength){
chainVelocities.resize(chainLength, 0);
}
relScale = chainPropagator->propagate(relKE, chainVelocities, chainPositions, 3*numPairs,
relativeTemperature, relativeCollisionFrequency, timeStep,
double temperature = nhc.getDefaultRelativeTemperature();
double collisionFrequency = nhc.getDefaultRelativeCollisionFrequency();
relScale = chainPropagator->propagate(relKE, chainVelocities, chainPositions, 3*nPairs,
temperature, collisionFrequency, timeStep,
numMTS, nhc.getDefaultYoshidaSuzukiWeights());
}
return {absScale, relScale};
}
......@@ -2534,21 +2538,41 @@ double ReferenceNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& contex
double kineticEnergy = 0;
int chainLength = nhc.getDefaultChainLength();
int chainID = nhc.getDefaultChainID();
double temperature = nhc.getDefaultTemperature();
double collisionFrequency = nhc.getDefaultCollisionFrequency();
double kT = temperature * BOLTZ;
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
int nAtoms = nhc.getThermostatedAtoms().size();
int nPairs = nhc.getThermostatedPairs().size();
auto& nhcPositions = extractNoseHooverPositions(context);
auto& nhcVelocities = extractNoseHooverVelocities(context);
for(int i = 0; i < chainLength; ++i) {
double prefac = i ? 1 : numDOFs;
double mass = prefac * kT / (collisionFrequency * collisionFrequency);
double velocity = nhcVelocities[chainID][i];
// The kinetic energy of this bead
kineticEnergy += 0.5 * mass * velocity * velocity;
// The potential energy of this bead
double position = nhcPositions[chainID][i];
potentialEnergy += prefac * kT * position;
if (nAtoms) {
double temperature = nhc.getDefaultTemperature();
double collisionFrequency = nhc.getDefaultCollisionFrequency();
double kT = temperature * BOLTZ;
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
for(int i = 0; i < chainLength; ++i) {
double prefac = i ? 1 : numDOFs;
double mass = prefac * kT / (collisionFrequency * collisionFrequency);
double velocity = nhcVelocities[2*chainID][i];
// The kinetic energy of this bead
kineticEnergy += 0.5 * mass * velocity * velocity;
// The potential energy of this bead
double position = nhcPositions[2*chainID][i];
potentialEnergy += prefac * kT * position;
}
}
if (nPairs) {
double temperature = nhc.getDefaultRelativeTemperature();
double collisionFrequency = nhc.getDefaultRelativeCollisionFrequency();
double kT = temperature * BOLTZ;
int numDOFs = 3 * nPairs;
for(int i = 0; i < chainLength; ++i) {
double prefac = i ? 1 : numDOFs;
double mass = prefac * kT / (collisionFrequency * collisionFrequency);
double velocity = nhcVelocities[2*chainID+1][i];
// The kinetic energy of this bead
kineticEnergy += 0.5 * mass * velocity * velocity;
// The potential energy of this bead
double position = nhcPositions[2*chainID+1][i];
potentialEnergy += prefac * kT * position;
}
}
return kineticEnergy + potentialEnergy;
}
......
......@@ -117,8 +117,8 @@ void testWaterBox(Platform& platform){
int chainLength = 4;
int numMTS = 3;
int numYS = 3;
double frequency = 50.0;
double frequencyDrude = 25.0;
double frequency = 100.0;
double frequencyDrude = 80.0;
int randomSeed = 100;
DrudeNoseHooverIntegrator integ(0.0005, system, temperature, temperatureDrude,
frequency, frequencyDrude, chainLength, numMTS, numYS);;
......@@ -138,12 +138,12 @@ void testWaterBox(Platform& platform){
context.applyConstraints(1e-6);
// Equilibrate.
integ.step(5000);
integ.step(800);
// Compute the internal and center of mass temperatures.
double totalKE = 0;
int numSteps = 100000;
const int numSteps = 800;
double meanTemp = 0.0;
double meanDrudeTemp = 0.0;
double meanConserved = 0.0;
......@@ -154,7 +154,6 @@ void testWaterBox(Platform& platform){
double PE = state.getPotentialEnergy();
double fullKE = integ.computeTotalKineticEnergy();
double drudeKE = integ.computeDrudeKineticEnergy();
KE = fullKE - drudeKE;
double temp = KE/(0.5*numStandardDof*BOLTZ);
double drudeTemp = drudeKE/(0.5*numDrudeDof*BOLTZ);
meanTemp = (i*meanTemp + temp)/(i+1);
......@@ -162,7 +161,7 @@ void testWaterBox(Platform& platform){
double heatBathEnergy = integ.computeHeatBathEnergy();
double conserved = PE + fullKE + heatBathEnergy;
meanConserved = (i*meanConserved + conserved)/(i+1);
#define DEBUG 1
#define DEBUG 0
#if DEBUG
if(i%10 == 0)
std::cout << std::setw(6) << i
......@@ -170,15 +169,17 @@ void testWaterBox(Platform& platform){
<< std::setprecision(8) << std::setw(16) << drudeKE
<< std::setprecision(8) << std::setw(16) << meanTemp
<< std::setprecision(8) << std::setw(16) << meanDrudeTemp
<< std::setprecision(8) << std::setw(16) << heatBathEnergy
<< std::setprecision(8) << std::setw(16) << fullKE
<< std::setprecision(8) << std::setw(16) << conserved
<< std::endl;
#endif
totalKE += KE;
// ASSERT(fabs(meanConserved - conserved) < 0.5);
ASSERT(fabs(meanConserved - conserved) < 0.2);
}
totalKE /= numSteps;
// ASSERT_USUALLY_EQUAL_TOL(temperature, meanTemp, 0.02);
// ASSERT_USUALLY_EQUAL_TOL(temperatureDrude, meanDrudeTemp, 0.10);
ASSERT_USUALLY_EQUAL_TOL(temperature, meanTemp, 0.03);
ASSERT_USUALLY_EQUAL_TOL(temperatureDrude, meanDrudeTemp, 0.03);
}
int main(int argc, char* argv[]) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment