Unverified Commit 799d4b1f authored by Andy Simmonett's avatar Andy Simmonett
Browse files

Nose-Hoover chain propagation works on CUDA

parent 6278ef5f
...@@ -1762,7 +1762,9 @@ public: ...@@ -1762,7 +1762,9 @@ public:
private: private:
CudaContext& cu; CudaContext& cu;
CudaArray scaleFactor, chainMasses, chainForces, heatBathEnergy;
std::map<int, CUfunction> propagateKernels; std::map<int, CUfunction> propagateKernels;
CUfunction computeHeatBathEnergyKernel;
}; };
/** /**
......
...@@ -8353,28 +8353,145 @@ void CudaNoseHooverChainKernel::initialize() { ...@@ -8353,28 +8353,145 @@ void CudaNoseHooverChainKernel::initialize() {
cu.setAsCurrent(); cu.setAsCurrent();
map<string, string> defines; map<string, string> defines;
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {1} {"; defines["BEGIN_YS_LOOP"] = "for(const real & ys : {1}) {";
defines["END_YS_LOOP"] = "}"; defines["END_YS_LOOP"] = "}";
CUmodule module = cu.createModule(CudaKernelSources::noseHooverChain, defines, ""); CUmodule module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
propagateKernels[1] = cu.getKernel(module, "propagateNoseHooverChain"); propagateKernels[1] = cu.getKernel(module, "propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.828981543588751, -0.657963087177502, 0.828981543588751} {"; defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.828981543588751, -0.657963087177502, 0.828981543588751}){";
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, ""); module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
propagateKernels[3] = cu.getKernel(module, "propagateNoseHooverChain"); propagateKernels[3] = cu.getKernel(module, "propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065} {"; defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}){";
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, ""); module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
propagateKernels[5] = cu.getKernel(module, "propagateNoseHooverChain"); propagateKernels[5] = cu.getKernel(module, "propagateNoseHooverChain");
computeHeatBathEnergyKernel = cu.getKernel(module, "computeHeatBathEnergy");
} }
double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, double kineticEnergy, double timeStep) { double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, double kineticEnergy, double timeStep) {
cu.setAsCurrent(); cu.setAsCurrent();
return 1; bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
int chainLength = nhc.getDefaultChainLength();
int numYS = nhc.getDefaultNumYoshidaSuzukiTimeSteps();
int numMTS = nhc.getDefaultNumMultiTimeSteps();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (chainID >= chainState.size()) chainState.resize(chainID+1);
if (!chainState[chainID].isInitialized() || chainState[chainID].getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState[chainID].initialize<double2>(cu, chainLength, "chainState" + std::to_string(chainID));
std::vector<double2> zeros(chainLength, {0.0, 0.0});
chainState[chainID].upload(zeros);
} else {
chainState[chainID].initialize<float2>(cu, chainLength, "chainState" + std::to_string(chainID));
std::vector<float2> zeros(chainLength, {0.0, 0.0});
chainState[chainID].upload(zeros);
}
}
if (!scaleFactor.isInitialized() ||scaleFactor.getSize() == 0) {
if(useDouble){
std::vector<double> one(1);
scaleFactor.initialize<double>(cu, 1, "scaleFactor");
scaleFactor.upload(one);
} else {
std::vector<float> one(1);
scaleFactor.initialize<float>(cu, 1, "scaleFactor");
scaleFactor.upload(one);
}
}
if (!chainForces.isInitialized() || !chainMasses.isInitialized() || chainForces.getSize() < chainLength || chainMasses.getSize() < chainLength) {
if(useDouble){
std::vector<double> zeros(chainLength,0);
chainMasses.initialize<double>(cu, chainLength, "chainMasses");
chainForces.initialize<double>(cu, chainLength, "chainForces");
chainMasses.upload(zeros);
chainForces.upload(zeros);
} else {
std::vector<float> zeros(chainLength,0);
chainMasses.initialize<float>(cu, chainLength, "chainMasses");
chainForces.initialize<float>(cu, chainLength, "chainForces");
chainMasses.upload(zeros);
chainForces.upload(zeros);
}
}
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float timeStepFloat = (float) timeStep;
float frequencyFloat = (float) frequency;
float kineticEnergyFloat = (float) kineticEnergy;
void *args[] = {&scaleFactor.getDevicePointer(),
&chainMasses.getDevicePointer(),
&chainForces.getDevicePointer(),
&chainLength, &numMTS, &numDOFs,
&timeStepFloat,
useDouble ? (void*) &kT : (void*) &kTfloat,
&frequencyFloat,
useDouble ? (void*) &kineticEnergy : (void*) &kineticEnergyFloat, &chainState[chainID].getDevicePointer()};
if (numYS == 1 || numYS == 3 || numYS == 5) {
cu.executeKernel(propagateKernels[numYS], args, 1);
} else {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
}
return 0;
} }
double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) { double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
cu.setAsCurrent(); cu.setAsCurrent();
return 1;
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
int chainLength = nhc.getDefaultChainLength();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (chainID >= chainState.size() || !chainState[chainID].isInitialized() || chainState[chainID].getSize() != chainLength) {
return 0.0;
}
if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) {
if(useDouble){
std::vector<double> one(1);
heatBathEnergy.initialize<double>(cu, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
} else {
std::vector<float> one(1);
heatBathEnergy.initialize<float>(cu, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
}
}
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &chainState[chainID].getDevicePointer()};
cu.executeKernel(computeHeatBathEnergyKernel, args, 1);
if (useDouble){
std::vector<double> energy(1);
heatBathEnergy.download(energy);
return energy[0];
} else {
std::vector<float> energy(1);
heatBathEnergy.download(energy);
return (double) energy[0];
}
} }
double CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain) { double CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain) {
......
#include <initializer_list>
/**
* Propagate the Nose-Hoover chain with one yoshida-suzuki term
*/
extern "C" __global__ void propagateNoseHooverChain(real* __restrict__ scaleFactor, real* __restrict__ chainMasses, real* __restrict__ chainForces,
int chainLength, int numMTS, int numDOFs, float timeStep,
real kT, float frequency, real kineticEnergy, real2* __restrict__ chainData){
real &scale = scaleFactor[0];
scale = (real) 1;
for (int bead = 0; bead < chainLength; ++bead) chainMasses[bead] = kT / (frequency * frequency);
chainMasses[0] *= numDOFs;
real KE2 = 2.0f * kineticEnergy;
real timeOverMTS = timeStep / numMTS;
chainForces[0] = (KE2 - numDOFs * kT) / chainMasses[0];
for (int bead = 0; bead < chainLength - 1; ++bead) {
chainForces[bead + 1] = (chainMasses[bead] * chainData[bead].y * chainData[bead].y - kT) / chainMasses[bead + 1];
}
for (int mts = 0; mts < numMTS; ++mts) {
BEGIN_YS_LOOP
real wdt = ys * timeOverMTS;
chainData[chainLength-1].y += 0.25f * wdt * chainForces[chainLength-1];
for (int bead = chainLength - 2; bead >= 0; --bead) {
real aa = exp(-0.125f * wdt * chainData[bead + 1].y);
chainData[bead].y = aa * (chainData[bead].y * aa + 0.25f * wdt * chainForces[bead]);
}
// update particle velocities
real aa = exp(-0.5f * wdt * chainData[0].y);
scale *= aa;
// update the thermostat positions
for (int bead = 0; bead < chainLength; ++bead) {
chainData[bead].x += 0.5f * chainData[bead].y * wdt;
}
// update the forces
chainForces[0] = (scale * scale * KE2 - numDOFs * kT) / chainMasses[0];
// update thermostat velocities
for (int bead = 0; bead < chainLength - 1; ++bead) {
real aa = exp(-0.125f * wdt * chainData[bead + 1].y);
chainData[bead].y = aa * (aa * chainData[bead].y + 0.25f * wdt * chainForces[bead]);
chainForces[bead + 1] = (chainMasses[bead] * chainData[bead].y * chainData[bead].y - kT) / chainMasses[bead + 1];
}
chainData[chainLength-1].y += 0.25f * wdt * chainForces[chainLength-1];
END_YS_LOOP
} // MTS loop
//printf("SCALE %f\n", scale);
}
/**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/
extern "C" __global__ void computeHeatBathEnergy(real* __restrict__ heatBathEnergy, int chainLength, int numDOFs,
real kT, float frequency, real2* __restrict__ chainData){
real &energy = heatBathEnergy[0];
energy = (real) 0;
for(int i = 0; i < chainLength; ++i) {
real prefac = i ? 1 : numDOFs;
real mass = prefac * kT / (frequency * frequency);
real velocity = chainData[i].y;
// The kinetic energy of this bead
energy += 0.5f * mass * velocity * velocity;
// The potential energy of this bead
real position = chainData[i].x;
energy += prefac * kT * position;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment