"wrappers/python/vscode:/vscode.git/clone" did not exist on "84132b19f8702c356629a34b236331d77d51dd67"
Unverified Commit ca4c03c3 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #2527 from andysim/nhcommon

Convert Nose Hoover integrator to Common plaform
parents d169badc 2270a28a
...@@ -264,6 +264,10 @@ protected: ...@@ -264,6 +264,10 @@ protected:
* Compute the kinetic energy of the system at the current time. * Compute the kinetic energy of the system at the current time.
*/ */
virtual double computeKineticEnergy(); virtual double computeKineticEnergy();
/**
* Computing kinetic energy for this integrator does not require forces.
*/
bool kineticEnergyRequiresForce() const override;
std::vector<NoseHooverChain> noseHooverChains; std::vector<NoseHooverChain> noseHooverChains;
std::vector<int> allAtoms; std::vector<int> allAtoms;
......
...@@ -267,6 +267,7 @@ void NoseHooverIntegrator::setRelativeCollisionFrequency(double frequency, int c ...@@ -267,6 +267,7 @@ void NoseHooverIntegrator::setRelativeCollisionFrequency(double frequency, int c
} }
double NoseHooverIntegrator::computeKineticEnergy() { double NoseHooverIntegrator::computeKineticEnergy() {
forcesAreValid = false;
double kE = 0.0; double kE = 0.0;
if(noseHooverChains.size() > 0) { if(noseHooverChains.size() > 0) {
for (const auto &nhc: noseHooverChains){ for (const auto &nhc: noseHooverChains){
...@@ -278,6 +279,10 @@ double NoseHooverIntegrator::computeKineticEnergy() { ...@@ -278,6 +279,10 @@ double NoseHooverIntegrator::computeKineticEnergy() {
return kE; return kE;
} }
bool NoseHooverIntegrator::kineticEnergyRequiresForce() const {
return false;
}
double NoseHooverIntegrator::computeHeatBathEnergy() { double NoseHooverIntegrator::computeHeatBathEnergy() {
double energy = 0; double energy = 0;
for(auto &nhc : noseHooverChains) { for(auto &nhc : noseHooverChains) {
...@@ -340,7 +345,8 @@ void NoseHooverIntegrator::step(int steps) { ...@@ -340,7 +345,8 @@ void NoseHooverIntegrator::step(int steps) {
throw OpenMMException("This Integrator is not bound to a context!"); throw OpenMMException("This Integrator is not bound to a context!");
std::pair<double, double> scale, kineticEnergy; std::pair<double, double> scale, kineticEnergy;
for (int i = 0; i < steps; ++i) { for (int i = 0; i < steps; ++i) {
context->updateContextState(); if(context->updateContextState())
forcesAreValid = false;
for(auto &nhc : noseHooverChains) { for(auto &nhc : noseHooverChains) {
kineticEnergy = nhcKernel.getAs<NoseHooverChainKernel>().computeMaskedKineticEnergy(*context, nhc, false); kineticEnergy = nhcKernel.getAs<NoseHooverChainKernel>().computeMaskedKineticEnergy(*context, nhc, false);
scale = nhcKernel.getAs<NoseHooverChainKernel>().propagateChain(*context, nhc, kineticEnergy, getStepSize()); scale = nhcKernel.getAs<NoseHooverChainKernel>().propagateChain(*context, nhc, kineticEnergy, getStepSize());
......
...@@ -941,6 +941,116 @@ private: ...@@ -941,6 +941,116 @@ private:
ComputeKernel kernel1, kernel2, kernel3; ComputeKernel kernel1, kernel2, kernel3;
}; };
/*
* This kernel is invoked by NoseHooverIntegrator to take one time step.
*/
class CommonIntegrateVelocityVerletStepKernel : public IntegrateVelocityVerletStepKernel {
public:
CommonIntegrateVelocityVerletStepKernel(std::string name, const Platform& platform, ComputeContext& cc) :
IntegrateVelocityVerletStepKernel(name, platform), cc(cc), hasInitializedKernels(false) { }
~CommonIntegrateVelocityVerletStepKernel() {}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the NoseHooverIntegrator this kernel will be used for
*/
void initialize(const System& system, const NoseHooverIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
* @param forcesAreValid a reference to the parent integrator's boolean for keeping
* track of the validity of the current forces.
*/
void execute(ContextImpl& context, const NoseHooverIntegrator& integrator, bool &forcesAreValid);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the NoseHooverIntegrator this kernel is being used for
*/
double computeKineticEnergy(ContextImpl& context, const NoseHooverIntegrator& integrator);
private:
ComputeContext& cc;
float prevMaxPairDistance;
ComputeArray maxPairDistanceBuffer, pairListBuffer, atomListBuffer, pairTemperatureBuffer;
ComputeKernel kernel1, kernel2, kernel3, kernelHardWall;
bool hasInitializedKernels;
};
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class CommonNoseHooverChainKernel : public NoseHooverChainKernel {
public:
CommonNoseHooverChainKernel(std::string name, const Platform& platform, ComputeContext& cc) :
NoseHooverChainKernel(name, platform), cc(cc), hasInitializedPropagateKernel(false),
hasInitializedKineticEnergyKernel(false), hasInitializedHeatBathEnergyKernel(false),
hasInitializedScaleVelocitiesKernel(false) {}
~CommonNoseHooverChainKernel() {}
/**
* Initialize the kernel.
*/
void initialize();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/
std::pair<double, double> propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
double computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
std::pair<double,double> computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/
void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, std::pair<double, double> scaleFactors);
private:
int sumWorkGroupSize;
ComputeContext& cc;
ComputeArray energyBuffer, scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
std::map<int, ComputeArray> atomlists, pairlists;
std::map<int, ComputeKernel> propagateKernels;
bool hasInitializedPropagateKernel;
bool hasInitializedKineticEnergyKernel;
bool hasInitializedHeatBathEnergyKernel;
bool hasInitializedScaleVelocitiesKernel;
ComputeKernel reduceEnergyKernel;
ComputeKernel computeHeatBathEnergyKernel;
ComputeKernel computeAtomsKineticEnergyKernel;
ComputeKernel computePairsKineticEnergyKernel;
ComputeKernel scaleAtomsVelocitiesKernel;
ComputeKernel scalePairsVelocitiesKernel;
};
/** /**
* This kernel is invoked by BrownianIntegrator to take one time step. * This kernel is invoked by BrownianIntegrator to take one time step.
*/ */
......
...@@ -5640,6 +5640,550 @@ double CommonIntegrateLangevinMiddleStepKernel::computeKineticEnergy(ContextImpl ...@@ -5640,6 +5640,550 @@ double CommonIntegrateLangevinMiddleStepKernel::computeKineticEnergy(ContextImpl
return cc.getIntegrationUtilities().computeKineticEnergy(0.0); return cc.getIntegrationUtilities().computeKineticEnergy(0.0);
} }
void CommonIntegrateVelocityVerletStepKernel::initialize(const System& system, const NoseHooverIntegrator& integrator) {
cc.initializeContexts();
map<string, string> defines;
defines["BOLTZ"] = cc.doubleToString(BOLTZ);
ComputeProgram program = cc.compileProgram(CommonKernelSources::velocityVerlet, defines);
kernel1 = program->createKernel("integrateVelocityVerletPart1");
kernel2 = program->createKernel("integrateVelocityVerletPart2");
kernel3 = program->createKernel("integrateVelocityVerletPart3");
kernelHardWall = program->createKernel("integrateVelocityVerletHardWall");
prevMaxPairDistance = -1.0f;
maxPairDistanceBuffer.initialize<float>(cc, 1, "maxPairDistanceBuffer");
}
void CommonIntegrateVelocityVerletStepKernel::execute(ContextImpl& context, const NoseHooverIntegrator& integrator, bool &forcesAreValid) {
IntegrationUtilities& integration = cc.getIntegrationUtilities();
int paddedNumAtoms = cc.getPaddedNumAtoms();
double dt = integrator.getStepSize();
cc.getIntegrationUtilities().setNextStepSize(dt);
// If the atom reordering has occured, the forces from the previous step are permuted and thus invalid.
// They need to be either sorted or recomputed; here we choose the latter.
if (!forcesAreValid || cc.getAtomsWereReordered()) context.calcForcesAndEnergy(true, false);
const auto& atomList = integrator.getAllThermostatedIndividualParticles();
const auto& pairList = integrator.getAllThermostatedPairs();
int numAtoms = atomList.size();
int numPairs = pairList.size();
int numParticles = numAtoms + 2*numPairs;
float maxPairDistance = integrator.getMaximumPairDistance();
// Make sure atom and pair metadata is uploaded and has the correct dimensions
if (prevMaxPairDistance != maxPairDistance) {
std::vector<float> tmp(1, maxPairDistance);
maxPairDistanceBuffer.upload(tmp);
prevMaxPairDistance = maxPairDistance;
}
if (numAtoms !=0 && (!atomListBuffer.isInitialized() || atomListBuffer.getSize() != numAtoms)) {
if (atomListBuffer.isInitialized())
atomListBuffer.resize(atomList.size());
else
atomListBuffer.initialize<int>(cc, atomList.size(), "atomListBuffer");
atomListBuffer.upload(atomList);
}
if (numPairs !=0 && (!pairListBuffer.isInitialized() || pairListBuffer.getSize() != numPairs)) {
if (pairListBuffer.isInitialized()) {
pairListBuffer.resize(pairList.size());
pairTemperatureBuffer.resize(pairList.size());
}
else {
pairListBuffer.initialize<mm_int2>(cc, pairList.size(), "pairListBuffer");
pairTemperatureBuffer.initialize<float>(cc, pairList.size(), "pairTemperatureBuffer");
}
std::vector<mm_int2> tmp;
std::vector<float> tmp2;
for(const auto &pair : pairList) {
tmp.push_back(mm_int2(std::get<0>(pair), std::get<1>(pair)));
tmp2.push_back(std::get<2>(pair));
}
pairListBuffer.upload(tmp);
pairTemperatureBuffer.upload(tmp2);
}
if (!hasInitializedKernels) {
hasInitializedKernels = true;
kernel1->addArg(numAtoms);
kernel1->addArg(numPairs);
kernel1->addArg(paddedNumAtoms);
kernel1->addArg(cc.getIntegrationUtilities().getStepSize());
kernel1->addArg(cc.getPosq());
kernel1->addArg(cc.getVelm());
kernel1->addArg(cc.getLongForceBuffer());
kernel1->addArg(integration.getPosDelta());
kernel1->addArg(numAtoms > 0 ? atomListBuffer : cc.getEnergyBuffer()); // The array is not used if num == 0
kernel1->addArg(numPairs > 0 ? pairListBuffer : cc.getEnergyBuffer()); // The array is not used if num == 0
if (cc.getUseMixedPrecision())
kernel1->addArg(cc.getPosqCorrection());
kernel2->addArg(numParticles);
kernel2->addArg(cc.getIntegrationUtilities().getStepSize());
kernel2->addArg(cc.getPosq());
kernel2->addArg(cc.getVelm());
kernel2->addArg(integration.getPosDelta());
if (cc.getUseMixedPrecision())
kernel2->addArg(cc.getPosqCorrection());
if (numPairs > 0) {
kernelHardWall->addArg(numPairs);
kernelHardWall->addArg(maxPairDistanceBuffer);
kernelHardWall->addArg(cc.getIntegrationUtilities().getStepSize());
kernelHardWall->addArg(cc.getPosq());
kernelHardWall->addArg(cc.getVelm());
kernelHardWall->addArg(pairListBuffer);
kernelHardWall->addArg(pairTemperatureBuffer);
if (cc.getUseMixedPrecision())
kernelHardWall->addArg(cc.getPosqCorrection());
}
kernel3->addArg(numAtoms);
kernel3->addArg(numPairs);
kernel3->addArg(paddedNumAtoms);
kernel3->addArg(cc.getIntegrationUtilities().getStepSize());
kernel3->addArg(cc.getPosq());
kernel3->addArg(cc.getVelm());
kernel3->addArg(cc.getLongForceBuffer());
kernel3->addArg(integration.getPosDelta());
kernel3->addArg(numAtoms > 0 ? atomListBuffer : cc.getEnergyBuffer()); // The array is not used if num == 0
kernel3->addArg(numPairs > 0 ? pairListBuffer : cc.getEnergyBuffer()); // The array is not used if num == 0
if (cc.getUseMixedPrecision())
kernel3->addArg(cc.getPosqCorrection());
}
/*
* Carry out the integration
*/
// Advance the velocities a half step
kernel1->execute(std::max(numAtoms, numPairs));
integration.applyConstraints(integrator.getConstraintTolerance());
// Advance particle positions a full step
kernel2->execute(numParticles);
// Make sure any Drude-like particles have not wandered too far from home
if (numPairs > 0) kernelHardWall->execute(numPairs);
integration.computeVirtualSites();
context.calcForcesAndEnergy(true, false);
forcesAreValid = true;
// Update velocities another half step
kernel3->execute(std::max(numAtoms, numPairs));
integration.applyVelocityConstraints(integrator.getConstraintTolerance());
cc.setTime(cc.getTime()+dt);
cc.setStepCount(cc.getStepCount()+1);
cc.reorderAtoms();
}
double CommonIntegrateVelocityVerletStepKernel::computeKineticEnergy(ContextImpl& context, const NoseHooverIntegrator& integrator) {
return cc.getIntegrationUtilities().computeKineticEnergy(0);
}
void CommonNoseHooverChainKernel::initialize() {
bool useDouble = cc.getUseDoublePrecision() || cc.getUseMixedPrecision();
map<string, string> defines;
int workGroupSize = std::min(cc.getMaxThreadBlockSize(), 512);
defines["WORK_GROUP_SIZE"] = std::to_string(workGroupSize);
defines["BEGIN_YS_LOOP"] = "const real arr[1] = {1.0}; for(int i=0;i<1;++i) { const real ys = arr[i];";
defines["END_YS_LOOP"] = "}";
ComputeProgram program = cc.compileProgram(CommonKernelSources::noseHooverChain, defines);
propagateKernels[1] = program->createKernel("propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "const real arr[3] = {0.828981543588751, -0.657963087177502, 0.828981543588751}; for(int i=0;i<3;++i) { const real ys = arr[i];";
program = cc.compileProgram(CommonKernelSources::noseHooverChain, defines);
propagateKernels[3] = program->createKernel("propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "const real arr[5] = {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}; for(int i=0;i<5;++i) { const real ys = arr[i];";
program = cc.compileProgram(CommonKernelSources::noseHooverChain, defines);
propagateKernels[5] = program->createKernel("propagateNoseHooverChain");
program = cc.compileProgram(CommonKernelSources::noseHooverChain, defines);
reduceEnergyKernel = program->createKernel("reduceEnergyPair");
computeHeatBathEnergyKernel = program->createKernel("computeHeatBathEnergy");
computeAtomsKineticEnergyKernel = program->createKernel("computeAtomsKineticEnergy");
computePairsKineticEnergyKernel = program->createKernel("computePairsKineticEnergy");
scaleAtomsVelocitiesKernel = program->createKernel("scaleAtomsVelocities");
scalePairsVelocitiesKernel = program->createKernel("scalePairsVelocities");
int energyBufferSize = cc.getEnergyBuffer().getSize();
if (cc.getUseDoublePrecision() || cc.getUseMixedPrecision())
energyBuffer.initialize<mm_double2>(cc, energyBufferSize, "energyBuffer");
else
energyBuffer.initialize<mm_float2>(cc, energyBufferSize, "energyBuffer");
}
std::pair<double, double> CommonNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep) {
bool useDouble = cc.getUseDoublePrecision() || cc.getUseMixedPrecision();
int chainID = nhc.getChainID();
int nAtoms = nhc.getThermostatedAtoms().size();
int nPairs = nhc.getThermostatedPairs().size();
int chainLength = nhc.getChainLength();
int numYS = nhc.getNumYoshidaSuzukiTimeSteps();
int numMTS = nhc.getNumMultiTimeSteps();
if (numYS != 1 && numYS != 3 && numYS != 5) {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
}
auto & chainState = cc.getIntegrationUtilities().getNoseHooverChainState();
if (!scaleFactorBuffer.isInitialized() || scaleFactorBuffer.getSize() == 0) {
if (useDouble) {
std::vector<mm_double2> zeros{{0,0}};
if (scaleFactorBuffer.isInitialized())
scaleFactorBuffer.resize(1);
else
scaleFactorBuffer.initialize<mm_double2>(cc, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(zeros);
}
else {
std::vector<mm_float2> zeros{{0,0}};
if (scaleFactorBuffer.isInitialized())
scaleFactorBuffer.resize(1);
else
scaleFactorBuffer.initialize<mm_float2>(cc, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(zeros);
}
}
if (!chainForces.isInitialized() || !chainMasses.isInitialized()) {
if (useDouble) {
std::vector<double> zeros(chainLength,0);
if (chainForces.isInitialized()) {
chainMasses.resize(chainLength);
chainForces.resize(chainLength);
}
else {
chainMasses.initialize<double>(cc, chainLength, "chainMasses");
chainForces.initialize<double>(cc, chainLength, "chainForces");
}
chainMasses.upload(zeros);
chainForces.upload(zeros);
}
else {
std::vector<float> zeros(chainLength,0);
if (chainForces.isInitialized()) {
chainMasses.resize(chainLength);
chainForces.resize(chainLength);
}
else {
chainMasses.initialize<float>(cc, chainLength, "chainMasses");
chainForces.initialize<float>(cc, chainLength, "chainForces");
}
chainMasses.upload(zeros);
chainForces.upload(zeros);
}
}
if (chainForces.getSize() < chainLength)
chainForces.resize(chainLength);
if (chainMasses.getSize() < chainLength)
chainMasses.resize(chainLength);
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
if (nAtoms) {
if (!chainState.count(2*chainID))
chainState[2*chainID] = ComputeArray();
if (!chainState.at(2*chainID).isInitialized() || chainState.at(2*chainID).getSize() != chainLength) {
// We need to upload the Common array
if (useDouble) {
if (chainState.at(2*chainID).isInitialized())
chainState.at(2*chainID).resize(chainLength);
else
chainState.at(2*chainID).initialize<mm_double2>(cc, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<mm_double2> zeros(chainLength, mm_double2(0.0, 0.0));
chainState.at(2*chainID).upload(zeros.data());
}
else {
if (chainState.at(2*chainID).isInitialized())
chainState.at(2*chainID).resize(chainLength);
else
chainState.at(2*chainID).initialize<mm_float2>(cc, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<mm_float2> zeros(chainLength, mm_float2(0.0f, 0.0f));
chainState.at(2*chainID).upload(zeros.data());
}
}
}
if (nPairs) {
if (!chainState.count(2*chainID+1))
chainState[2*chainID+1] = ComputeArray();
if (!chainState.at(2*chainID+1).isInitialized() || chainState.at(2*chainID+1).getSize() != chainLength) {
// We need to upload the Common array
if (useDouble) {
if (chainState.at(2*chainID+1).isInitialized())
chainState.at(2*chainID+1).resize(chainLength);
else
chainState.at(2*chainID+1).initialize<mm_double2>(cc, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<mm_double2> zeros(chainLength, mm_double2(0.0, 0.0));
chainState.at(2*chainID+1).upload(zeros.data());
}
else {
if (chainState.at(2*chainID+1).isInitialized())
chainState.at(2*chainID+1).resize(chainLength);
else
chainState.at(2*chainID+1).initialize<mm_float2>(cc, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<mm_float2> zeros(chainLength, mm_float2(0.0f, 0.0f));
chainState.at(2*chainID+1).upload(zeros.data());
}
}
}
if (!hasInitializedPropagateKernel) {
hasInitializedPropagateKernel = true;
propagateKernels[numYS]->addArg(); // ChainState
propagateKernels[numYS]->addArg(kineticEnergyBuffer);
propagateKernels[numYS]->addArg(scaleFactorBuffer);
propagateKernels[numYS]->addArg(chainMasses);
propagateKernels[numYS]->addArg(chainForces);
propagateKernels[numYS]->addArg(); // ChainType
propagateKernels[numYS]->addArg(chainLength);
propagateKernels[numYS]->addArg(numMTS);
propagateKernels[numYS]->addArg(); // numDoFs
propagateKernels[numYS]->addArg((float)timeStep);
propagateKernels[numYS]->addArg(); // kT
propagateKernels[numYS]->addArg(); // frequency
}
if (nAtoms) {
int chainType = 0;
double temperature = nhc.getTemperature();
float frequency = nhc.getCollisionFrequency();
double kT = BOLTZ * temperature;
int numDOFs = nhc.getNumDegreesOfFreedom();
propagateKernels[numYS]->setArg(0, chainState[2*chainID]);
propagateKernels[numYS]->setArg(5, chainType);
propagateKernels[numYS]->setArg(8, numDOFs);
if (useDouble) {
propagateKernels[numYS]->setArg(10, kT);
} else {
propagateKernels[numYS]->setArg(10, (float)kT);
}
propagateKernels[numYS]->setArg(11, frequency);
propagateKernels[numYS]->execute(1, 1);
}
if (nPairs) {
int chainType = 1;
double relativeTemperature = nhc.getRelativeTemperature();
float relativeFrequency = nhc.getRelativeCollisionFrequency();
double kT = BOLTZ * relativeTemperature;
int ndf = 3*nPairs;
propagateKernels[numYS]->setArg(0, chainState[2*chainID+1]);
propagateKernels[numYS]->setArg(5, chainType);
propagateKernels[numYS]->setArg(8, ndf);
if (useDouble) {
propagateKernels[numYS]->setArg(10, kT);
} else {
propagateKernels[numYS]->setArg(10, (float)kT);
}
propagateKernels[numYS]->setArg(11, relativeFrequency);
propagateKernels[numYS]->execute(1, 1);
}
return {0, 0};
}
double CommonNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
bool useDouble = cc.getUseDoublePrecision() || cc.getUseMixedPrecision();
int chainID = nhc.getChainID();
int chainLength = nhc.getChainLength();
auto & chainState = cc.getIntegrationUtilities().getNoseHooverChainState();
bool absChainIsValid = chainState.count(2*chainID) != 0 &&
chainState[2*chainID].isInitialized() &&
chainState[2*chainID].getSize() == chainLength;
bool relChainIsValid = chainState.count(2*chainID+1) != 0 &&
chainState[2*chainID+1].isInitialized() &&
chainState[2*chainID+1].getSize() == chainLength;
if (!absChainIsValid && !relChainIsValid) return 0.0;
if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) {
if (useDouble) {
std::vector<double> one(1);
heatBathEnergy.initialize<double>(cc, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
}
else {
std::vector<float> one(1);
heatBathEnergy.initialize<float>(cc, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
}
}
cc.clearBuffer(heatBathEnergy);
if(!hasInitializedHeatBathEnergyKernel) {
hasInitializedHeatBathEnergyKernel = true;
computeHeatBathEnergyKernel->addArg(heatBathEnergy);
computeHeatBathEnergyKernel->addArg(chainLength);
computeHeatBathEnergyKernel->addArg(); // numDOFs
computeHeatBathEnergyKernel->addArg(); // kT
computeHeatBathEnergyKernel->addArg(); // frequency
computeHeatBathEnergyKernel->addArg(); // chainstate
}
if (absChainIsValid) {
int numDOFs = nhc.getNumDegreesOfFreedom();
double temperature = nhc.getTemperature();
float frequency = nhc.getCollisionFrequency();
double kT = BOLTZ * temperature;
computeHeatBathEnergyKernel->setArg(2, numDOFs);
if (useDouble) {
computeHeatBathEnergyKernel->setArg(3, kT);
} else {
computeHeatBathEnergyKernel->setArg(3, (float)kT);
}
computeHeatBathEnergyKernel->setArg(4, frequency);
computeHeatBathEnergyKernel->setArg(5, chainState[2*chainID]);
computeHeatBathEnergyKernel->execute(1, 1);
}
if (relChainIsValid) {
int numDOFs = 3 * nhc.getThermostatedPairs().size();
double temperature = nhc.getRelativeTemperature();
float frequency = nhc.getRelativeCollisionFrequency();
double kT = BOLTZ * temperature;
computeHeatBathEnergyKernel->setArg(2, numDOFs);
if (useDouble) {
computeHeatBathEnergyKernel->setArg(3, kT);
} else {
computeHeatBathEnergyKernel->setArg(3, (float)kT);
}
computeHeatBathEnergyKernel->setArg(4, frequency);
computeHeatBathEnergyKernel->setArg(5, chainState[2*chainID+1]);
computeHeatBathEnergyKernel->execute(1, 1);
}
void * pinnedBuffer = cc.getPinnedBuffer();
heatBathEnergy.download(pinnedBuffer);
if (useDouble)
return *((double*) pinnedBuffer);
else
return *((float*) pinnedBuffer);
}
std::pair<double, double> CommonNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &nhc, bool downloadValue) {
bool useDouble = cc.getUseDoublePrecision() || cc.getUseMixedPrecision();
int chainID = nhc.getChainID();
const auto & nhcAtoms = nhc.getThermostatedAtoms();
const auto & nhcPairs = nhc.getThermostatedPairs();
int nAtoms = nhcAtoms.size();
int nPairs = nhcPairs.size();
if (nAtoms) {
if (!atomlists.count(chainID)) {
// We need to upload the Common array
atomlists[chainID] = ComputeArray();
atomlists[chainID].initialize<int>(cc, nAtoms, "atomlist" + std::to_string(chainID));
atomlists[chainID].upload(nhcAtoms);
}
if (atomlists[chainID].getSize() != nAtoms) {
throw OpenMMException("Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
if (nPairs) {
if (!pairlists.count(chainID)) {
// We need to upload the Common array
pairlists[chainID] = ComputeArray();
pairlists[chainID].initialize<mm_int2>(cc, nPairs, "pairlist" + std::to_string(chainID));
std::vector<mm_int2> int2vec;
for(const auto &p : nhcPairs) int2vec.push_back(mm_int2(p.first, p.second));
pairlists[chainID].upload(int2vec);
}
if (pairlists[chainID].getSize() != nPairs) {
throw OpenMMException("Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
if (!kineticEnergyBuffer.isInitialized() || kineticEnergyBuffer.getSize() == 0) {
if (useDouble) {
std::vector<mm_double2> zeros{{0,0}};
kineticEnergyBuffer.initialize<mm_double2>(cc, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(zeros);
}
else {
std::vector<mm_float2> zeros{{0,0}};
kineticEnergyBuffer.initialize<mm_float2>(cc, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(zeros);
}
}
int workGroupSize = std::min(cc.getMaxThreadBlockSize(), 512);
if (!hasInitializedKineticEnergyKernel) {
hasInitializedKineticEnergyKernel = true;
computeAtomsKineticEnergyKernel->addArg(energyBuffer);
computeAtomsKineticEnergyKernel->addArg(); // nAtoms
computeAtomsKineticEnergyKernel->addArg(cc.getVelm());
computeAtomsKineticEnergyKernel->addArg(); // atom list
computePairsKineticEnergyKernel->addArg(energyBuffer);
computePairsKineticEnergyKernel->addArg(); // nPairs
computePairsKineticEnergyKernel->addArg(cc.getVelm());
computePairsKineticEnergyKernel->addArg(); // pair list
reduceEnergyKernel->addArg(energyBuffer);
reduceEnergyKernel->addArg(kineticEnergyBuffer);
reduceEnergyKernel->addArg(energyBuffer.getSize());
}
cc.clearBuffer(energyBuffer);
if (nAtoms) {
computeAtomsKineticEnergyKernel->setArg(1, nAtoms);
computeAtomsKineticEnergyKernel->setArg(3, atomlists[chainID]);
computeAtomsKineticEnergyKernel->execute(nAtoms);
}
if (nPairs) {
computePairsKineticEnergyKernel->setArg(1, nPairs);
computePairsKineticEnergyKernel->setArg(3, pairlists[chainID]);
computePairsKineticEnergyKernel->execute(nPairs);
}
reduceEnergyKernel->execute(workGroupSize, workGroupSize);
std::pair<double, double> KEs = {0, 0};
if (downloadValue) {
if (useDouble) {
mm_double2 tmp;
kineticEnergyBuffer.download(&tmp);
KEs.first = tmp.x;
KEs.second = tmp.y;
}
else {
mm_float2 tmp;
kineticEnergyBuffer.download(&tmp);
KEs.first = tmp.x;
KEs.second = tmp.y;
}
}
return KEs;
}
void CommonNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> scaleFactor) {
// For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
int chainID = nhc.getChainID();
int nAtoms = nhc.getThermostatedAtoms().size();
int nPairs = nhc.getThermostatedPairs().size();
if (!hasInitializedScaleVelocitiesKernel) {
hasInitializedScaleVelocitiesKernel = true;
scaleAtomsVelocitiesKernel->addArg(scaleFactorBuffer);
scaleAtomsVelocitiesKernel->addArg(); // nAtoms
scaleAtomsVelocitiesKernel->addArg(cc.getVelm());
scaleAtomsVelocitiesKernel->addArg(); // atom list
scalePairsVelocitiesKernel->addArg(scaleFactorBuffer);
scalePairsVelocitiesKernel->addArg(); // nPairs
scalePairsVelocitiesKernel->addArg(cc.getVelm());
scalePairsVelocitiesKernel->addArg(); // pair list
}
if (nAtoms) {
scaleAtomsVelocitiesKernel->setArg(1, nAtoms);
scaleAtomsVelocitiesKernel->setArg(3, atomlists[chainID]);
scaleAtomsVelocitiesKernel->execute(nAtoms);
}
if (nPairs) {
scalePairsVelocitiesKernel->setArg(1, nPairs);
scalePairsVelocitiesKernel->setArg(3, pairlists[chainID]);
scalePairsVelocitiesKernel->execute(nPairs);
}
}
void CommonIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) { void CommonIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
cc.initializeContexts(); cc.initializeContexts();
cc.setAsCurrent(); cc.setAsCurrent();
......
KERNEL void propagateNoseHooverChain(GLOBAL mixed2* RESTRICT chainData, GLOBAL const mixed2 * RESTRICT energySum, GLOBAL mixed2* RESTRICT scaleFactor,
//#include <initializer_list> GLOBAL mixed* RESTRICT chainMasses, GLOBAL mixed* RESTRICT chainForces, int chainType, int chainLength, int numMTS,
int numDOFs, float timeStep, mixed kT, float frequency){
__kernel void propagateNoseHooverChain(__global mixed2* restrict chainData, __global const mixed2 * restrict energySum, __global mixed2* restrict scaleFactor,
__global mixed* restrict chainMasses, __global mixed* restrict chainForces,
int chainType, int chainLength, int numMTS, int numDOFs, float timeStep,
mixed kT, float frequency){
const mixed kineticEnergy = chainType == 0 ? energySum[0].x : energySum[0].y; const mixed kineticEnergy = chainType == 0 ? energySum[0].x : energySum[0].y;
mixed scale = 1; mixed scale = 1;
if(kineticEnergy < 1e-8) return; if(kineticEnergy < 1e-8) return;
...@@ -54,10 +50,9 @@ __kernel void propagateNoseHooverChain(__global mixed2* restrict chainData, __gl ...@@ -54,10 +50,9 @@ __kernel void propagateNoseHooverChain(__global mixed2* restrict chainData, __gl
/** /**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads * Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/ */
__kernel void computeHeatBathEnergy(__global mixed* restrict heatBathEnergy, int chainLength, int numDOFs, KERNEL void computeHeatBathEnergy(GLOBAL mixed* RESTRICT heatBathEnergy, int chainLength, int numDOFs,
mixed kT, float frequency, __global const mixed2* restrict chainData){ mixed kT, float frequency, GLOBAL const mixed2* RESTRICT chainData){
// Note that this is always incremented; make sure it's zeroed properly before the first call // Note that this is always incremented; make sure it's zeroed properly before the first call
for(int i = 0; i < chainLength; ++i) { for(int i = 0; i < chainLength; ++i) {
mixed prefac = i ? 1 : numDOFs; mixed prefac = i ? 1 : numDOFs;
mixed mass = prefac * kT / (frequency * frequency); mixed mass = prefac * kT / (frequency * frequency);
...@@ -70,25 +65,24 @@ __kernel void computeHeatBathEnergy(__global mixed* restrict heatBathEnergy, int ...@@ -70,25 +65,24 @@ __kernel void computeHeatBathEnergy(__global mixed* restrict heatBathEnergy, int
} }
} }
__kernel void computeAtomsKineticEnergy(__global mixed2 * restrict energyBuffer, int numAtoms, KERNEL void computeAtomsKineticEnergy(GLOBAL mixed2 * RESTRICT energyBuffer, int numAtoms,
__global const mixed4* restrict velm, __global const int *restrict atoms){ GLOBAL const mixed4* RESTRICT velm, GLOBAL const int *RESTRICT atoms){
mixed2 energy = (mixed2) (0,0); mixed2 energy = make_mixed2(0,0);
//energy = 1; return; int index = GLOBAL_ID;
int index = get_global_id(0);
while (index < numAtoms){ while (index < numAtoms){
int atom = atoms[index]; int atom = atoms[index];
mixed4 v = velm[atom]; mixed4 v = velm[atom];
mixed mass = v.w == 0 ? 0 : 1 / v.w; mixed mass = v.w == 0 ? 0 : 1 / v.w;
energy.x += 0.5f * mass * (v.x*v.x + v.y*v.y + v.z*v.z); energy.x += 0.5f * mass * (v.x*v.x + v.y*v.y + v.z*v.z);
index += get_global_size(0); index += GLOBAL_SIZE;
} }
energyBuffer[get_global_id(0)] = energy; energyBuffer[GLOBAL_ID] = energy;
} }
__kernel void computePairsKineticEnergy(__global mixed2 * restrict energyBuffer, int numPairs, KERNEL void computePairsKineticEnergy(GLOBAL mixed2 * RESTRICT energyBuffer, int numPairs,
__global const mixed4* restrict velm, __global const int2 *restrict pairs){ GLOBAL const mixed4* RESTRICT velm, GLOBAL const int2 *RESTRICT pairs){
mixed2 energy = (mixed2) (0,0); mixed2 energy = make_mixed2(0,0);
int index = get_global_id(0); int index = GLOBAL_ID;
while (index < numPairs){ while (index < numPairs){
int2 pair = pairs[index]; int2 pair = pairs[index];
int atom1 = pair.x; int atom1 = pair.x;
...@@ -107,64 +101,74 @@ __kernel void computePairsKineticEnergy(__global mixed2 * restrict energyBuffer, ...@@ -107,64 +101,74 @@ __kernel void computePairsKineticEnergy(__global mixed2 * restrict energyBuffer,
rv.z = v2.z - v1.z; rv.z = v2.z - v1.z;
energy.x += 0.5f * (m1 + m2) * (cv.x*cv.x + cv.y*cv.y + cv.z*cv.z); energy.x += 0.5f * (m1 + m2) * (cv.x*cv.x + cv.y*cv.y + cv.z*cv.z);
energy.y += 0.5f * (m1 * m2 / (m1 + m2)) * (rv.x*rv.x + rv.y*rv.y + rv.z*rv.z); energy.y += 0.5f * (m1 * m2 / (m1 + m2)) * (rv.x*rv.x + rv.y*rv.y + rv.z*rv.z);
index += get_global_size(0); index += GLOBAL_SIZE;
} }
// The atoms version of this has been called already, so accumulate instead of assigning here // The atoms version of this has been called already, so accumulate instead of assigning here
energyBuffer[get_global_id(0)].xy += energy.xy; energyBuffer[GLOBAL_ID].x += energy.x;
energyBuffer[GLOBAL_ID].y += energy.y;
} }
__kernel void scaleAtomsVelocities(__global mixed2* restrict scaleFactor, int numAtoms, KERNEL void scaleAtomsVelocities(GLOBAL mixed2* RESTRICT scaleFactor, int numAtoms,
__global mixed4* restrict velm, __global const int *restrict atoms){ GLOBAL mixed4* RESTRICT velm, GLOBAL const int *RESTRICT atoms){
const mixed scale = scaleFactor[0].x; const mixed scale = scaleFactor[0].x;
int index = get_global_id(0); int index = GLOBAL_ID;
while (index < numAtoms){ while (index < numAtoms){
int atom = atoms[index]; int atom = atoms[index];
velm[atom].x *= scale; velm[atom].x *= scale;
velm[atom].y *= scale; velm[atom].y *= scale;
velm[atom].z *= scale; velm[atom].z *= scale;
index += get_global_size(0); index += GLOBAL_SIZE;
} }
} }
__kernel void scalePairsVelocities(__global mixed2 * restrict scaleFactor, int numPairs, KERNEL void scalePairsVelocities(GLOBAL mixed2 * RESTRICT scaleFactor, int numPairs,
__global mixed4* restrict velm, __global const int2 *restrict pairs){ GLOBAL mixed4* RESTRICT velm, GLOBAL const int2 *RESTRICT pairs){
int index = get_global_id(0); int index = GLOBAL_ID;
mixed comScale = scaleFactor[0].x;
mixed relScale = scaleFactor[0].y;
while (index < numPairs){ while (index < numPairs){
int atom1 = pairs[index].x; int atom1 = pairs[index].x;
int atom2 = pairs[index].y; int atom2 = pairs[index].y;
mixed m1 = velm[atom1].w == 0 ? 0 : 1 / velm[atom1].w; mixed m1 = velm[atom1].w == 0 ? 0 : 1 / velm[atom1].w;
mixed m2 = velm[atom2].w == 0 ? 0 : 1 / velm[atom2].w; mixed m2 = velm[atom2].w == 0 ? 0 : 1 / velm[atom2].w;
mixed4 cv; mixed4 cv;
cv.xyz = (m1*velm[atom1].xyz + m2*velm[atom2].xyz) / (m1 + m2); cv.x = (m1*velm[atom1].x + m2*velm[atom2].x) / (m1 + m2);
cv.y = (m1*velm[atom1].y + m2*velm[atom2].y) / (m1 + m2);
cv.z = (m1*velm[atom1].z + m2*velm[atom2].z) / (m1 + m2);
mixed4 rv; mixed4 rv;
rv.xyz = velm[atom2].xyz - velm[atom1].xyz; rv.x = velm[atom2].x - velm[atom1].x;
velm[atom1].x = scaleFactor[0].x * cv.x - scaleFactor[0].y * rv.x * m2 / (m1 + m2); rv.y = velm[atom2].y - velm[atom1].y;
velm[atom1].y = scaleFactor[0].x * cv.y - scaleFactor[0].y * rv.y * m2 / (m1 + m2); rv.z = velm[atom2].z - velm[atom1].z;
velm[atom1].z = scaleFactor[0].x * cv.z - scaleFactor[0].y * rv.z * m2 / (m1 + m2); velm[atom1].x = comScale * cv.x - relScale * rv.x * m2 / (m1 + m2);
velm[atom2].x = scaleFactor[0].x * cv.x + scaleFactor[0].y * rv.x * m1 / (m1 + m2); velm[atom1].y = comScale * cv.y - relScale * rv.y * m2 / (m1 + m2);
velm[atom2].y = scaleFactor[0].x * cv.y + scaleFactor[0].y * rv.y * m1 / (m1 + m2); velm[atom1].z = comScale * cv.z - relScale * rv.z * m2 / (m1 + m2);
velm[atom2].z = scaleFactor[0].x * cv.z + scaleFactor[0].y * rv.z * m1 / (m1 + m2); velm[atom2].x = comScale * cv.x + relScale * rv.x * m1 / (m1 + m2);
index += get_global_size(0); velm[atom2].y = comScale * cv.y + relScale * rv.y * m1 / (m1 + m2);
velm[atom2].z = comScale * cv.z + relScale * rv.z * m1 / (m1 + m2);
index += GLOBAL_SIZE;
} }
} }
/** /**
* Sum the energy buffer containing a pair of energies stored as mixed2. This is copied from utilities.cu with small modifications * Sum the energy buffer containing a pair of energies stored as mixed2. This is taken from the analogous customIntegrator code
*/ */
__kernel void reduceEnergyPair(__global const mixed2* restrict energyBuffer, __global mixed2* restrict result, int bufferSize, int workGroupSize, __local mixed2* restrict tempBuffer) { KERNEL void reduceEnergyPair(GLOBAL const mixed2* RESTRICT sumBuffer, GLOBAL mixed2* result, int bufferSize) {
const unsigned int thread = get_local_id(0); LOCAL mixed2 tempBuffer[WORK_GROUP_SIZE];
mixed2 sum = (mixed2) (0,0); const unsigned int thread = LOCAL_ID;
for (unsigned int index = thread; index < bufferSize; index += get_local_size(0)) { mixed2 sum = make_mixed2(0,0);
sum.xy += energyBuffer[index].xy; for (unsigned int index = thread; index < bufferSize; index += LOCAL_SIZE) {
sum.x += sumBuffer[index].x;
sum.y += sumBuffer[index].y;
} }
tempBuffer[thread].xy = sum.xy; tempBuffer[thread].x = sum.x;
for (int i = 1; i < workGroupSize; i *= 2) { tempBuffer[thread].y = sum.y;
barrier(CLK_LOCAL_MEM_FENCE); for (int i = 1; i < WORK_GROUP_SIZE; i *= 2) {
if (thread%(i*2) == 0 && thread+i < workGroupSize) { SYNC_THREADS;
tempBuffer[thread].xy += tempBuffer[thread+i].xy; if (thread%(i*2) == 0 && thread+i < WORK_GROUP_SIZE) {
tempBuffer[thread].x += tempBuffer[thread+i].x;
tempBuffer[thread].y += tempBuffer[thread+i].y;
} }
} }
if (thread == 0) { if (thread == 0)
*result = tempBuffer[0]; *result = tempBuffer[0];
}
} }
/** /**
* Perform the first step of Velocity Verlet integration. * Perform the first step of Velocity Verlet integration.
*
* update displacements (posDelta) and velocities (velm)
*/ */
extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPairs, int paddedNumAtoms, const mixed2* __restrict__ dt, const real4* __restrict__ posq, KERNEL void integrateVelocityVerletPart1(int numAtoms, int numPairs, int paddedNumAtoms, GLOBAL const mixed2* RESTRICT dt, GLOBAL const real4* RESTRICT posq,
const real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta, GLOBAL mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force, GLOBAL mixed4* RESTRICT posDelta,
const int* __restrict__ atomList, const int2* __restrict__ pairList) { GLOBAL const int* RESTRICT atomList, GLOBAL const int2* RESTRICT pairList
#ifdef USE_MIXED_PRECISION
,GLOBAL const real4* RESTRICT posqCorrection
#endif
){
const mixed2 stepSize = dt[0]; const mixed2 stepSize = dt[0];
const mixed dtPos = stepSize.y; const mixed dtPos = stepSize.y;
const mixed dtVel = 0.5f*(stepSize.x+stepSize.y); const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
const mixed scale = 0.5f*dtVel/(mixed) 0x100000000; const mixed scale = 0.5f * dtVel/(mixed) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) { int index = GLOBAL_ID;
while (index < numAtoms) {
int atom = atomList[index]; int atom = atomList[index];
mixed4 velocity = velm[atom]; mixed4 velocity = velm[atom];
if (velocity.w != 0.0) { if (velocity.w != 0.0) {
...@@ -31,8 +34,10 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai ...@@ -31,8 +34,10 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai
posDelta[atom] = pos; posDelta[atom] = pos;
velm[atom] = velocity; velm[atom] = velocity;
} }
index += GLOBAL_SIZE;
} }
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numPairs; index += blockDim.x*gridDim.x) { index = GLOBAL_ID;
while (index < numPairs){
int atom1 = pairList[index].x; int atom1 = pairList[index].x;
int atom2 = pairList[index].y; int atom2 = pairList[index].y;
mixed4 v1 = velm[atom1]; mixed4 v1 = velm[atom1];
...@@ -51,21 +56,27 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai ...@@ -51,21 +56,27 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai
relVel.x= v2.x - v1.x; relVel.x= v2.x - v1.x;
relVel.y= v2.y - v1.y; relVel.y= v2.y - v1.y;
relVel.z= v2.z - v1.z; relVel.z= v2.z - v1.z;
//
mixed3 comFrc; mixed3 comFrc;
comFrc.x = force[atom1] + force[atom2]; mixed F1x = scale*force[atom1];
comFrc.y = force[atom1 + paddedNumAtoms] + force[atom2 + paddedNumAtoms]; mixed F1y = scale*force[atom1+paddedNumAtoms];
comFrc.z = force[atom1 + paddedNumAtoms*2] + force[atom2 + paddedNumAtoms*2]; mixed F1z = scale*force[atom1+paddedNumAtoms*2];
mixed F2x = scale*force[atom2];
mixed F2y = scale*force[atom2+paddedNumAtoms];
mixed F2z = scale*force[atom2+paddedNumAtoms*2];
comFrc.x = F1x + F2x;
comFrc.y = F1y + F2y;
comFrc.z = F1z + F2z;
mixed3 relFrc; mixed3 relFrc;
relFrc.x = mass1fract*force[atom2] - mass2fract*force[atom1]; relFrc.x = mass1fract*F2x - mass2fract*F1x;
relFrc.y = mass1fract*force[atom2+paddedNumAtoms] - mass2fract*force[atom1+paddedNumAtoms]; relFrc.y = mass1fract*F2y - mass2fract*F1y;
relFrc.z = mass1fract*force[atom2+paddedNumAtoms*2] - mass2fract*force[atom1+paddedNumAtoms*2]; relFrc.z = mass1fract*F2z - mass2fract*F1z;
comVel.x += comFrc.x * scale * invTotMass; comVel.x += comFrc.x * invTotMass;
comVel.y += comFrc.y * scale * invTotMass; comVel.y += comFrc.y * invTotMass;
comVel.z += comFrc.z * scale * invTotMass; comVel.z += comFrc.z * invTotMass;
relVel.x += relFrc.x * scale * invRedMass; relVel.x += relFrc.x * invRedMass;
relVel.y += relFrc.y * scale * invRedMass; relVel.y += relFrc.y * invRedMass;
relVel.z += relFrc.z * scale * invRedMass; relVel.z += relFrc.z * invRedMass;
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
real4 posv1 = posq[atom1]; real4 posv1 = posq[atom1];
real4 posv2 = posq[atom2]; real4 posv2 = posq[atom2];
...@@ -97,22 +108,25 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai ...@@ -97,22 +108,25 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai
posDelta[atom2] = pos2; posDelta[atom2] = pos2;
velm[atom2] = v2; velm[atom2] = v2;
} }
index += GLOBAL_SIZE;
} }
} }
/** /**
* Perform the second step of Velocity Verlet integration. * Perform the second step of Velocity Verlet integration.
*
* apply displacements to positions (posq) after constraints have been enforced
*/ */
extern "C" __global__ void integrateVelocityVerletPart2(int numAtoms, mixed2* __restrict__ dt, real4* __restrict__ posq, KERNEL void integrateVelocityVerletPart2(int numAtoms, GLOBAL mixed2* RESTRICT dt, GLOBAL real4* RESTRICT posq, GLOBAL mixed4* RESTRICT velm,
real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const mixed4* __restrict__ posDelta) { GLOBAL const mixed4* RESTRICT posDelta
#ifdef USE_MIXED_PRECISION
,GLOBAL real4* RESTRICT posqCorrection
#endif
){
mixed2 stepSize = dt[0]; mixed2 stepSize = dt[0];
int index = blockIdx.x*blockDim.x+threadIdx.x; int index = GLOBAL_ID;
if (index == 0) if (index == 0)
dt[0].x = stepSize.y; dt[0].x = stepSize.y;
for (; index < numAtoms; index += blockDim.x*gridDim.x) { while(index < numAtoms) {
mixed4 velocity = velm[index]; mixed4 velocity = velm[index];
if (velocity.w != 0.0) { if (velocity.w != 0.0) {
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
...@@ -133,21 +147,23 @@ extern "C" __global__ void integrateVelocityVerletPart2(int numAtoms, mixed2* __ ...@@ -133,21 +147,23 @@ extern "C" __global__ void integrateVelocityVerletPart2(int numAtoms, mixed2* __
posq[index] = pos; posq[index] = pos;
#endif #endif
} }
index += GLOBAL_SIZE;
} }
} }
/** /**
* Perform the third step of Velocity Verlet integration. * Perform the third step of Velocity Verlet integration.
*
* modify the velocities (velm) after the force update
*/ */
extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPairs, int paddedNumAtoms, mixed2* __restrict__ dt, real4* __restrict__ posq, KERNEL void integrateVelocityVerletPart3(int numAtoms, int numPairs, int paddedNumAtoms, GLOBAL mixed2* RESTRICT dt, GLOBAL real4* RESTRICT posq,
real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed4* __restrict__ posDelta, GLOBAL mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force, GLOBAL const mixed4* RESTRICT posDelta,
const int* __restrict__ atomList, const int2* __restrict__ pairList) { GLOBAL const int* RESTRICT atomList, GLOBAL const int2* RESTRICT pairList
#ifdef USE_MIXED_PRECISION
,GLOBAL const real4* RESTRICT posqCorrection
#endif
){
mixed2 stepSize = dt[0]; mixed2 stepSize = dt[0];
#if __CUDA_ARCH__ >= 130 #ifdef SUPPORTS_DOUBLE_PRECISION
double oneOverDt = 1.0/stepSize.y; double oneOverDt = 1.0/stepSize.y;
#else #else
float oneOverDt = 1.0f/stepSize.y; float oneOverDt = 1.0f/stepSize.y;
...@@ -155,11 +171,10 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai ...@@ -155,11 +171,10 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
#endif #endif
const mixed dtVel = 0.5f*(stepSize.x+stepSize.y); const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
const mixed scale = 0.5f*dtVel/(mixed) 0x100000000; const mixed scale = 0.5f*dtVel/(mixed) 0x100000000;
int index = blockIdx.x*blockDim.x+threadIdx.x; int index = GLOBAL_ID;
if (index == 0) if (index == 0)
dt[0].x = stepSize.y; dt[0].x = stepSize.y;
while(index < numAtoms) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
int atom = atomList[index]; int atom = atomList[index];
mixed4 velocity = velm[atom]; mixed4 velocity = velm[atom];
if (velocity.w != 0.0) { if (velocity.w != 0.0) {
...@@ -167,15 +182,17 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai ...@@ -167,15 +182,17 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
velocity.x += scale*force[atom]*velocity.w + (deltaXconstrained.x - velocity.x*stepSize.y)*oneOverDt; velocity.x += scale*force[atom]*velocity.w + (deltaXconstrained.x - velocity.x*stepSize.y)*oneOverDt;
velocity.y += scale*force[atom+paddedNumAtoms]*velocity.w + (deltaXconstrained.y - velocity.y*stepSize.y)*oneOverDt; velocity.y += scale*force[atom+paddedNumAtoms]*velocity.w + (deltaXconstrained.y - velocity.y*stepSize.y)*oneOverDt;
velocity.z += scale*force[atom+paddedNumAtoms*2]*velocity.w + (deltaXconstrained.z - velocity.z*stepSize.y)*oneOverDt; velocity.z += scale*force[atom+paddedNumAtoms*2]*velocity.w + (deltaXconstrained.z - velocity.z*stepSize.y)*oneOverDt;
#if __CUDA_ARCH__ < 130 #ifndef SUPPORTS_DOUBLE_PRECISION
velocity.x += (deltaXconstrained.x - velocity.x*stepSize.y)*correction; velocity.x += (deltaXconstrained.x - velocity.x*stepSize.y)*correction;
velocity.y += (deltaXconstrained.y - velocity.y*stepSize.y)*correction; velocity.y += (deltaXconstrained.y - velocity.y*stepSize.y)*correction;
velocity.z += (deltaXconstrained.z - velocity.z*stepSize.y)*correction; velocity.z += (deltaXconstrained.z - velocity.z*stepSize.y)*correction;
#endif #endif
velm[atom] = velocity; velm[atom] = velocity;
} }
index += GLOBAL_SIZE;
} }
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numPairs; index += blockDim.x*gridDim.x) { index = GLOBAL_ID;
while(index < numPairs) {
int atom1 = pairList[index].x; int atom1 = pairList[index].x;
int atom2 = pairList[index].y; int atom2 = pairList[index].y;
mixed4 v1 = velm[atom1]; mixed4 v1 = velm[atom1];
...@@ -194,27 +211,33 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai ...@@ -194,27 +211,33 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
relVel.x= v2.x - v1.x; relVel.x= v2.x - v1.x;
relVel.y= v2.y - v1.y; relVel.y= v2.y - v1.y;
relVel.z= v2.z - v1.z; relVel.z= v2.z - v1.z;
//
mixed3 comFrc; mixed3 comFrc;
comFrc.x = force[atom1] + force[atom2]; mixed F1x = scale*force[atom1];
comFrc.y = force[atom1 + paddedNumAtoms] + force[atom2 + paddedNumAtoms]; mixed F1y = scale*force[atom1+paddedNumAtoms];
comFrc.z = force[atom1 + paddedNumAtoms*2] + force[atom2 + paddedNumAtoms*2]; mixed F1z = scale*force[atom1+paddedNumAtoms*2];
mixed F2x = scale*force[atom2];
mixed F2y = scale*force[atom2+paddedNumAtoms];
mixed F2z = scale*force[atom2+paddedNumAtoms*2];
comFrc.x = F1x + F2x;
comFrc.y = F1y + F2y;
comFrc.z = F1z + F2z;
mixed3 relFrc; mixed3 relFrc;
relFrc.x = mass1fract*force[atom2] - mass2fract*force[atom1]; relFrc.x = mass1fract*F2x - mass2fract*F1x;
relFrc.y = mass1fract*force[atom2+paddedNumAtoms] - mass2fract*force[atom1+paddedNumAtoms]; relFrc.y = mass1fract*F2y - mass2fract*F1y;
relFrc.z = mass1fract*force[atom2+paddedNumAtoms*2] - mass2fract*force[atom1+paddedNumAtoms*2]; relFrc.z = mass1fract*F2z - mass2fract*F1z;
comVel.x += comFrc.x * scale * invTotMass; comVel.x += comFrc.x * invTotMass;
comVel.y += comFrc.y * scale * invTotMass; comVel.y += comFrc.y * invTotMass;
comVel.z += comFrc.z * scale * invTotMass; comVel.z += comFrc.z * invTotMass;
relVel.x += relFrc.x * scale * invRedMass; relVel.x += relFrc.x * invRedMass;
relVel.y += relFrc.y * scale * invRedMass; relVel.y += relFrc.y * invRedMass;
relVel.z += relFrc.z * scale * invRedMass; relVel.z += relFrc.z * invRedMass;
if (v1.w != 0.0f) { if (v1.w != 0.0f) {
mixed4 deltaXconstrained = posDelta[atom1]; mixed4 deltaXconstrained = posDelta[atom1];
v1.x = comVel.x - relVel.x*mass2fract + (deltaXconstrained.x - v1.x*stepSize.y)*oneOverDt; v1.x = comVel.x - relVel.x*mass2fract + (deltaXconstrained.x - v1.x*stepSize.y)*oneOverDt;
v1.y = comVel.y - relVel.y*mass2fract + (deltaXconstrained.y - v1.y*stepSize.y)*oneOverDt; v1.y = comVel.y - relVel.y*mass2fract + (deltaXconstrained.y - v1.y*stepSize.y)*oneOverDt;
v1.z = comVel.z - relVel.z*mass2fract + (deltaXconstrained.z - v1.z*stepSize.y)*oneOverDt; v1.z = comVel.z - relVel.z*mass2fract + (deltaXconstrained.z - v1.z*stepSize.y)*oneOverDt;
#if __CUDA_ARCH__ < 130 #ifndef SUPPORTS_DOUBLE_PRECISION
v1.x += (deltaXconstrained.x - v1.x*stepSize.y)*correction; v1.x += (deltaXconstrained.x - v1.x*stepSize.y)*correction;
v1.y += (deltaXconstrained.y - v1.y*stepSize.y)*correction; v1.y += (deltaXconstrained.y - v1.y*stepSize.y)*correction;
v1.z += (deltaXconstrained.z - v1.z*stepSize.y)*correction; v1.z += (deltaXconstrained.z - v1.z*stepSize.y)*correction;
...@@ -226,58 +249,57 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai ...@@ -226,58 +249,57 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
v2.x = comVel.x + relVel.x*mass1fract + (deltaXconstrained.x - v2.x*stepSize.y)*oneOverDt; v2.x = comVel.x + relVel.x*mass1fract + (deltaXconstrained.x - v2.x*stepSize.y)*oneOverDt;
v2.y = comVel.y + relVel.y*mass1fract + (deltaXconstrained.y - v2.y*stepSize.y)*oneOverDt; v2.y = comVel.y + relVel.y*mass1fract + (deltaXconstrained.y - v2.y*stepSize.y)*oneOverDt;
v2.z = comVel.z + relVel.z*mass1fract + (deltaXconstrained.z - v2.z*stepSize.y)*oneOverDt; v2.z = comVel.z + relVel.z*mass1fract + (deltaXconstrained.z - v2.z*stepSize.y)*oneOverDt;
#if __CUDA_ARCH__ < 130 #ifndef SUPPORTS_DOUBLE_PRECISION
v2.x += (deltaXconstrained.x - v2.x*stepSize.y)*correction; v2.x += (deltaXconstrained.x - v2.x*stepSize.y)*correction;
v2.y += (deltaXconstrained.y - v2.y*stepSize.y)*correction; v2.y += (deltaXconstrained.y - v2.y*stepSize.y)*correction;
v2.z += (deltaXconstrained.z - v2.z*stepSize.y)*correction; v2.z += (deltaXconstrained.z - v2.z*stepSize.y)*correction;
#endif #endif
velm[atom2] = v2; velm[atom2] = v2;
} }
index += GLOBAL_SIZE;
} }
} }
KERNEL void integrateVelocityVerletHardWall(int numPairs, GLOBAL const float* RESTRICT maxPairDistance,
GLOBAL mixed2* RESTRICT dt, GLOBAL real4* RESTRICT posq,
GLOBAL mixed4* RESTRICT velm, GLOBAL const int2* RESTRICT pairList,
GLOBAL const float* RESTRICT pairTemperature
#ifdef USE_MIXED_PRECISION
,GLOBAL real4* RESTRICT posqCorrection
#endif
){
/**
* Apply the hard wall constraint
*/
extern "C" __global__ void integrateVelocityVerletHardWall(int numPairs, const float* __restrict__ maxPairDistance, mixed2* __restrict__ dt, real4* __restrict__ posq,
real4* __restrict__ posqCorrection, mixed4* __restrict__ velm,
const int2* __restrict__ pairList, const float* __restrict__ pairTemperature) {
mixed dtPos = dt[0].y; mixed dtPos = dt[0].y;
mixed maxDelta = (mixed) maxPairDistance[0]; mixed maxDelta = (mixed) maxPairDistance[0];
// Apply hard wall constraints. if (maxDelta > 0){
if (maxDelta > 0) { int index = GLOBAL_ID;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numPairs; index += blockDim.x*gridDim.x) { while(index < numPairs) {
const mixed hardWallScale = sqrt( ((mixed) pairTemperature[index]) * ((mixed) BOLTZ)); const mixed hardWallScale = sqrt( ((mixed) pairTemperature[index]) * ((mixed) BOLTZ));
int2 atom = make_int2(pairList[index].x, pairList[index].y); int atom1 = pairList[index].x;
int atom2 = pairList[index].y;
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
real4 posv1 = posq[atom.x]; real4 posv1 = posq[atom1];
real4 posc1 = posqCorrection[atom.x]; real4 posc1 = posqCorrection[atom1];
mixed4 pos1 = make_mixed4(posv1.x+(mixed)posc1.x, posv1.y+(mixed)posc1.y, posv1.z+(mixed)posc1.z, posv1.w); mixed4 pos1 = make_mixed4(posv1.x+(mixed)posc1.x, posv1.y+(mixed)posc1.y, posv1.z+(mixed)posc1.z, posv1.w);
real4 posv2 = posq[atom.y]; real4 posv2 = posq[atom2];
real4 posc2 = posqCorrection[atom.y]; real4 posc2 = posqCorrection[atom2];
mixed4 pos2 = make_mixed4(posv2.x+(mixed)posc2.x, posv2.y+(mixed)posc2.y, posv2.z+(mixed)posc2.z, posv2.w); mixed4 pos2 = make_mixed4(posv2.x+(mixed)posc2.x, posv2.y+(mixed)posc2.y, posv2.z+(mixed)posc2.z, posv2.w);
#else #else
real4 pos1 = posq[atom.x]; real4 pos1 = posq[atom1];
real4 pos2 = posq[atom.y]; real4 pos2 = posq[atom2];
#endif #endif
mixed3 delta = make_mixed3( mixed3 delta = make_mixed3(pos1.x - pos2.x, pos1.y - pos2.y, pos1.z - pos2.z);
mixed (pos1.x - pos2.x),
mixed (pos1.y - pos2.y),
mixed (pos1.z - pos2.z)
);
mixed r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z); mixed r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
mixed rInv = 1/r; mixed rInv = 1/r;
if (rInv*maxDelta < 1.0) { if (rInv*maxDelta < 1.0) {
// The constraint has been violated, so make the inter-particle distance "bounce" // The constraint has been violated, so make the inter-particle distance "bounce"
// off the hard wall. // off the hard wall.
mixed3 bondDir = make_mixed3(delta.x * rInv, delta.y * rInv, delta.z * rInv); mixed3 bondDir = make_mixed3(delta.x * rInv, delta.y * rInv, delta.z * rInv);
mixed3 vel1 = make_mixed3(velm[atom.x].x, velm[atom.x].y, velm[atom.x].z); mixed3 vel1 = make_mixed3(velm[atom1].x, velm[atom1].y, velm[atom1].z);
mixed3 vel2 = make_mixed3(velm[atom.y].x, velm[atom.y].y, velm[atom.y].z); mixed3 vel2 = make_mixed3(velm[atom2].x, velm[atom2].y, velm[atom2].z);
mixed m1 = velm[atom.x].w != 0.0 ? 1.0/velm[atom.x].w : 0.0; mixed m1 = velm[atom1].w != 0.0 ? 1.0/velm[atom1].w : 0.0;
mixed m2 = velm[atom.y].w != 0.0 ? 1.0/velm[atom.y].w : 0.0; mixed m2 = velm[atom2].w != 0.0 ? 1.0/velm[atom2].w : 0.0;
mixed invTotMass = (m1 + m2 != 0.0) ? 1.0 /(m1 + m2) : 0.0; mixed invTotMass = (m1 + m2 != 0.0) ? 1.0 /(m1 + m2) : 0.0;
mixed deltaR = r-maxDelta; mixed deltaR = r-maxDelta;
mixed deltaT = dtPos; mixed deltaT = dtPos;
...@@ -298,12 +320,12 @@ extern "C" __global__ void integrateVelocityVerletHardWall(int numPairs, const f ...@@ -298,12 +320,12 @@ extern "C" __global__ void integrateVelocityVerletHardWall(int numPairs, const f
pos1.x += bondDir.x*dr; pos1.x += bondDir.x*dr;
pos1.y += bondDir.y*dr; pos1.y += bondDir.y*dr;
pos1.z += bondDir.z*dr; pos1.z += bondDir.z*dr;
velm[atom.x] = make_mixed4(vp1.x + bondDir.x*dotvr1, vp1.y + bondDir.y*dotvr1, vp1.z + bondDir.z*dotvr1, velm[atom.x].w); velm[atom1] = make_mixed4(vp1.x + bondDir.x*dotvr1, vp1.y + bondDir.y*dotvr1, vp1.z + bondDir.z*dotvr1, velm[atom1].w);
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
posq[atom.x] = make_real4((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w); posq[atom1] = make_real4((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
posqCorrection[atom.x] = make_real4(pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0); posqCorrection[atom1] = make_real4(pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
#else #else
posq[atom.x] = pos1; posq[atom1] = pos1;
#endif #endif
} }
else { else {
...@@ -331,19 +353,21 @@ extern "C" __global__ void integrateVelocityVerletHardWall(int numPairs, const f ...@@ -331,19 +353,21 @@ extern "C" __global__ void integrateVelocityVerletHardWall(int numPairs, const f
pos2.x += bondDir.x*dr2; pos2.x += bondDir.x*dr2;
pos2.y += bondDir.y*dr2; pos2.y += bondDir.y*dr2;
pos2.z += bondDir.z*dr2; pos2.z += bondDir.z*dr2;
velm[atom.x] = make_mixed4(vp1.x + bondDir.x*dotvr1, vp1.y + bondDir.y*dotvr1, vp1.z + bondDir.z*dotvr1, velm[atom.x].w); velm[atom1] = make_mixed4(vp1.x + bondDir.x*dotvr1, vp1.y + bondDir.y*dotvr1, vp1.z + bondDir.z*dotvr1, velm[atom1].w);
velm[atom.y] = make_mixed4(vp2.x + bondDir.x*dotvr2, vp2.y + bondDir.y*dotvr2, vp2.z + bondDir.z*dotvr2, velm[atom.y].w); velm[atom2] = make_mixed4(vp2.x + bondDir.x*dotvr2, vp2.y + bondDir.y*dotvr2, vp2.z + bondDir.z*dotvr2, velm[atom2].w);
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
posq[atom.x] = make_real4((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w); posq[atom1] = make_real4((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
posq[atom.y] = make_real4((real) pos2.x, (real) pos2.y, (real) pos2.z, (real) pos2.w); posq[atom2] = make_real4((real) pos2.x, (real) pos2.y, (real) pos2.z, (real) pos2.w);
posqCorrection[atom.x] = make_real4(pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0); posqCorrection[atom1] = make_real4(pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
posqCorrection[atom.y] = make_real4(pos2.x-(real) pos2.x, pos2.y-(real) pos2.y, pos2.z-(real) pos2.z, 0); posqCorrection[atom2] = make_real4(pos2.x-(real) pos2.x, pos2.y-(real) pos2.y, pos2.z-(real) pos2.z, 0);
#else #else
posq[atom.x] = pos1; posq[atom1] = pos1;
posq[atom.y] = pos2; posq[atom2] = pos2;
#endif #endif
} }
} }
index += GLOBAL_SIZE;
}
} }
} /* end of hard wall constraint part */
} }
...@@ -447,109 +447,6 @@ private: ...@@ -447,109 +447,6 @@ private:
CUfunction copyStateKernel, copyForcesKernel, addForcesKernel; CUfunction copyStateKernel, copyForcesKernel, addForcesKernel;
}; };
/*
* This kernel is invoked by NoseHooverIntegrator to take one time step.
*/
class CudaIntegrateVelocityVerletStepKernel : public IntegrateVelocityVerletStepKernel {
public:
CudaIntegrateVelocityVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
IntegrateVelocityVerletStepKernel(name, platform), cu(cu) { }
~CudaIntegrateVelocityVerletStepKernel() {}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the NoseHooverIntegrator this kernel will be used for
*/
void initialize(const System& system, const NoseHooverIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
* @param forcesAreValid a reference to the parent integrator's boolean for keeping
* track of the validity of the current forces.
*/
void execute(ContextImpl& context, const NoseHooverIntegrator& integrator, bool &forcesAreValid);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the NoseHooverIntegrator this kernel is being used for
*/
double computeKineticEnergy(ContextImpl& context, const NoseHooverIntegrator& integrator);
private:
CudaContext& cu;
float prevMaxPairDistance;
CudaArray maxPairDistanceBuffer, pairListBuffer, atomListBuffer, pairTemperatureBuffer;
CUfunction kernel1, kernel2, kernel3, kernelHardWall;
};
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class CudaNoseHooverChainKernel : public NoseHooverChainKernel {
public:
CudaNoseHooverChainKernel(std::string name, const Platform& platform, CudaContext& cu) : NoseHooverChainKernel(name, platform), cu(cu) {
}
~CudaNoseHooverChainKernel() {}
/**
* Initialize the kernel.
*/
void initialize();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/
std::pair<double, double> propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
double computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
std::pair<double,double> computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/
void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, std::pair<double, double> scaleFactors);
private:
int sumWorkGroupSize;
CudaContext& cu;
CudaArray energyBuffer, scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
std::map<int, CudaArray> atomlists, pairlists;
std::map<int, CUfunction> propagateKernels;
CUfunction reduceEnergyKernel;
CUfunction computeHeatBathEnergyKernel;
CUfunction computeAtomsKineticEnergyKernel;
CUfunction computePairsKineticEnergyKernel;
CUfunction scaleAtomsVelocitiesKernel;
CUfunction scalePairsVelocitiesKernel;
};
/** /**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume * This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/ */
......
...@@ -134,9 +134,9 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform ...@@ -134,9 +134,9 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
if (name == ApplyAndersenThermostatKernel::Name()) if (name == ApplyAndersenThermostatKernel::Name())
return new CommonApplyAndersenThermostatKernel(name, platform, cu); return new CommonApplyAndersenThermostatKernel(name, platform, cu);
if (name == NoseHooverChainKernel::Name()) if (name == NoseHooverChainKernel::Name())
return new CudaNoseHooverChainKernel(name, platform, cu); return new CommonNoseHooverChainKernel(name, platform, cu);
if (name == IntegrateVelocityVerletStepKernel::Name()) if (name == IntegrateVelocityVerletStepKernel::Name())
return new CudaIntegrateVelocityVerletStepKernel(name, platform, cu); return new CommonIntegrateVelocityVerletStepKernel(name, platform, cu);
if (name == ApplyMonteCarloBarostatKernel::Name()) if (name == ApplyMonteCarloBarostatKernel::Name())
return new CudaApplyMonteCarloBarostatKernel(name, platform, cu); return new CudaApplyMonteCarloBarostatKernel(name, platform, cu);
if (name == RemoveCMMotionKernel::Name()) if (name == RemoveCMMotionKernel::Name())
......
...@@ -1497,118 +1497,6 @@ void CudaCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, in ...@@ -1497,118 +1497,6 @@ void CudaCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, in
nz = dispersionGridSizeZ; nz = dispersionGridSizeZ;
} }
} }
void CudaIntegrateVelocityVerletStepKernel::initialize(const System& system, const NoseHooverIntegrator& integrator) {
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
map<string, string> defines;
defines["BOLTZ"] = cu.doubleToString(BOLTZ);
CUmodule module = cu.createModule(CudaKernelSources::velocityVerlet, defines, "");
kernel1 = cu.getKernel(module, "integrateVelocityVerletPart1");
kernel2 = cu.getKernel(module, "integrateVelocityVerletPart2");
kernel3 = cu.getKernel(module, "integrateVelocityVerletPart3");
kernelHardWall = cu.getKernel(module, "integrateVelocityVerletHardWall");
prevMaxPairDistance = -1.0;
maxPairDistanceBuffer.initialize<float>(cu, 1, "maxPairDistanceBuffer");
}
void CudaIntegrateVelocityVerletStepKernel::execute(ContextImpl& context, const NoseHooverIntegrator& integrator, bool &forcesAreValid) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int paddedNumAtoms = cu.getPaddedNumAtoms();
double dt = integrator.getStepSize();
cu.getIntegrationUtilities().setNextStepSize(dt);
if (!forcesAreValid) context.calcForcesAndEnergy(true, false);
const auto& atomList = integrator.getAllThermostatedIndividualParticles();
const auto& pairList = integrator.getAllThermostatedPairs();
int numAtoms = atomList.size();
int numPairs = pairList.size();
int numParticles = numAtoms + 2*numPairs;
float maxPairDistance = integrator.getMaximumPairDistance();
// Make sure atom and pair metadata is uploaded and has the correct dimensions
if (prevMaxPairDistance != maxPairDistance) {
std::vector<float> tmp(1, maxPairDistance);
maxPairDistanceBuffer.upload(tmp);
prevMaxPairDistance = maxPairDistance;
}
if (numAtoms !=0 && (!atomListBuffer.isInitialized() || atomListBuffer.getSize() != numAtoms)) {
if (atomListBuffer.isInitialized())
atomListBuffer.resize(atomList.size());
else
atomListBuffer.initialize<int>(cu, atomList.size(), "atomListBuffer");
atomListBuffer.upload(atomList);
}
if (numPairs !=0 && (!pairListBuffer.isInitialized() || pairListBuffer.getSize() != numPairs)) {
if (pairListBuffer.isInitialized()) {
pairListBuffer.resize(pairList.size());
pairTemperatureBuffer.resize(pairList.size());
}
else {
pairListBuffer.initialize<int2>(cu, pairList.size(), "pairListBuffer");
pairTemperatureBuffer.initialize<float>(cu, pairList.size(), "pairTemperatureBuffer");
}
std::vector<int2> tmp;
std::vector<float> tmp2;
for(const auto &pair : pairList) {
tmp.push_back(make_int2(std::get<0>(pair), std::get<1>(pair)));
tmp2.push_back(std::get<2>(pair));
}
pairListBuffer.upload(tmp);
pairTemperatureBuffer.upload(tmp2);
}
//// Call the first integration kernel.
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
void* args1[] = {&numAtoms, &numPairs, &paddedNumAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
&atomListBuffer.getDevicePointer(), &pairListBuffer.getDevicePointer()};
cu.executeKernel(kernel1, args1, std::max(numAtoms,numPairs), 128);
//// Apply constraints.
integration.applyConstraints(integrator.getConstraintTolerance());
//// Call the second integration kernel.
void* args2[] = {&numParticles, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
&cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
cu.executeKernel(kernel2, args2, numParticles, 128);
if (numPairs > 0) {
//// Enforce hard wall constraint
void* argsHardWall[] = {&numPairs, &maxPairDistanceBuffer.getDevicePointer(),
&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
&cu.getVelm().getDevicePointer(), &pairListBuffer.getDevicePointer(),
&pairTemperatureBuffer.getDevicePointer()};
cu.executeKernel(kernelHardWall, argsHardWall, numPairs, 128);
}
integration.computeVirtualSites();
//// Update forces
context.calcForcesAndEnergy(true, false);
forcesAreValid = true;
//// Call the third integration kernel.
void* args3[] = {&numAtoms, &numPairs, &paddedNumAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
&atomListBuffer.getDevicePointer(), &pairListBuffer.getDevicePointer()};
cu.executeKernel(kernel3, args3, std::max(numAtoms,numPairs), 128);
// TODO: Figure out if this is really needed. The constraint velocities are accounted for
// in a finite difference sense in the step 3 kernel, when the velocities are updated.
integration.applyVelocityConstraints(integrator.getConstraintTolerance());
//// Update the time and step count.
cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
}
double CudaIntegrateVelocityVerletStepKernel::computeKineticEnergy(ContextImpl& context, const NoseHooverIntegrator& integrator) {
return cu.getIntegrationUtilities().computeKineticEnergy(0);
}
class CudaCalcCustomCVForceKernel::ForceInfo : public CudaForceInfo { class CudaCalcCustomCVForceKernel::ForceInfo : public CudaForceInfo {
public: public:
...@@ -1820,357 +1708,6 @@ void CudaCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1820,357 +1708,6 @@ void CudaCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context,
delete function.second; delete function.second;
} }
void CudaNoseHooverChainKernel::initialize() {
cu.setAsCurrent();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
map<string, string> defines;
sumWorkGroupSize = 512;
defines["WORK_GROUP_SIZE"] = cu.intToString(sumWorkGroupSize);
defines["MIXEDEXP"] = useDouble ? "exp" : "expf";
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {1}) {";
defines["END_YS_LOOP"] = "}";
CUmodule module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
propagateKernels[1] = cu.getKernel(module, "propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.828981543588751, -0.657963087177502, 0.828981543588751}) {";
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
propagateKernels[3] = cu.getKernel(module, "propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}) {";
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
propagateKernels[5] = cu.getKernel(module, "propagateNoseHooverChain");
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
reduceEnergyKernel = cu.getKernel(module, "reduceEnergyPair");
computeHeatBathEnergyKernel = cu.getKernel(module, "computeHeatBathEnergy");
computeAtomsKineticEnergyKernel = cu.getKernel(module, "computeAtomsKineticEnergy");
computePairsKineticEnergyKernel = cu.getKernel(module, "computePairsKineticEnergy");
scaleAtomsVelocitiesKernel = cu.getKernel(module, "scaleAtomsVelocities");
scalePairsVelocitiesKernel = cu.getKernel(module, "scalePairsVelocities");
int energyBufferSize = cu.getEnergyBuffer().getSize();
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision())
energyBuffer.initialize<double2>(cu, energyBufferSize, "energyBuffer");
else
energyBuffer.initialize<float2>(cu, energyBufferSize, "energyBuffer");
}
std::pair<double, double> CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep) {
cu.setAsCurrent();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getChainID();
int nAtoms = nhc.getThermostatedAtoms().size();
int nPairs = nhc.getThermostatedPairs().size();
int chainLength = nhc.getChainLength();
int numYS = nhc.getNumYoshidaSuzukiTimeSteps();
int numMTS = nhc.getNumMultiTimeSteps();
int numDOFs = nhc.getNumDegreesOfFreedom();
double temperature = nhc.getTemperature();
double frequency = nhc.getCollisionFrequency();
double relativeTemperature = nhc.getRelativeTemperature();
double relativeFrequency = nhc.getRelativeCollisionFrequency();
if (numYS != 1 && numYS != 3 && numYS != 5) {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
}
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (!scaleFactorBuffer.isInitialized() || scaleFactorBuffer.getSize() == 0) {
if (useDouble) {
std::vector<double2> zeros{{0,0}};
if (scaleFactorBuffer.isInitialized())
scaleFactorBuffer.resize(1);
else
scaleFactorBuffer.initialize<double2>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(zeros);
}
else {
std::vector<float2> zeros{{0,0}};
if (scaleFactorBuffer.isInitialized())
scaleFactorBuffer.resize(1);
else
scaleFactorBuffer.initialize<float2>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(zeros);
}
}
std::vector<double> zeros(chainLength,0);
if (!chainForces.isInitialized() || !chainMasses.isInitialized()) {
if (useDouble) {
if (chainForces.isInitialized()) {
chainMasses.resize(chainLength);
chainForces.resize(chainLength);
}
else {
chainMasses.initialize<double>(cu, chainLength, "chainMasses");
chainForces.initialize<double>(cu, chainLength, "chainForces");
}
chainMasses.upload(zeros);
chainForces.upload(zeros);
}
else {
if (chainForces.isInitialized()) {
chainMasses.resize(chainLength);
chainForces.resize(chainLength);
}
else {
chainMasses.initialize<float>(cu, chainLength, "chainMasses");
chainForces.initialize<float>(cu, chainLength, "chainForces");
}
chainMasses.upload(zeros, true);
chainForces.upload(zeros, true);
}
}
if (chainForces.getSize() < chainLength)
chainMasses.resize(chainLength);
if (chainMasses.getSize() < chainLength)
chainMasses.resize(chainLength);
float timeStepFloat = (float) timeStep;
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
if (nAtoms) {
if (!chainState.count(2*chainID))
chainState[2*chainID] = ComputeArray();
if (!chainState.at(2*chainID).isInitialized() || chainState.at(2*chainID).getSize() != chainLength) {
// We need to upload the CUDA array
if (useDouble) {
if (chainState.at(2*chainID).isInitialized())
chainState.at(2*chainID).resize(chainLength);
else
chainState.at(2*chainID).initialize<double2>(cu, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(2*chainID).upload(zeros.data());
}
else {
if (chainState.at(2*chainID).isInitialized())
chainState.at(2*chainID).resize(chainLength);
else
chainState.at(2*chainID).initialize<float2>(cu, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(2*chainID).upload(zeros.data());
}
}
int chainType = 0;
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void *args[] = {
&cu.unwrap(chainState[2*chainID]).getDevicePointer(),
&kineticEnergyBuffer.getDevicePointer(),
&scaleFactorBuffer.getDevicePointer(),
&chainMasses.getDevicePointer(),
&chainForces.getDevicePointer(),
&chainType, &chainLength, &numMTS, &numDOFs,
&timeStepFloat,
useDouble ? (void*) &kT : (void*) &kTfloat,
&frequencyFloat
};
cu.executeKernel(propagateKernels[numYS], args, 1, 1);
}
if (nPairs) {
if (!chainState.count(2*chainID+1))
chainState[2*chainID+1] = ComputeArray();
if (!chainState.at(2*chainID+1).isInitialized() || chainState.at(2*chainID+1).getSize() != chainLength) {
// We need to upload the CUDA array
if (useDouble) {
if (chainState.at(2*chainID+1).isInitialized())
chainState.at(2*chainID+1).resize(chainLength);
else
chainState.at(2*chainID+1).initialize<double2>(cu, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(2*chainID+1).upload(zeros.data());
}
else {
if (chainState.at(2*chainID+1).isInitialized())
chainState.at(2*chainID+1).resize(chainLength);
else
chainState.at(2*chainID+1).initialize<float2>(cu, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(2*chainID+1).upload(zeros.data());
}
}
int chainType = 1;
double kT = BOLTZ * relativeTemperature;
int ndf = 3*nPairs;
float kTfloat = (float) kT;
float frequencyFloat = (float) relativeFrequency;
void *args[] = {
&cu.unwrap(chainState[2*chainID+1]).getDevicePointer(),
&kineticEnergyBuffer.getDevicePointer(),
&scaleFactorBuffer.getDevicePointer(),
&chainMasses.getDevicePointer(),
&chainForces.getDevicePointer(),
&chainType, &chainLength, &numMTS, &ndf,
&timeStepFloat,
useDouble ? (void*) &kT : (void*) &kTfloat,
&frequencyFloat
};
cu.executeKernel(propagateKernels[numYS], args, 1, 1);
}
return {0, 0};
}
double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
cu.setAsCurrent();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getChainID();
int chainLength = nhc.getChainLength();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
bool absChainIsValid = chainState.count(2*chainID) != 0 &&
chainState[2*chainID].isInitialized() &&
chainState[2*chainID].getSize() == chainLength;
bool relChainIsValid = chainState.count(2*chainID+1) != 0 &&
chainState[2*chainID+1].isInitialized() &&
chainState[2*chainID+1].getSize() == chainLength;
if (!absChainIsValid && !relChainIsValid) return 0.0;
if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) {
if (useDouble) {
std::vector<double> one(1);
heatBathEnergy.initialize<double>(cu, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
}
else {
std::vector<float> one(1);
heatBathEnergy.initialize<float>(cu, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
}
}
cu.clearBuffer(heatBathEnergy);
if (absChainIsValid) {
int numDOFs = nhc.getNumDegreesOfFreedom();
double temperature = nhc.getTemperature();
double frequency = nhc.getCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &cu.unwrap(chainState[2*chainID]).getDevicePointer()};
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
}
if (relChainIsValid) {
int numDOFs = 3 * nhc.getThermostatedPairs().size();
double temperature = nhc.getRelativeTemperature();
double frequency = nhc.getRelativeCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &cu.unwrap(chainState[2*chainID+1]).getDevicePointer()};
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
}
void * pinnedBuffer = cu.getPinnedBuffer();
heatBathEnergy.download(pinnedBuffer);
if (useDouble)
return *((double*) pinnedBuffer);
else
return *((float*) pinnedBuffer);
}
std::pair<double, double> CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &nhc, bool downloadValue) {
cu.setAsCurrent();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getChainID();
const auto & nhcAtoms = nhc.getThermostatedAtoms();
const auto & nhcPairs = nhc.getThermostatedPairs();
auto nAtoms = nhcAtoms.size();
auto nPairs = nhcPairs.size();
if (nAtoms) {
if (!atomlists.count(chainID)) {
// We need to upload the CUDA array
atomlists[chainID] = CudaArray();
atomlists[chainID].initialize<int>(cu, nAtoms, "atomlist" + std::to_string(chainID));
atomlists[chainID].upload(nhcAtoms);
}
if (atomlists[chainID].getSize() != nAtoms) {
throw OpenMMException("Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
if (nPairs) {
if (!pairlists.count(chainID)) {
// We need to upload the CUDA array
pairlists[chainID] = CudaArray();
pairlists[chainID].initialize<int2>(cu, nPairs, "pairlist" + std::to_string(chainID));
std::vector<int2> int2vec;
for(const auto &p : nhcPairs) int2vec.push_back(make_int2(p.first, p.second));
pairlists[chainID].upload(int2vec);
}
if (pairlists[chainID].getSize() != nPairs) {
throw OpenMMException("Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
if (!kineticEnergyBuffer.isInitialized() || kineticEnergyBuffer.getSize() == 0) {
if (useDouble) {
std::vector<double2> zeros{{0,0}};
kineticEnergyBuffer.initialize<double2>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(zeros);
}
else {
std::vector<float2> zeros{{0,0}};
kineticEnergyBuffer.initialize<float2>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(zeros);
}
}
cu.clearBuffer(energyBuffer);
if (nAtoms) {
void *args[] = {&energyBuffer.getDevicePointer(),&nAtoms, &cu.getVelm().getDevicePointer(), &atomlists[chainID].getDevicePointer()};
cu.executeKernel(computeAtomsKineticEnergyKernel, args, nAtoms);
}
if (nPairs) {
void *args[] = {&energyBuffer.getDevicePointer(),&nPairs, &cu.getVelm().getDevicePointer(), &pairlists[chainID].getDevicePointer()};
cu.executeKernel(computePairsKineticEnergyKernel, args, nPairs);
}
//taken from CudaContext::reduceEnergy(); the final kinetic energy will live in the kineticEnergy buffer
int bufferSize = energyBuffer.getSize();
void* args2[] = {&energyBuffer.getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &sumWorkGroupSize};
cu.executeKernel(reduceEnergyKernel, args2, sumWorkGroupSize, sumWorkGroupSize, sumWorkGroupSize*energyBuffer.getElementSize());
std::pair<double, double> KEs = {0, 0};
if (downloadValue) {
void * pinnedBuffer = cu.getPinnedBuffer();
kineticEnergyBuffer.download(pinnedBuffer);
KEs.first = useDouble ? *((double*) pinnedBuffer) : *((float*) pinnedBuffer);
KEs.second = useDouble ? *((double*) pinnedBuffer + 1) : *((float*) pinnedBuffer + 1);
}
return KEs;
}
void CudaNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> scaleFactor) {
// For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
cu.setAsCurrent();
int chainID = nhc.getChainID();
auto nAtoms = nhc.getThermostatedAtoms().size();
auto nPairs = nhc.getThermostatedPairs().size();
if (nAtoms) {
void *args[] = {&scaleFactorBuffer.getDevicePointer(),
&nAtoms, &cu.getVelm().getDevicePointer(), &atomlists[chainID].getDevicePointer()};
cu.executeKernel(scaleAtomsVelocitiesKernel, args, nAtoms);
}
if (nPairs) {
void *args[] = {&scaleFactorBuffer.getDevicePointer(),
&nPairs, &cu.getVelm().getDevicePointer(), &pairlists[chainID].getDevicePointer()};
cu.executeKernel(scalePairsVelocitiesKernel, args, nPairs);
}
}
void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) { void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
cu.setAsCurrent(); cu.setAsCurrent();
savedPositions.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4), "savedPositions"); savedPositions.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4), "savedPositions");
......
#include <initializer_list>
extern "C" __global__ void propagateNoseHooverChain(mixed2* __restrict__ chainData, const mixed2 * __restrict__ energySum, mixed2* __restrict__ scaleFactor,
mixed* __restrict__ chainMasses, mixed* __restrict__ chainForces,
int chainType, int chainLength, int numMTS, int numDOFs, float timeStep,
mixed kT, float frequency){
const mixed & kineticEnergy = chainType ? energySum[0].y : energySum[0].x;
mixed &scale = chainType ? scaleFactor[0].y : scaleFactor[0].x;
scale = (mixed) 1;
if(kineticEnergy < 1e-8) return;
for (int bead = 0; bead < chainLength; ++bead) chainMasses[bead] = kT / (frequency * frequency);
chainMasses[0] *= numDOFs;
mixed KE2 = 2.0f * kineticEnergy;
mixed timeOverMTS = timeStep / numMTS;
chainForces[0] = (KE2 - numDOFs * kT) / chainMasses[0];
for (int bead = 0; bead < chainLength - 1; ++bead) {
chainForces[bead + 1] = (chainMasses[bead] * chainData[bead].y * chainData[bead].y - kT) / chainMasses[bead + 1];
}
for (int mts = 0; mts < numMTS; ++mts) {
BEGIN_YS_LOOP
mixed wdt = ys * timeOverMTS;
chainData[chainLength-1].y += 0.25f * wdt * chainForces[chainLength-1];
for (int bead = chainLength - 2; bead >= 0; --bead) {
mixed aa = MIXEDEXP(-0.125f * wdt * chainData[bead + 1].y);
chainData[bead].y = aa * (chainData[bead].y * aa + 0.25f * wdt * chainForces[bead]);
}
// update particle velocities
mixed aa = MIXEDEXP(-0.5f * wdt * chainData[0].y);
scale *= aa;
// update the thermostat positions
for (int bead = 0; bead < chainLength; ++bead) {
chainData[bead].x += 0.5f * chainData[bead].y * wdt;
}
// update the forces
chainForces[0] = (scale * scale * KE2 - numDOFs * kT) / chainMasses[0];
// update thermostat velocities
for (int bead = 0; bead < chainLength - 1; ++bead) {
mixed aa = MIXEDEXP(-0.125f * wdt * chainData[bead + 1].y);
chainData[bead].y = aa * (aa * chainData[bead].y + 0.25f * wdt * chainForces[bead]);
chainForces[bead + 1] = (chainMasses[bead] * chainData[bead].y * chainData[bead].y - kT) / chainMasses[bead + 1];
}
chainData[chainLength-1].y += 0.25f * wdt * chainForces[chainLength-1];
END_YS_LOOP
} // MTS loop
}
/**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/
extern "C" __global__ void computeHeatBathEnergy(mixed* __restrict__ heatBathEnergy, int chainLength, int numDOFs,
mixed kT, float frequency, const mixed2* __restrict__ chainData){
// Note that this is always incremented; make sure it's zeroed properly before the first call
mixed &energy = heatBathEnergy[0];
for(int i = 0; i < chainLength; ++i) {
mixed prefac = i ? 1 : numDOFs;
mixed mass = prefac * kT / (frequency * frequency);
mixed velocity = chainData[i].y;
// The kinetic energy of this bead
energy += 0.5f * mass * velocity * velocity;
// The potential energy of this bead
mixed position = chainData[i].x;
energy += prefac * kT * position;
}
}
extern "C" __global__ void computeAtomsKineticEnergy(mixed2 * __restrict__ energyBuffer, int numAtoms,
const mixed4* __restrict__ velm, const int *__restrict__ atoms){
mixed2 energy = make_mixed2(0,0);
//energy = 1; return;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
int atom = atoms[index];
mixed4 v = velm[atom];
mixed mass = v.w == 0 ? 0 : 1 / v.w;
energy.x += 0.5f * mass * (v.x*v.x + v.y*v.y + v.z*v.z);
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] = energy;
}
extern "C" __global__ void computePairsKineticEnergy(mixed2 * __restrict__ energyBuffer, int numPairs,
const mixed4* __restrict__ velm, const int2 *__restrict__ pairs){
mixed2 energy = make_mixed2(0,0);
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numPairs; index += blockDim.x*gridDim.x) {
int2 pair = pairs[index];
int atom1 = pair.x;
int atom2 = pair.y;
mixed4 v1 = velm[atom1];
mixed4 v2 = velm[atom2];
mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
mixed4 cv;
cv.x = (m1*v1.x + m2*v2.x) / (m1 + m2);
cv.y = (m1*v1.y + m2*v2.y) / (m1 + m2);
cv.z = (m1*v1.z + m2*v2.z) / (m1 + m2);
mixed4 rv;
rv.x = v2.x - v1.x;
rv.y = v2.y - v1.y;
rv.z = v2.z - v1.z;
energy.x += 0.5f * (m1 + m2) * (cv.x*cv.x + cv.y*cv.y + cv.z*cv.z);
energy.y += 0.5f * (m1 * m2 / (m1 + m2)) * (rv.x*rv.x + rv.y*rv.y + rv.z*rv.z);
}
// The atoms version of this has been called already, so accumulate instead of assigning here
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x].x += energy.x;
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x].y += energy.y;
}
extern "C" __global__ void scaleAtomsVelocities(mixed2* __restrict__ scaleFactor, int numAtoms,
mixed4* __restrict__ velm, const int *__restrict__ atoms){
const mixed &scale = scaleFactor[0].x;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
int atom = atoms[index];
mixed4 &v = velm[atom];
v.x *= scale;
v.y *= scale;
v.z *= scale;
}
}
extern "C" __global__ void scalePairsVelocities(mixed2 * __restrict__ scaleFactor, int numPairs,
mixed4* __restrict__ velm, const int2 *__restrict__ pairs){
const mixed &absScale = scaleFactor[0].x;
const mixed &relScale = scaleFactor[0].y;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numPairs; index += blockDim.x*gridDim.x) {
int atom1 = pairs[index].x;
int atom2 = pairs[index].y;
mixed4 v1 = velm[atom1];
mixed4 v2 = velm[atom2];
mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
mixed4 cv;
cv.x = (m1*v1.x + m2*v2.x) / (m1 + m2);
cv.y = (m1*v1.y + m2*v2.y) / (m1 + m2);
cv.z = (m1*v1.z + m2*v2.z) / (m1 + m2);
mixed4 rv;
rv.x = v2.x - v1.x;
rv.y = v2.y - v1.y;
rv.z = v2.z - v1.z;
v1.x = absScale * cv.x - relScale * rv.x * m2 / (m1 + m2);
v1.y = absScale * cv.y - relScale * rv.y * m2 / (m1 + m2);
v1.z = absScale * cv.z - relScale * rv.z * m2 / (m1 + m2);
v2.x = absScale * cv.x + relScale * rv.x * m1 / (m1 + m2);
v2.y = absScale * cv.y + relScale * rv.y * m1 / (m1 + m2);
v2.z = absScale * cv.z + relScale * rv.z * m1 / (m1 + m2);
velm[atom1] = v1;
velm[atom2] = v2;
}
}
/**
* Sum the energy buffer containing a pair of energies stored as mixed2. This is copied from utilities.cu with small modifications
*/
extern "C" __global__ void reduceEnergyPair(const mixed2* __restrict__ energyBuffer, mixed2* __restrict__ result, int bufferSize, int workGroupSize) {
__shared__ mixed2 tempBuffer[WORK_GROUP_SIZE];
const unsigned int thread = threadIdx.x;
mixed2 sum = make_mixed2(0,0);
for (unsigned int idx = thread; idx < bufferSize; idx += blockDim.x) {
sum.x += energyBuffer[idx].x;
sum.y += energyBuffer[idx].y;
}
tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) {
__syncthreads();
if (thread%(i*2) == 0 && thread+i < workGroupSize) {
tempBuffer[thread].x += tempBuffer[thread+i].x;
tempBuffer[thread].y += tempBuffer[thread+i].y;
}
}
if (thread == 0)
*result = tempBuffer[0];
}
...@@ -428,109 +428,6 @@ private: ...@@ -428,109 +428,6 @@ private:
cl::Kernel copyStateKernel, copyForcesKernel, addForcesKernel; cl::Kernel copyStateKernel, copyForcesKernel, addForcesKernel;
}; };
/*
* This kernel is invoked by NoseHooverIntegrator to take one time step.
*/
class OpenCLIntegrateVelocityVerletStepKernel : public IntegrateVelocityVerletStepKernel {
public:
OpenCLIntegrateVelocityVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateVelocityVerletStepKernel(name, platform), cl(cl) { }
~OpenCLIntegrateVelocityVerletStepKernel() {}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the NoseHooverIntegrator this kernel will be used for
*/
void initialize(const System& system, const NoseHooverIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
* @param forcesAreValid a reference to the parent integrator's boolean for keeping
* track of the validity of the current forces.
*/
void execute(ContextImpl& context, const NoseHooverIntegrator& integrator, bool &forcesAreValid);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the NoseHooverIntegrator this kernel is being used for
*/
double computeKineticEnergy(ContextImpl& context, const NoseHooverIntegrator& integrator);
private:
OpenCLContext& cl;
float prevMaxPairDistance;
OpenCLArray maxPairDistanceBuffer, pairListBuffer, atomListBuffer, pairTemperatureBuffer;
cl::Kernel kernel1, kernel2, kernel3, kernelHardWall;
};
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class OpenCLNoseHooverChainKernel : public NoseHooverChainKernel {
public:
OpenCLNoseHooverChainKernel(std::string name, const Platform& platform, OpenCLContext& cl) : NoseHooverChainKernel(name, platform), cl(cl) {
}
~OpenCLNoseHooverChainKernel() {}
/**
* Initialize the kernel.
*/
void initialize();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/
std::pair<double, double> propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
double computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
std::pair<double,double> computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/
void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, std::pair<double, double> scaleFactors);
private:
int sumWorkGroupSize;
OpenCLContext& cl;
OpenCLArray energyBuffer, scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
std::map<int, OpenCLArray> atomlists, pairlists;
std::map<int, cl::Kernel> propagateKernels;
cl::Kernel reduceEnergyKernel;
cl::Kernel computeHeatBathEnergyKernel;
cl::Kernel computeAtomsKineticEnergyKernel;
cl::Kernel computePairsKineticEnergyKernel;
cl::Kernel scaleAtomsVelocitiesKernel;
cl::Kernel scalePairsVelocitiesKernel;
};
/** /**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume * This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/ */
......
...@@ -132,9 +132,9 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo ...@@ -132,9 +132,9 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
if (name == ApplyAndersenThermostatKernel::Name()) if (name == ApplyAndersenThermostatKernel::Name())
return new CommonApplyAndersenThermostatKernel(name, platform, cl); return new CommonApplyAndersenThermostatKernel(name, platform, cl);
if (name == NoseHooverChainKernel::Name()) if (name == NoseHooverChainKernel::Name())
return new OpenCLNoseHooverChainKernel(name, platform, cl); return new CommonNoseHooverChainKernel(name, platform, cl);
if (name == IntegrateVelocityVerletStepKernel::Name()) if (name == IntegrateVelocityVerletStepKernel::Name())
return new OpenCLIntegrateVelocityVerletStepKernel(name, platform, cl); return new CommonIntegrateVelocityVerletStepKernel(name, platform, cl);
if (name == ApplyMonteCarloBarostatKernel::Name()) if (name == ApplyMonteCarloBarostatKernel::Name())
return new OpenCLApplyMonteCarloBarostatKernel(name, platform, cl); return new OpenCLApplyMonteCarloBarostatKernel(name, platform, cl);
if (name == RemoveCMMotionKernel::Name()) if (name == RemoveCMMotionKernel::Name())
......
...@@ -1667,151 +1667,6 @@ void OpenCLCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, ...@@ -1667,151 +1667,6 @@ void OpenCLCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx,
} }
} }
void OpenCLIntegrateVelocityVerletStepKernel::initialize(const System& system, const NoseHooverIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system);
map<string, string> defines;
defines["BOLTZ"] = cl.doubleToString(BOLTZ);
cl::Program program = cl.createProgram(OpenCLKernelSources::velocityVerlet, defines, "");
kernel1 = cl::Kernel(program, "integrateVelocityVerletPart1");
kernel2 = cl::Kernel(program, "integrateVelocityVerletPart2");
kernel3 = cl::Kernel(program, "integrateVelocityVerletPart3");
kernelHardWall = cl::Kernel(program, "integrateVelocityVerletHardWall");
prevMaxPairDistance = (cl_float) -1.0;
maxPairDistanceBuffer.initialize<cl_float>(cl, 1, "maxPairDistanceBuffer");
}
void OpenCLIntegrateVelocityVerletStepKernel::execute(ContextImpl& context, const NoseHooverIntegrator& integrator, bool &forcesAreValid) {
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
int paddedNumAtoms = cl.getPaddedNumAtoms();
double dt = integrator.getStepSize();
cl.getIntegrationUtilities().setNextStepSize(dt);
if (!forcesAreValid) context.calcForcesAndEnergy(true, false);
const auto& atomList = integrator.getAllThermostatedIndividualParticles();
const auto& pairList = integrator.getAllThermostatedPairs();
int numAtoms = atomList.size();
int numPairs = pairList.size();
int numParticles = numAtoms + 2*numPairs;
float maxPairDistance = integrator.getMaximumPairDistance();
// Make sure atom and pair metadata is uploaded and has the correct dimensions
if (prevMaxPairDistance != maxPairDistance) {
std::vector<float> tmp(1, maxPairDistance);
maxPairDistanceBuffer.upload(tmp);
prevMaxPairDistance = maxPairDistance;
}
if (numAtoms !=0 && (!atomListBuffer.isInitialized() || atomListBuffer.getSize() != numAtoms)) {
if (atomListBuffer.isInitialized())
atomListBuffer.resize(atomList.size());
else
atomListBuffer.initialize<cl_int>(cl, atomList.size(), "atomListBuffer");
atomListBuffer.upload(atomList);
}
if (numPairs !=0 && (!pairListBuffer.isInitialized() || pairListBuffer.getSize() != numPairs)) {
if (pairListBuffer.isInitialized()) {
pairListBuffer.resize(pairList.size());
pairTemperatureBuffer.resize(pairList.size());
}
else {
pairListBuffer.initialize<mm_int2>(cl, pairList.size(), "pairListBuffer");
pairTemperatureBuffer.initialize<cl_float>(cl, pairList.size(), "pairTemperatureBuffer");
}
std::vector<mm_int2> tmp;
std::vector<float> tmp2;
for(const auto &pair : pairList) {
tmp.push_back(mm_int2(std::get<0>(pair), std::get<1>(pair)));
tmp2.push_back(std::get<2>(pair));
}
pairListBuffer.upload(tmp);
pairTemperatureBuffer.upload(tmp2);
}
//// Call the first integration kernel.
kernel1.setArg<cl_int>(0, numAtoms);
kernel1.setArg<cl_int>(1, numPairs);
kernel1.setArg<cl_int>(2, paddedNumAtoms);
kernel1.setArg<cl::Buffer>(3, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernel1, 5);
kernel1.setArg<cl::Buffer>(6, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(7, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(8, integration.getPosDelta().getDeviceBuffer());
if (numAtoms > 0)
kernel1.setArg<cl::Buffer>(9, atomListBuffer.getDeviceBuffer());
else
kernel1.setArg<void*>(9, NULL);
if (numPairs > 0)
kernel1.setArg<cl::Buffer>(10, pairListBuffer.getDeviceBuffer());
else
kernel1.setArg<void*>(10, NULL);
cl.executeKernel(kernel1, std::max(numAtoms, numPairs));
//// Apply constraints.
integration.applyConstraints(integrator.getConstraintTolerance());
//// Call the second integration kernel.
kernel2.setArg<cl_int>(0, numParticles);
kernel2.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernel2, 3);
kernel2.setArg<cl::Buffer>(4, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
cl.executeKernel(kernel2, numParticles);
if (numPairs > 0) {
//// Enforce hard wall constraint
kernelHardWall.setArg<cl_int>(0, numPairs);
kernelHardWall.setArg<cl::Buffer>(1, maxPairDistanceBuffer.getDeviceBuffer());
kernelHardWall.setArg<cl::Buffer>(2, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernelHardWall.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernelHardWall, 4);
kernelHardWall.setArg<cl::Buffer>(5, cl.getVelm().getDeviceBuffer());
kernelHardWall.setArg<cl::Buffer>(6, pairListBuffer.getDeviceBuffer());
kernelHardWall.setArg<cl::Buffer>(7, pairTemperatureBuffer.getDeviceBuffer());
cl.executeKernel(kernelHardWall, numPairs);
}
integration.computeVirtualSites();
//// Update forces
context.calcForcesAndEnergy(true, false);
forcesAreValid = true;
//// Call the third integration kernel.
kernel3.setArg<cl_int>(0, numAtoms);
kernel3.setArg<cl_int>(1, numPairs);
kernel3.setArg<cl_int>(2, paddedNumAtoms);
kernel3.setArg<cl::Buffer>(3, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernel3, 5);
kernel3.setArg<cl::Buffer>(6, cl.getVelm().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(7, cl.getForce().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(8, integration.getPosDelta().getDeviceBuffer());
if (numAtoms > 0)
kernel3.setArg<cl::Buffer>(9, atomListBuffer.getDeviceBuffer());
else
kernel3.setArg<void*>(9, NULL);
if (numPairs > 0)
kernel3.setArg<cl::Buffer>(10, pairListBuffer.getDeviceBuffer());
else
kernel3.setArg<void*>(10, NULL);
cl.executeKernel(kernel3, std::max(numAtoms, numPairs));
integration.applyVelocityConstraints(integrator.getConstraintTolerance());
//// Update the time and step count.
cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
}
double OpenCLIntegrateVelocityVerletStepKernel::computeKineticEnergy(ContextImpl& context, const NoseHooverIntegrator& integrator) {
return cl.getIntegrationUtilities().computeKineticEnergy(0);
}
class OpenCLCalcCustomCVForceKernel::ForceInfo : public OpenCLForceInfo { class OpenCLCalcCustomCVForceKernel::ForceInfo : public OpenCLForceInfo {
public: public:
ForceInfo(ComputeForceInfo& force) : OpenCLForceInfo(0), force(force) { ForceInfo(ComputeForceInfo& force) : OpenCLForceInfo(0), force(force) {
...@@ -2037,390 +1892,6 @@ void OpenCLCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context ...@@ -2037,390 +1892,6 @@ void OpenCLCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context
delete function.second; delete function.second;
} }
void OpenCLNoseHooverChainKernel::initialize() {
bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
map<string, string> defines;
defines["BEGIN_YS_LOOP"] = "const real arr[1] = {1.0}; for(int i=0;i<1;++i) { const real ys = arr[i];";
defines["END_YS_LOOP"] = "}";
cl::Program program = cl.createProgram(OpenCLKernelSources::noseHooverChain, defines);
propagateKernels[1] = cl::Kernel(program, "propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "const real arr[3] = {0.828981543588751, -0.657963087177502, 0.828981543588751}; for(int i=0;i<3;++i) { const real ys = arr[i];";
program = cl.createProgram(OpenCLKernelSources::noseHooverChain, defines);
propagateKernels[3] = cl::Kernel(program, "propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "const real arr[5] = {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}; for(int i=0;i<5;++i) { const real ys = arr[i];";
program = cl.createProgram(OpenCLKernelSources::noseHooverChain, defines);
propagateKernels[5] = cl::Kernel(program, "propagateNoseHooverChain");
program = cl.createProgram(OpenCLKernelSources::noseHooverChain, defines);
reduceEnergyKernel = cl::Kernel(program, "reduceEnergyPair");
computeHeatBathEnergyKernel = cl::Kernel(program, "computeHeatBathEnergy");
computeAtomsKineticEnergyKernel = cl::Kernel(program, "computeAtomsKineticEnergy");
computePairsKineticEnergyKernel = cl::Kernel(program, "computePairsKineticEnergy");
scaleAtomsVelocitiesKernel = cl::Kernel(program, "scaleAtomsVelocities");
scalePairsVelocitiesKernel = cl::Kernel(program, "scalePairsVelocities");
int energyBufferSize = cl.getEnergyBuffer().getSize();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
energyBuffer.initialize<mm_double2>(cl, energyBufferSize, "energyBuffer");
else
energyBuffer.initialize<mm_float2>(cl, energyBufferSize, "energyBuffer");
}
std::pair<double, double> OpenCLNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep) {
bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
int chainID = nhc.getChainID();
int nAtoms = nhc.getThermostatedAtoms().size();
int nPairs = nhc.getThermostatedPairs().size();
int chainLength = nhc.getChainLength();
int numYS = nhc.getNumYoshidaSuzukiTimeSteps();
int numMTS = nhc.getNumMultiTimeSteps();
int numDOFs = nhc.getNumDegreesOfFreedom();
double temperature = nhc.getTemperature();
double frequency = nhc.getCollisionFrequency();
double relativeTemperature = nhc.getRelativeTemperature();
double relativeFrequency = nhc.getRelativeCollisionFrequency();
if (numYS != 1 && numYS != 3 && numYS != 5) {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
}
auto & chainState = cl.getIntegrationUtilities().getNoseHooverChainState();
if (!scaleFactorBuffer.isInitialized() || scaleFactorBuffer.getSize() == 0) {
if (useDouble) {
std::vector<mm_double2> zeros{{0,0}};
if (scaleFactorBuffer.isInitialized())
scaleFactorBuffer.resize(1);
else
scaleFactorBuffer.initialize<mm_double2>(cl, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(zeros);
}
else {
std::vector<mm_float2> zeros{{0,0}};
if (scaleFactorBuffer.isInitialized())
scaleFactorBuffer.resize(1);
else
scaleFactorBuffer.initialize<mm_float2>(cl, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(zeros);
}
}
if (!chainForces.isInitialized() || !chainMasses.isInitialized()) {
if (useDouble) {
std::vector<cl_double> zeros(chainLength,0);
if (chainForces.isInitialized()) {
chainMasses.resize(chainLength);
chainForces.resize(chainLength);
}
else {
chainMasses.initialize<cl_double>(cl, chainLength, "chainMasses");
chainForces.initialize<cl_double>(cl, chainLength, "chainForces");
}
chainMasses.upload(zeros);
chainForces.upload(zeros);
}
else {
std::vector<cl_float> zeros(chainLength,0);
if (chainForces.isInitialized()) {
chainMasses.resize(chainLength);
chainForces.resize(chainLength);
}
else {
chainMasses.initialize<cl_float>(cl, chainLength, "chainMasses");
chainForces.initialize<cl_float>(cl, chainLength, "chainForces");
}
chainMasses.upload(zeros);
chainForces.upload(zeros);
}
}
if (chainForces.getSize() < chainLength)
chainMasses.resize(chainLength);
if (chainMasses.getSize() < chainLength)
chainMasses.resize(chainLength);
float timeStepFloat = (float) timeStep;
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
if (nAtoms) {
if (!chainState.count(2*chainID))
chainState[2*chainID] = ComputeArray();
if (!chainState.at(2*chainID).isInitialized() || chainState.at(2*chainID).getSize() != chainLength) {
// We need to upload the OpenCL array
if (useDouble) {
if (chainState.at(2*chainID).isInitialized())
chainState.at(2*chainID).resize(chainLength);
else
chainState.at(2*chainID).initialize<mm_double2>(cl, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<mm_double2> zeros(chainLength, mm_double2(0.0, 0.0));
chainState.at(2*chainID).upload(zeros.data());
}
else {
if (chainState.at(2*chainID).isInitialized())
chainState.at(2*chainID).resize(chainLength);
else
chainState.at(2*chainID).initialize<mm_float2>(cl, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<mm_float2> zeros(chainLength, mm_float2(0.0f, 0.0f));
chainState.at(2*chainID).upload(zeros.data());
}
}
int chainType = 0;
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
propagateKernels[numYS].setArg<cl::Buffer>(0, cl.unwrap(chainState[2*chainID]).getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(1, kineticEnergyBuffer.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(2, scaleFactorBuffer.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(3, chainMasses.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(4, chainForces.getDeviceBuffer());
propagateKernels[numYS].setArg<cl_int>(5, chainType);
propagateKernels[numYS].setArg<cl_int>(6, chainLength);
propagateKernels[numYS].setArg<cl_int>(7, numMTS);
propagateKernels[numYS].setArg<cl_int>(8, numDOFs);
propagateKernels[numYS].setArg<cl_float>(9, timeStepFloat);
if (useDouble)
propagateKernels[numYS].setArg<cl_double>(10, kT);
else
propagateKernels[numYS].setArg<cl_float>(10, kTfloat);
propagateKernels[numYS].setArg<cl_float>(11, frequencyFloat);
cl.executeKernel(propagateKernels[numYS], 1, 1);
}
if (nPairs) {
if (!chainState.count(2*chainID+1))
chainState[2*chainID+1] = ComputeArray();
if (!chainState.at(2*chainID+1).isInitialized() || chainState.at(2*chainID+1).getSize() != chainLength) {
// We need to upload the OpenCL array
if (useDouble) {
if (chainState.at(2*chainID+1).isInitialized())
chainState.at(2*chainID+1).resize(chainLength);
else
chainState.at(2*chainID+1).initialize<mm_double2>(cl, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<mm_double2> zeros(chainLength, mm_double2(0.0, 0.0));
chainState.at(2*chainID+1).upload(zeros.data());
}
else {
if (chainState.at(2*chainID+1).isInitialized())
chainState.at(2*chainID+1).resize(chainLength);
else
chainState.at(2*chainID+1).initialize<mm_float2>(cl, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<mm_float2> zeros(chainLength, mm_float2(0.0f, 0.0f));
chainState.at(2*chainID+1).upload(zeros.data());
}
}
int chainType = 1;
double kT = BOLTZ * relativeTemperature;
int ndf = 3*nPairs;
float kTfloat = (float) kT;
float frequencyFloat = (float) relativeFrequency;
propagateKernels[numYS].setArg<cl::Buffer>(0, cl.unwrap(chainState[2*chainID+1]).getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(1, kineticEnergyBuffer.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(2, scaleFactorBuffer.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(3, chainMasses.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(4, chainForces.getDeviceBuffer());
propagateKernels[numYS].setArg<cl_int>(5, chainType);
propagateKernels[numYS].setArg<cl_int>(6, chainLength);
propagateKernels[numYS].setArg<cl_int>(7, numMTS);
propagateKernels[numYS].setArg<cl_int>(8, ndf);
propagateKernels[numYS].setArg<cl_float>(9, timeStepFloat);
if (useDouble)
propagateKernels[numYS].setArg<cl_double>(10, kT);
else
propagateKernels[numYS].setArg<cl_float>(10, kTfloat);
propagateKernels[numYS].setArg<cl_float>(11, frequencyFloat);
cl.executeKernel(propagateKernels[numYS], 1, 1);
}
return {0, 0};
}
double OpenCLNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
int chainID = nhc.getChainID();
int chainLength = nhc.getChainLength();
auto & chainState = cl.getIntegrationUtilities().getNoseHooverChainState();
bool absChainIsValid = chainState.count(2*chainID) != 0 &&
chainState[2*chainID].isInitialized() &&
chainState[2*chainID].getSize() == chainLength;
bool relChainIsValid = chainState.count(2*chainID+1) != 0 &&
chainState[2*chainID+1].isInitialized() &&
chainState[2*chainID+1].getSize() == chainLength;
if (!absChainIsValid && !relChainIsValid) return 0.0;
if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) {
if (useDouble) {
std::vector<cl_double> one(1);
heatBathEnergy.initialize<cl_double>(cl, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
}
else {
std::vector<cl_float> one(1);
heatBathEnergy.initialize<cl_float>(cl, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
}
}
cl.clearBuffer(heatBathEnergy);
if (absChainIsValid) {
int numDOFs = nhc.getNumDegreesOfFreedom();
double temperature = nhc.getTemperature();
double frequency = nhc.getCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
computeHeatBathEnergyKernel.setArg<cl::Buffer>(0, heatBathEnergy.getDeviceBuffer());
computeHeatBathEnergyKernel.setArg<cl_int>(1, chainLength);
computeHeatBathEnergyKernel.setArg<cl_int>(2, numDOFs);
if (useDouble)
computeHeatBathEnergyKernel.setArg<cl_double>(3, kT);
else
computeHeatBathEnergyKernel.setArg<cl_float>(3, kTfloat);
computeHeatBathEnergyKernel.setArg<cl_float>(4, frequencyFloat);
computeHeatBathEnergyKernel.setArg<cl::Buffer>(5, cl.unwrap(chainState[2*chainID]).getDeviceBuffer());
cl.executeKernel(computeHeatBathEnergyKernel, 1, 1);
}
if (relChainIsValid) {
int numDOFs = 3 * nhc.getThermostatedPairs().size();
double temperature = nhc.getRelativeTemperature();
double frequency = nhc.getRelativeCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
computeHeatBathEnergyKernel.setArg<cl::Buffer>(0, heatBathEnergy.getDeviceBuffer());
computeHeatBathEnergyKernel.setArg<cl_int>(1, chainLength);
computeHeatBathEnergyKernel.setArg<cl_int>(2, numDOFs);
if (useDouble)
computeHeatBathEnergyKernel.setArg<cl_double>(3, kT);
else
computeHeatBathEnergyKernel.setArg<cl_float>(3, kTfloat);
computeHeatBathEnergyKernel.setArg<cl_float>(4, frequencyFloat);
computeHeatBathEnergyKernel.setArg<cl::Buffer>(5, cl.unwrap(chainState[2*chainID+1]).getDeviceBuffer());
cl.executeKernel(computeHeatBathEnergyKernel, 1, 1);
}
void * pinnedBuffer = cl.getPinnedBuffer();
heatBathEnergy.download(pinnedBuffer);
if (useDouble)
return *((double*) pinnedBuffer);
else
return *((float*) pinnedBuffer);
}
std::pair<double, double> OpenCLNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &nhc, bool downloadValue) {
bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
int chainID = nhc.getChainID();
const auto & nhcAtoms = nhc.getThermostatedAtoms();
const auto & nhcPairs = nhc.getThermostatedPairs();
auto nAtoms = nhcAtoms.size();
auto nPairs = nhcPairs.size();
if (nAtoms) {
if (!atomlists.count(chainID)) {
// We need to upload the OpenCL array
atomlists[chainID] = OpenCLArray();
atomlists[chainID].initialize<int>(cl, nAtoms, "atomlist" + std::to_string(chainID));
atomlists[chainID].upload(nhcAtoms);
}
if (atomlists[chainID].getSize() != nAtoms) {
throw OpenMMException("Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
if (nPairs) {
if (!pairlists.count(chainID)) {
// We need to upload the OpenCL array
pairlists[chainID] = OpenCLArray();
pairlists[chainID].initialize<mm_int2>(cl, nPairs, "pairlist" + std::to_string(chainID));
std::vector<mm_int2> int2vec;
for(const auto &p : nhcPairs) int2vec.push_back(mm_int2(p.first, p.second));
pairlists[chainID].upload(int2vec);
}
if (pairlists[chainID].getSize() != nPairs) {
throw OpenMMException("Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
if (!kineticEnergyBuffer.isInitialized() || kineticEnergyBuffer.getSize() == 0) {
if (useDouble) {
std::vector<mm_double2> zeros{{0,0}};
kineticEnergyBuffer.initialize<mm_double2>(cl, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(zeros);
}
else {
std::vector<mm_float2> zeros{{0,0}};
kineticEnergyBuffer.initialize<mm_float2>(cl, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(zeros);
}
}
cl.clearBuffer(cl.getEnergyBuffer());
if (nAtoms) {
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(0, energyBuffer.getDeviceBuffer());
computeAtomsKineticEnergyKernel.setArg<cl_int>(1, nAtoms);
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(3, atomlists[chainID].getDeviceBuffer());
cl.executeKernel(computeAtomsKineticEnergyKernel, nAtoms);
}
if (nPairs) {
computePairsKineticEnergyKernel.setArg<cl::Buffer>(0, energyBuffer.getDeviceBuffer());
computePairsKineticEnergyKernel.setArg<cl_int>(1, nPairs);
computePairsKineticEnergyKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
computePairsKineticEnergyKernel.setArg<cl::Buffer>(3, pairlists[chainID].getDeviceBuffer());
cl.executeKernel(computePairsKineticEnergyKernel, nPairs);
}
int bufferSize = energyBuffer.getSize();
int workGroupSize = cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
if (workGroupSize > 512)
workGroupSize = 512;
reduceEnergyKernel.setArg<cl::Buffer>(0, energyBuffer.getDeviceBuffer());
reduceEnergyKernel.setArg<cl::Buffer>(1, kineticEnergyBuffer.getDeviceBuffer());
reduceEnergyKernel.setArg<cl_int>(2, bufferSize);
reduceEnergyKernel.setArg<cl_int>(3, workGroupSize);
reduceEnergyKernel.setArg(4, workGroupSize*energyBuffer.getElementSize(), NULL);
cl.executeKernel(reduceEnergyKernel, workGroupSize, workGroupSize);
std::pair<double, double> KEs = {0, 0};
if (downloadValue) {
if (useDouble) {
mm_double2 tmp;
kineticEnergyBuffer.download(&tmp);
KEs.first = tmp.x;
KEs.second = tmp.y;
}
else {
mm_float2 tmp;
kineticEnergyBuffer.download(&tmp);
KEs.first = tmp.x;
KEs.second = tmp.y;
}
}
return KEs;
}
void OpenCLNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> scaleFactor) {
// For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
int chainID = nhc.getChainID();
auto nAtoms = nhc.getThermostatedAtoms().size();
auto nPairs = nhc.getThermostatedPairs().size();
if (nAtoms) {
scaleAtomsVelocitiesKernel.setArg<cl::Buffer>(0, scaleFactorBuffer.getDeviceBuffer());
scaleAtomsVelocitiesKernel.setArg<cl_int>(1, nAtoms);
scaleAtomsVelocitiesKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
scaleAtomsVelocitiesKernel.setArg<cl::Buffer>(3, atomlists[chainID].getDeviceBuffer());
cl.executeKernel(scaleAtomsVelocitiesKernel, nAtoms);
}
if (nPairs) {
scalePairsVelocitiesKernel.setArg<cl::Buffer>(0, scaleFactorBuffer.getDeviceBuffer());
scalePairsVelocitiesKernel.setArg<cl_int>(1, nPairs);
scalePairsVelocitiesKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
scalePairsVelocitiesKernel.setArg<cl::Buffer>(3, pairlists[chainID].getDeviceBuffer());
cl.executeKernel(scalePairsVelocitiesKernel, nPairs);
}
}
void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) { void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
savedPositions.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions"); savedPositions.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions");
savedForces.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedForces"); savedForces.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedForces");
......
/**
* Perform the first step of Velocity Verlet integration.
*/
__kernel void integrateVelocityVerletPart1(int numAtoms, int numPairs, int paddedNumAtoms, __global const mixed2* restrict dt, __global const real4* restrict posq,
__global const real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta,
__global const int* restrict atomList, __global const int2* restrict pairList) {
const mixed2 stepSize = dt[0];
const mixed dtPos = stepSize.y;
const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
int index = get_global_id(0);
while (index < numAtoms) {
int atom = atomList[index];
mixed4 velocity = velm[atom];
if (velocity.w != 0.0) {
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[atom];
real4 pos2 = posqCorrection[atom];
mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[atom];
#endif
velocity.x += 0.5f * dtVel*force[atom].x*velocity.w;
velocity.y += 0.5f * dtVel*force[atom].y*velocity.w;
velocity.z += 0.5f * dtVel*force[atom].z*velocity.w;
pos.x = velocity.x*dtPos;
pos.y = velocity.y*dtPos;
pos.z = velocity.z*dtPos;
posDelta[atom] = pos;
velm[atom] = velocity;
}
index += get_global_size(0);
}
index = get_global_id(0);
while (index < numPairs){
int atom1 = pairList[index].x;
int atom2 = pairList[index].y;
mixed4 v1 = velm[atom1];
mixed4 v2 = velm[atom2];
mixed m1 = v1.w == 0.0f ? 0.0f : 1.0f / v1.w;
mixed m2 = v2.w == 0.0f ? 0.0f : 1.0f / v2.w;
mixed mass1fract = m1 / (m1 + m2);
mixed mass2fract = m2 / (m1 + m2);
mixed invRedMass = (m1 * m2 != 0.0f) ? (m1 + m2)/(m1 * m2) : 0.0f;
mixed invTotMass = (m1 + m2 != 0.0f) ? 1.0f /(m1 + m2) : 0.0f;
mixed3 comVel;
comVel.x= v1.x*mass1fract + v2.x*mass2fract;
comVel.y= v1.y*mass1fract + v2.y*mass2fract;
comVel.z= v1.z*mass1fract + v2.z*mass2fract;
mixed3 relVel;
relVel.x= v2.x - v1.x;
relVel.y= v2.y - v1.y;
relVel.z= v2.z - v1.z;
mixed3 comFrc;
comFrc.x = force[atom1].x + force[atom2].x;
comFrc.y = force[atom1].y + force[atom2].y;
comFrc.z = force[atom1].z + force[atom2].z;
mixed3 relFrc;
relFrc.x = mass1fract*force[atom2].x - mass2fract*force[atom1].x;
relFrc.y = mass1fract*force[atom2].y - mass2fract*force[atom1].y;
relFrc.z = mass1fract*force[atom2].z - mass2fract*force[atom1].z;
comVel.x += comFrc.x * 0.5f * dtVel * invTotMass;
comVel.y += comFrc.y * 0.5f * dtVel * invTotMass;
comVel.z += comFrc.z * 0.5f * dtVel * invTotMass;
relVel.x += relFrc.x * 0.5f * dtVel * invRedMass;
relVel.y += relFrc.y * 0.5f * dtVel * invRedMass;
relVel.z += relFrc.z * 0.5f * dtVel * invRedMass;
#ifdef USE_MIXED_PRECISION
real4 posv1 = posq[atom1];
real4 posv2 = posq[atom2];
real4 posc1 = posqCorrection[atom1];
real4 posc2 = posqCorrection[atom2];
mixed4 pos1 = (mixed4) (posv1.x+(mixed)posc1.x, posv1.y+(mixed)posc1.y, posv1.z+(mixed)posc1.z, posv1.w);
mixed4 pos2 = (mixed4) (posv2.x+(mixed)posc2.x, posv2.y+(mixed)posc2.y, posv2.z+(mixed)posc2.z, posv2.w);
#else
real4 pos1 = posq[atom1];
real4 pos2 = posq[atom2];
#endif
if (v1.w != 0.0f) {
v1.x = comVel.x - relVel.x*mass2fract;
v1.y = comVel.y - relVel.y*mass2fract;
v1.z = comVel.z - relVel.z*mass2fract;
pos1.x = v1.x*dtPos;
pos1.y = v1.y*dtPos;
pos1.z = v1.z*dtPos;
posDelta[atom1] = pos1;
velm[atom1] = v1;
}
if (v2.w != 0.0f) {
v2.x = comVel.x + relVel.x*mass1fract;
v2.y = comVel.y + relVel.y*mass1fract;
v2.z = comVel.z + relVel.z*mass1fract;
pos2.x = v2.x*dtPos;
pos2.y = v2.y*dtPos;
pos2.z = v2.z*dtPos;
posDelta[atom2] = pos2;
velm[atom2] = v2;
}
index += get_global_size(0);
}
}
/**
* Perform the second step of Velocity Verlet integration.
*/
__kernel void integrateVelocityVerletPart2(int numAtoms, __global mixed2* restrict dt, __global real4* restrict posq,
__global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const mixed4* restrict posDelta) {
mixed2 stepSize = dt[0];
int index = get_global_id(0);
if (index == 0)
dt[0].x = stepSize.y;
while(index < numAtoms) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
mixed4 delta = posDelta[index];
pos.xyz += delta.xyz;
#ifdef USE_MIXED_PRECISION
posq[index] = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
posq[index] = pos;
#endif
}
index += get_global_size(0);
}
}
/**
* Perform the third step of Velocity Verlet integration.
*/
__kernel void integrateVelocityVerletPart3(int numAtoms, int numPairs, int paddedNumAtoms, __global mixed2* restrict dt, __global real4* restrict posq,
__global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global const mixed4* restrict posDelta,
__global const int* restrict atomList, __global const int2* __restrict__ pairList) {
mixed2 stepSize = dt[0];
#ifndef SUPPORTS_DOUBLE_PRECISION
double oneOverDt = 1.0/stepSize.y;
#else
float oneOverDt = 1.0f/stepSize.y;
float correction = (1.0f-oneOverDt*stepSize.y)/stepSize.y;
#endif
const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
int index = get_global_id(0);
if (index == 0)
dt[0].x = stepSize.y;
while(index < numAtoms) {
int atom = atomList[index];
mixed4 velocity = velm[atom];
if (velocity.w != 0.0) {
mixed4 deltaXconstrained = posDelta[atom];
velocity.x += 0.5f * dtVel*force[atom].x*velocity.w + (deltaXconstrained.x - velocity.x*stepSize.y)*oneOverDt;
velocity.y += 0.5f * dtVel*force[atom].y*velocity.w + (deltaXconstrained.y - velocity.y*stepSize.y)*oneOverDt;
velocity.z += 0.5f * dtVel*force[atom].z*velocity.w + (deltaXconstrained.z - velocity.z*stepSize.y)*oneOverDt;
#ifdef SUPPORTS_DOUBLE_PRECISION
velocity.x += (deltaXconstrained.x - velocity.x*stepSize.y)*correction;
velocity.y += (deltaXconstrained.y - velocity.y*stepSize.y)*correction;
velocity.z += (deltaXconstrained.z - velocity.z*stepSize.y)*correction;
#endif
velm[atom] = velocity;
}
index += get_global_size(0);
}
index = get_global_id(0);
while(index < numPairs) {
int atom1 = pairList[index].x;
int atom2 = pairList[index].y;
mixed4 v1 = velm[atom1];
mixed4 v2 = velm[atom2];
mixed m1 = v1.w == 0.0f ? 0.0f : 1.0f / v1.w;
mixed m2 = v2.w == 0.0f ? 0.0f : 1.0f / v2.w;
mixed mass1fract = m1 / (m1 + m2);
mixed mass2fract = m2 / (m1 + m2);
mixed invRedMass = (m1 * m2 != 0.0f) ? (m1 + m2)/(m1 * m2) : 0.0f;
mixed invTotMass = (m1 + m2 != 0.0f) ? 1.0f /(m1 + m2) : 0.0f;
mixed3 comVel;
comVel.x= v1.x*mass1fract + v2.x*mass2fract;
comVel.y= v1.y*mass1fract + v2.y*mass2fract;
comVel.z= v1.z*mass1fract + v2.z*mass2fract;
mixed3 relVel;
relVel.x= v2.x - v1.x;
relVel.y= v2.y - v1.y;
relVel.z= v2.z - v1.z;
mixed3 comFrc;
comFrc.x = force[atom1].x + force[atom2].x;
comFrc.y = force[atom1].y + force[atom2].y;
comFrc.z = force[atom1].z + force[atom2].z;
mixed3 relFrc;
relFrc.x = mass1fract*force[atom2].x - mass2fract*force[atom1].x;
relFrc.y = mass1fract*force[atom2].y - mass2fract*force[atom1].y;
relFrc.z = mass1fract*force[atom2].z - mass2fract*force[atom1].z;
comVel.x += comFrc.x * 0.5f * dtVel * invTotMass;
comVel.y += comFrc.y * 0.5f * dtVel * invTotMass;
comVel.z += comFrc.z * 0.5f * dtVel * invTotMass;
relVel.x += relFrc.x * 0.5f * dtVel * invRedMass;
relVel.y += relFrc.y * 0.5f * dtVel * invRedMass;
relVel.z += relFrc.z * 0.5f * dtVel * invRedMass;
if (v1.w != 0.0f) {
mixed4 deltaXconstrained = posDelta[atom1];
v1.x = comVel.x - relVel.x*mass2fract + (deltaXconstrained.x - v1.x*stepSize.y)*oneOverDt;
v1.y = comVel.y - relVel.y*mass2fract + (deltaXconstrained.y - v1.y*stepSize.y)*oneOverDt;
v1.z = comVel.z - relVel.z*mass2fract + (deltaXconstrained.z - v1.z*stepSize.y)*oneOverDt;
#ifdef SUPPORTS_DOUBLE_PRECISION
v1.x += (deltaXconstrained.x - v1.x*stepSize.y)*correction;
v1.y += (deltaXconstrained.y - v1.y*stepSize.y)*correction;
v1.z += (deltaXconstrained.z - v1.z*stepSize.y)*correction;
#endif
velm[atom1] = v1;
}
if (v2.w != 0.0f) {
mixed4 deltaXconstrained = posDelta[atom2];
v2.x = comVel.x + relVel.x*mass1fract + (deltaXconstrained.x - v2.x*stepSize.y)*oneOverDt;
v2.y = comVel.y + relVel.y*mass1fract + (deltaXconstrained.y - v2.y*stepSize.y)*oneOverDt;
v2.z = comVel.z + relVel.z*mass1fract + (deltaXconstrained.z - v2.z*stepSize.y)*oneOverDt;
#ifdef SUPPORTS_DOUBLE_PRECISION
v2.x += (deltaXconstrained.x - v2.x*stepSize.y)*correction;
v2.y += (deltaXconstrained.y - v2.y*stepSize.y)*correction;
v2.z += (deltaXconstrained.z - v2.z*stepSize.y)*correction;
#endif
velm[atom2] = v2;
}
index += get_global_size(0);
}
}
__kernel void integrateVelocityVerletHardWall(int numPairs, __global const float* restrict maxPairDistance,
__global mixed2* restrict dt, __global real4* restrict posq,
__global real4* restrict posqCorrection, __global mixed4* restrict velm,
__global const int2* restrict pairList, __global const float* __restrict__ pairTemperature) {
mixed dtPos = dt[0].y;
mixed maxDelta = (mixed) maxPairDistance[0];
if (maxDelta > 0){
int index = get_global_id(0);
while(index < numPairs) {
const mixed hardWallScale = sqrt( ((mixed) pairTemperature[index]) * ((mixed) BOLTZ));
int2 atom = (int2) (pairList[index].x, pairList[index].y);
#ifdef USE_MIXED_PRECISION
real4 posv1 = posq[atom.x];
real4 posc1 = posqCorrection[atom.x];
mixed4 pos1 = (mixed4) (posv1.x+(mixed)posc1.x, posv1.y+(mixed)posc1.y, posv1.z+(mixed)posc1.z, posv1.w);
real4 posv2 = posq[atom.y];
real4 posc2 = posqCorrection[atom.y];
mixed4 pos2 = (mixed4) (posv2.x+(mixed)posc2.x, posv2.y+(mixed)posc2.y, posv2.z+(mixed)posc2.z, posv2.w);
#else
real4 pos1 = posq[atom.x];
real4 pos2 = posq[atom.y];
#endif
mixed3 delta = (mixed3) (
(mixed) (pos1.x - pos2.x),
(mixed) (pos1.y - pos2.y),
(mixed) (pos1.z - pos2.z)
);
mixed r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
mixed rInv = 1/r;
if (rInv*maxDelta < 1.0) {
// The constraint has been violated, so make the inter-particle distance "bounce"
// off the hard wall.
mixed3 bondDir = (mixed3) (delta.x * rInv, delta.y * rInv, delta.z * rInv);
mixed3 vel1 = (mixed3) (velm[atom.x].x, velm[atom.x].y, velm[atom.x].z);
mixed3 vel2 = (mixed3) (velm[atom.y].x, velm[atom.y].y, velm[atom.y].z);
mixed m1 = velm[atom.x].w != 0.0 ? 1.0/velm[atom.x].w : 0.0;
mixed m2 = velm[atom.y].w != 0.0 ? 1.0/velm[atom.y].w : 0.0;
mixed invTotMass = (m1 + m2 != 0.0) ? 1.0 /(m1 + m2) : 0.0;
mixed deltaR = r-maxDelta;
mixed deltaT = dtPos;
mixed dt = dtPos;
mixed dotvr1 = vel1.x*bondDir.x + vel1.y*bondDir.y + vel1.z*bondDir.z;
mixed3 vb1 = (mixed3) (bondDir.x*dotvr1, bondDir.y*dotvr1, bondDir.z*dotvr1);
mixed3 vp1 = (mixed3) (vel1.x-vb1.x, vel1.y-vb1.y, vel1.z-vb1.z);
if (m2 == 0) {
// The parent particle is massless, so move only the Drude particle.
if (dotvr1 != 0.0)
deltaT = deltaR/fabs(dotvr1);
if (deltaT > dtPos)
deltaT = dtPos;
dotvr1 = -dotvr1*hardWallScale/(fabs(dotvr1)*sqrt(m1));
mixed dr = -deltaR + deltaT*dotvr1;
pos1.x += bondDir.x*dr;
pos1.y += bondDir.y*dr;
pos1.z += bondDir.z*dr;
velm[atom.x] = (mixed4) (vp1.x + bondDir.x*dotvr1, vp1.y + bondDir.y*dotvr1, vp1.z + bondDir.z*dotvr1, velm[atom.x].w);
#ifdef USE_MIXED_PRECISION
posq[atom.x] = (real4) ((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
posqCorrection[atom.x] = (real4) (pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
#else
posq[atom.x] = pos1;
#endif
}
else {
// Move both particles.
mixed dotvr2 = vel2.x*bondDir.x + vel2.y*bondDir.y + vel2.z*bondDir.z;
mixed3 vb2 = (mixed3) (bondDir.x*dotvr2, bondDir.y*dotvr2, bondDir.z*dotvr2);
mixed3 vp2 = (mixed3) (vel2.x-vb2.x, vel2.y-vb2.y, vel2.z-vb2.z);
mixed vbCMass = (m1*dotvr1 + m2*dotvr2)*invTotMass;
dotvr1 -= vbCMass;
dotvr2 -= vbCMass;
if (dotvr1 != dotvr2)
deltaT = deltaR/fabs(dotvr1-dotvr2);
if (deltaT > dt)
deltaT = dt;
mixed vBond = hardWallScale/sqrt(m1);
dotvr1 = -dotvr1*vBond*m2*invTotMass/fabs(dotvr1);
dotvr2 = -dotvr2*vBond*m1*invTotMass/fabs(dotvr2);
mixed dr1 = -deltaR*m2*invTotMass + deltaT*dotvr1;
mixed dr2 = deltaR*m1*invTotMass + deltaT*dotvr2;
dotvr1 += vbCMass;
dotvr2 += vbCMass;
pos1.x += bondDir.x*dr1;
pos1.y += bondDir.y*dr1;
pos1.z += bondDir.z*dr1;
pos2.x += bondDir.x*dr2;
pos2.y += bondDir.y*dr2;
pos2.z += bondDir.z*dr2;
velm[atom.x] = (mixed4) (vp1.x + bondDir.x*dotvr1, vp1.y + bondDir.y*dotvr1, vp1.z + bondDir.z*dotvr1, velm[atom.x].w);
velm[atom.y] = (mixed4) (vp2.x + bondDir.x*dotvr2, vp2.y + bondDir.y*dotvr2, vp2.z + bondDir.z*dotvr2, velm[atom.y].w);
#ifdef USE_MIXED_PRECISION
posq[atom.x] = (real4) ((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
posq[atom.y] = (real4) ((real) pos2.x, (real) pos2.y, (real) pos2.z, (real) pos2.w);
posqCorrection[atom.x] = (real4) (pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
posqCorrection[atom.y] = (real4) (pos2.x-(real) pos2.x, pos2.y-(real) pos2.y, pos2.z-(real) pos2.z, 0);
#else
posq[atom.x] = pos1;
posq[atom.y] = pos2;
#endif
}
}
index += get_global_size(0);
}
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment