/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2025 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "openmm/common/CommonIntegrateQTBStepKernel.h"
#include "openmm/common/CommonKernelUtilities.h"
#include "openmm/common/ContextSelector.h"
#include "openmm/Context.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/QTBIntegratorUtilities.h"
#include "CommonKernelSources.h"
#include "SimTKOpenMMUtilities.h"
using namespace OpenMM;
using namespace std;
void CommonIntegrateQTBStepKernel::initialize(const System& system, const QTBIntegrator& integrator) {
cc.initializeContexts();
ContextSelector selector(cc);
cc.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
int numParticles = system.getNumParticles();
dt = integrator.getStepSize();
friction = integrator.getFriction();
segmentLength = (int) ceil(integrator.getSegmentLength()/integrator.getStepSize());
numFreq = (3*segmentLength+1)/2;
noise.initialize(cc, 3*3*segmentLength*numParticles, "noise");
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
vector noiseVec(noise.getSize());
for (int i = 0; i < noiseVec.size(); i++)
noiseVec[i] = (float) SimTKOpenMMUtilities::getNormallyDistributedRandomNumber();
noise.upload(noiseVec);
int elementSize = (cc.getUseMixedPrecision() || cc.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
randomForce.initialize(cc, 3*segmentLength*numParticles, elementSize, "randomForce");
segmentVelocity.initialize(cc, 3*segmentLength*numParticles, elementSize, "segmentVelocity");
oldDelta.initialize(cc, cc.getPaddedNumAtoms(), 4*elementSize, "oldDelta");
thetad.initialize(cc, numFreq, elementSize, "thetad");
workspace.initialize(cc, 18*segmentLength*cc.getNumThreadBlocks(), elementSize, "workspace");
cutoffFunction.initialize(cc, numFreq, elementSize, "cutoffFunction");
vector cf(numFreq);
double cutoff = integrator.getCutoffFrequency();
double cutoffWidth = cutoff/100;
for (int i = 0; i < numFreq; i++) {
double w = M_PI*i/(numFreq*dt);
cf[i] = 1.0/(1.0+exp((w-cutoff)/cutoffWidth));
}
cutoffFunction.upload(cf, true);
vector typeMassVec, typeAdaptationRateVec;
vector > typeParticles;
QTBIntegratorUtilities::findTypes(system, integrator, particleTypeVec, typeParticles, typeMassVec, typeAdaptationRateVec);
particleType.initialize(cc, particleTypeVec.size(), "particleType");
particleType.upload(particleTypeVec);
typeAdaptationRate.initialize(cc, typeAdaptationRateVec.size(), "typeAdaptationRate");
typeAdaptationRate.upload(typeAdaptationRateVec, true);
vector typeParticleCountVec(typeParticles.size());
for (int i = 0; i < typeParticleCountVec.size(); i++)
typeParticleCountVec[i] = typeParticles[i].size();
typeParticleCount.initialize(cc, typeParticleCountVec.size(), "typeParticleCount");
typeParticleCount.upload(typeParticleCountVec);
numTypes = typeParticles.size();
adaptedFriction.initialize(cc, numFreq*numTypes, elementSize, "adaptedFriction");
vector adaptedFrictionVec(adaptedFriction.getSize(), friction);
adaptedFriction.upload(adaptedFrictionVec, true);
dfdt.initialize(cc, adaptedFriction.getSize(), sizeof(long long), "dfdt");
// Create the kernels.
map defines, replacements;
defines["M_PI"] = cc.doubleToString(M_PI);
int outputIndex;
replacements["FFT_FORWARD"] = createFFT(3*segmentLength, 0, outputIndex, true);
replacements["RECIP_DATA"] = (outputIndex == 0 ? "data0" : "data1");
replacements["FFT_BACKWARD"] = createFFT(3*segmentLength, outputIndex, outputIndex, false);
replacements["ADAPTATION_FFT"] = createFFT(3*segmentLength, 0, outputIndex, true);
replacements["ADAPTATION_RECIP"] = (outputIndex == 0 ? "data0" : "data1");
ComputeProgram program = cc.compileProgram(cc.replaceStrings(CommonKernelSources::qtb, replacements), defines);
kernel1 = program->createKernel("integrateQTBPart1");
kernel2 = program->createKernel("integrateQTBPart2");
kernel3 = program->createKernel("integrateQTBPart3");
noiseKernel = program->createKernel("generateNoise");
forceKernel = program->createKernel("generateRandomForce");
adapt1Kernel = program->createKernel("adaptFrictionPart1");
adapt2Kernel = program->createKernel("adaptFrictionPart2");
stepIndex = 0;
prevTemp = -1.0;
}
void CommonIntegrateQTBStepKernel::execute(ContextImpl& context, const QTBIntegrator& integrator) {
ContextSelector selector(cc);
IntegrationUtilities& integration = cc.getIntegrationUtilities();
int numAtoms = cc.getNumAtoms();
int paddedNumAtoms = cc.getPaddedNumAtoms();
double temperature = integrator.getTemperature();
if (!hasInitializedKernels) {
hasInitializedKernels = true;
bool useDouble = cc.getUseDoublePrecision() || cc.getUseMixedPrecision();
kernel1->addArg(numAtoms);
kernel1->addArg(paddedNumAtoms);
if (useDouble)
kernel1->addArg(dt);
else
kernel1->addArg((float) dt);
kernel1->addArg(cc.getVelm());
kernel1->addArg(cc.getLongForceBuffer());
kernel1->addArg(cc.getAtomIndexArray());
kernel2->addArg(numAtoms);
if (useDouble) {
kernel2->addArg(dt);
kernel2->addArg(friction);
}
else {
kernel2->addArg((float) dt);
kernel2->addArg((float) friction);
}
kernel2->addArg();
kernel2->addArg(cc.getVelm());
kernel2->addArg(integration.getPosDelta());
kernel2->addArg(oldDelta);
kernel2->addArg(randomForce);
kernel2->addArg(segmentVelocity);
kernel2->addArg(cc.getAtomIndexArray());
kernel3->addArg(numAtoms);
if (useDouble)
kernel3->addArg(dt);
else
kernel3->addArg((float) dt);
kernel3->addArg(cc.getPosq());
kernel3->addArg(cc.getVelm());
kernel3->addArg(integration.getPosDelta());
kernel3->addArg(oldDelta);
if (cc.getUseMixedPrecision())
kernel3->addArg(cc.getPosqCorrection());
noiseKernel->addArg(numAtoms);
noiseKernel->addArg(segmentLength);
noiseKernel->addArg(noise);
noiseKernel->addArg(integration.getRandom());
noiseKernel->addArg(); // Random index will be set just before it is executed.
forceKernel->addArg(numAtoms);
forceKernel->addArg(segmentLength);
if (useDouble) {
forceKernel->addArg(dt);
forceKernel->addArg(friction);
}
else {
forceKernel->addArg((float) dt);
forceKernel->addArg((float) friction);
}
forceKernel->addArg(noise);
forceKernel->addArg(randomForce);
forceKernel->addArg(cc.getVelm());
forceKernel->addArg(thetad);
forceKernel->addArg(cutoffFunction);
forceKernel->addArg(particleType);
forceKernel->addArg(adaptedFriction);
forceKernel->addArg(workspace);
adapt1Kernel->addArg(numAtoms);
adapt1Kernel->addArg(segmentLength);
adapt1Kernel->addArg(cc.getVelm());
adapt1Kernel->addArg(particleType);
adapt1Kernel->addArg(randomForce);
adapt1Kernel->addArg(segmentVelocity);
adapt1Kernel->addArg(adaptedFriction);
adapt1Kernel->addArg(dfdt);
adapt1Kernel->addArg(workspace);
adapt2Kernel->addArg(numTypes);
adapt2Kernel->addArg(segmentLength);
if (useDouble)
adapt2Kernel->addArg(dt);
else
adapt2Kernel->addArg((float) dt);
adapt2Kernel->addArg(typeParticleCount);
adapt2Kernel->addArg(typeAdaptationRate);
adapt2Kernel->addArg(adaptedFriction);
adapt2Kernel->addArg(dfdt);
}
cc.getIntegrationUtilities().setNextStepSize(dt);
// Update the random force at the start of each segment.
if (stepIndex%segmentLength == 0) {
if (temperature != prevTemp) {
vector thetaVec, thetadVec;
QTBIntegratorUtilities::calculateSpectrum(temperature, friction, dt, numFreq, thetaVec, thetadVec, cc.getThreadPool());
thetad.upload(thetadVec, true);
prevTemp = temperature;
}
cc.clearBuffer(dfdt);
adapt1Kernel->execute(3*numAtoms*128, 128);
adapt2Kernel->execute(numTypes*128, 128);
noiseKernel->setArg(4, integration.prepareRandomNumbers(3*numAtoms*segmentLength/4+1));
noiseKernel->execute(3*numAtoms*128, 128);
forceKernel->execute(3*numAtoms*128, 128);
stepIndex = 0;
}
// Perform the integration.
kernel2->setArg(3, stepIndex);
kernel1->execute(numAtoms);
integration.applyVelocityConstraints(integrator.getConstraintTolerance());
kernel2->execute(numAtoms);
integration.applyConstraints(integrator.getConstraintTolerance());
kernel3->execute(numAtoms);
integration.computeVirtualSites();
// Update the time and step count.
cc.setTime(cc.getTime()+dt);
cc.setStepCount(cc.getStepCount()+1);
stepIndex++;
cc.reorderAtoms();
// Reduce UI lag.
flushPeriodically(cc);
}
double CommonIntegrateQTBStepKernel::computeKineticEnergy(ContextImpl& context, const QTBIntegrator& integrator) {
return cc.getIntegrationUtilities().computeKineticEnergy(0.0);
}
void CommonIntegrateQTBStepKernel::getAdaptedFriction(ContextImpl& context, int particle, std::vector& friction) const {
ASSERT_VALID_INDEX(particle, particleTypeVec);
int type = particleTypeVec[particle];
friction.resize(numFreq);
ContextSelector selector(cc);
if (cc.getUseMixedPrecision() || cc.getUseDoublePrecision()) {
vector adaptedFrictionVec;
adaptedFriction.download(adaptedFrictionVec);
for (int i = 0; i < numFreq; i++)
friction[i] = adaptedFrictionVec[type*numFreq+i];
}
else {
vector adaptedFrictionVec;
adaptedFriction.download(adaptedFrictionVec);
for (int i = 0; i < numFreq; i++)
friction[i] = adaptedFrictionVec[type*numFreq+i];
}
}
void CommonIntegrateQTBStepKernel::setAdaptedFriction(ContextImpl& context, int particle, const std::vector& friction) {
ASSERT_VALID_INDEX(particle, particleTypeVec);
int type = particleTypeVec[particle];
ContextSelector selector(cc);
if (cc.getUseMixedPrecision() || cc.getUseDoublePrecision()) {
vector adaptedFrictionVec;
adaptedFriction.download(adaptedFrictionVec);
for (int i = 0; i < numFreq; i++)
adaptedFrictionVec[type*numFreq+i] = friction[i];
adaptedFriction.upload(adaptedFrictionVec);
}
else {
vector adaptedFrictionVec;
adaptedFriction.download(adaptedFrictionVec);
for (int i = 0; i < numFreq; i++)
adaptedFrictionVec[type*numFreq+i] = friction[i];
adaptedFriction.upload(adaptedFrictionVec);
}
}
void CommonIntegrateQTBStepKernel::createCheckpoint(ContextImpl& context, ostream& stream) const {
ContextSelector selector(cc);
stream.write((char*) &stepIndex, sizeof(int));
vector f;
noise.download(f);
stream.write((char*) f.data(), sizeof(float)*f.size());
if (cc.getUseMixedPrecision() || cc.getUseDoublePrecision()) {
vector d;
randomForce.download(d);
stream.write((char*) d.data(), sizeof(double)*d.size());
segmentVelocity.download(d);
stream.write((char*) d.data(), sizeof(double)*d.size());
adaptedFriction.download(d);
stream.write((char*) d.data(), sizeof(double)*d.size());
}
else {
randomForce.download(f);
stream.write((char*) f.data(), sizeof(float)*f.size());
segmentVelocity.download(f);
stream.write((char*) f.data(), sizeof(float)*f.size());
adaptedFriction.download(f);
stream.write((char*) f.data(), sizeof(float)*f.size());
}
}
void CommonIntegrateQTBStepKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
ContextSelector selector(cc);
stream.read((char*) &stepIndex, sizeof(int));
vector f(noise.getSize());
stream.read((char*) f.data(), sizeof(float)*noise.getSize());
noise.upload(f);
if (cc.getUseMixedPrecision() || cc.getUseDoublePrecision()) {
vector d;
d.resize(randomForce.getSize());
stream.read((char*) d.data(), sizeof(double)*d.size());
randomForce.upload(d);
d.resize(segmentVelocity.getSize());
stream.read((char*) d.data(), sizeof(double)*d.size());
segmentVelocity.upload(d);
d.resize(adaptedFriction.getSize());
stream.read((char*) d.data(), sizeof(double)*d.size());
adaptedFriction.upload(d);
}
else {
f.resize(randomForce.getSize());
stream.read((char*) f.data(), sizeof(float)*f.size());
randomForce.upload(f);
f.resize(segmentVelocity.getSize());
stream.read((char*) f.data(), sizeof(float)*f.size());
segmentVelocity.upload(f);
f.resize(adaptedFriction.getSize());
stream.read((char*) f.data(), sizeof(float)*f.size());
adaptedFriction.upload(f);
}
}
string CommonIntegrateQTBStepKernel::createFFT(int size, int inputIndex, int& outputIndex, bool forward) {
stringstream source;
int stage = 0;
int L = size;
int m = 1;
string sign = (forward ? "1" : "-1");
string mult = (forward ? "multiplyComplex" : "multiplyComplexConj");
// Factor size, generating an appropriate block of code for each factor.
while (L > 1) {
int input = (inputIndex+stage)%2;
int output = 1-input;
int radix;
if (L%7 == 0)
radix = 7;
else if (L%5 == 0)
radix = 5;
else if (L%4 == 0)
radix = 4;
else if (L%3 == 0)
radix = 3;
else if (L%2 == 0)
radix = 2;
else
throw OpenMMException("QTBIntegrator: Number of steps in a segment must be a multiple of powers of 2, 3, 5, and 7");
source<<"{\n";
L = L/radix;
source<<"// Pass "<<(stage+1)<<" (radix "<