/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2025 Stanford University and the Authors. * * Authors: Peter Eastman * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . * * -------------------------------------------------------------------------- */ #include "openmm/common/CommonIntegrateQTBStepKernel.h" #include "openmm/common/CommonKernelUtilities.h" #include "openmm/common/ContextSelector.h" #include "openmm/Context.h" #include "openmm/internal/ContextImpl.h" #include "openmm/internal/QTBIntegratorUtilities.h" #include "CommonKernelSources.h" #include "SimTKOpenMMUtilities.h" using namespace OpenMM; using namespace std; void CommonIntegrateQTBStepKernel::initialize(const System& system, const QTBIntegrator& integrator) { cc.initializeContexts(); ContextSelector selector(cc); cc.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); int numParticles = system.getNumParticles(); dt = integrator.getStepSize(); friction = integrator.getFriction(); segmentLength = (int) ceil(integrator.getSegmentLength()/integrator.getStepSize()); numFreq = (3*segmentLength+1)/2; noise.initialize(cc, 3*3*segmentLength*numParticles, "noise"); SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed()); vector noiseVec(noise.getSize()); for (int i = 0; i < noiseVec.size(); i++) noiseVec[i] = (float) SimTKOpenMMUtilities::getNormallyDistributedRandomNumber(); noise.upload(noiseVec); int elementSize = (cc.getUseMixedPrecision() || cc.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); randomForce.initialize(cc, 3*segmentLength*numParticles, elementSize, "randomForce"); segmentVelocity.initialize(cc, 3*segmentLength*numParticles, elementSize, "segmentVelocity"); oldDelta.initialize(cc, cc.getPaddedNumAtoms(), 4*elementSize, "oldDelta"); thetad.initialize(cc, numFreq, elementSize, "thetad"); workspace.initialize(cc, 18*segmentLength*cc.getNumThreadBlocks(), elementSize, "workspace"); cutoffFunction.initialize(cc, numFreq, elementSize, "cutoffFunction"); vector cf(numFreq); double cutoff = integrator.getCutoffFrequency(); double cutoffWidth = cutoff/100; for (int i = 0; i < numFreq; i++) { double w = M_PI*i/(numFreq*dt); cf[i] = 1.0/(1.0+exp((w-cutoff)/cutoffWidth)); } cutoffFunction.upload(cf, true); vector typeMassVec, typeAdaptationRateVec; vector > typeParticles; QTBIntegratorUtilities::findTypes(system, integrator, particleTypeVec, typeParticles, typeMassVec, typeAdaptationRateVec); particleType.initialize(cc, particleTypeVec.size(), "particleType"); particleType.upload(particleTypeVec); typeAdaptationRate.initialize(cc, typeAdaptationRateVec.size(), "typeAdaptationRate"); typeAdaptationRate.upload(typeAdaptationRateVec, true); vector typeParticleCountVec(typeParticles.size()); for (int i = 0; i < typeParticleCountVec.size(); i++) typeParticleCountVec[i] = typeParticles[i].size(); typeParticleCount.initialize(cc, typeParticleCountVec.size(), "typeParticleCount"); typeParticleCount.upload(typeParticleCountVec); numTypes = typeParticles.size(); adaptedFriction.initialize(cc, numFreq*numTypes, elementSize, "adaptedFriction"); vector adaptedFrictionVec(adaptedFriction.getSize(), friction); adaptedFriction.upload(adaptedFrictionVec, true); dfdt.initialize(cc, adaptedFriction.getSize(), sizeof(long long), "dfdt"); // Create the kernels. map defines, replacements; defines["M_PI"] = cc.doubleToString(M_PI); int outputIndex; replacements["FFT_FORWARD"] = createFFT(3*segmentLength, 0, outputIndex, true); replacements["RECIP_DATA"] = (outputIndex == 0 ? "data0" : "data1"); replacements["FFT_BACKWARD"] = createFFT(3*segmentLength, outputIndex, outputIndex, false); replacements["ADAPTATION_FFT"] = createFFT(3*segmentLength, 0, outputIndex, true); replacements["ADAPTATION_RECIP"] = (outputIndex == 0 ? "data0" : "data1"); ComputeProgram program = cc.compileProgram(cc.replaceStrings(CommonKernelSources::qtb, replacements), defines); kernel1 = program->createKernel("integrateQTBPart1"); kernel2 = program->createKernel("integrateQTBPart2"); kernel3 = program->createKernel("integrateQTBPart3"); noiseKernel = program->createKernel("generateNoise"); forceKernel = program->createKernel("generateRandomForce"); adapt1Kernel = program->createKernel("adaptFrictionPart1"); adapt2Kernel = program->createKernel("adaptFrictionPart2"); stepIndex = 0; prevTemp = -1.0; } void CommonIntegrateQTBStepKernel::execute(ContextImpl& context, const QTBIntegrator& integrator) { ContextSelector selector(cc); IntegrationUtilities& integration = cc.getIntegrationUtilities(); int numAtoms = cc.getNumAtoms(); int paddedNumAtoms = cc.getPaddedNumAtoms(); double temperature = integrator.getTemperature(); if (!hasInitializedKernels) { hasInitializedKernels = true; bool useDouble = cc.getUseDoublePrecision() || cc.getUseMixedPrecision(); kernel1->addArg(numAtoms); kernel1->addArg(paddedNumAtoms); if (useDouble) kernel1->addArg(dt); else kernel1->addArg((float) dt); kernel1->addArg(cc.getVelm()); kernel1->addArg(cc.getLongForceBuffer()); kernel1->addArg(cc.getAtomIndexArray()); kernel2->addArg(numAtoms); if (useDouble) { kernel2->addArg(dt); kernel2->addArg(friction); } else { kernel2->addArg((float) dt); kernel2->addArg((float) friction); } kernel2->addArg(); kernel2->addArg(cc.getVelm()); kernel2->addArg(integration.getPosDelta()); kernel2->addArg(oldDelta); kernel2->addArg(randomForce); kernel2->addArg(segmentVelocity); kernel2->addArg(cc.getAtomIndexArray()); kernel3->addArg(numAtoms); if (useDouble) kernel3->addArg(dt); else kernel3->addArg((float) dt); kernel3->addArg(cc.getPosq()); kernel3->addArg(cc.getVelm()); kernel3->addArg(integration.getPosDelta()); kernel3->addArg(oldDelta); if (cc.getUseMixedPrecision()) kernel3->addArg(cc.getPosqCorrection()); noiseKernel->addArg(numAtoms); noiseKernel->addArg(segmentLength); noiseKernel->addArg(noise); noiseKernel->addArg(integration.getRandom()); noiseKernel->addArg(); // Random index will be set just before it is executed. forceKernel->addArg(numAtoms); forceKernel->addArg(segmentLength); if (useDouble) { forceKernel->addArg(dt); forceKernel->addArg(friction); } else { forceKernel->addArg((float) dt); forceKernel->addArg((float) friction); } forceKernel->addArg(noise); forceKernel->addArg(randomForce); forceKernel->addArg(cc.getVelm()); forceKernel->addArg(thetad); forceKernel->addArg(cutoffFunction); forceKernel->addArg(particleType); forceKernel->addArg(adaptedFriction); forceKernel->addArg(workspace); adapt1Kernel->addArg(numAtoms); adapt1Kernel->addArg(segmentLength); adapt1Kernel->addArg(cc.getVelm()); adapt1Kernel->addArg(particleType); adapt1Kernel->addArg(randomForce); adapt1Kernel->addArg(segmentVelocity); adapt1Kernel->addArg(adaptedFriction); adapt1Kernel->addArg(dfdt); adapt1Kernel->addArg(workspace); adapt2Kernel->addArg(numTypes); adapt2Kernel->addArg(segmentLength); if (useDouble) adapt2Kernel->addArg(dt); else adapt2Kernel->addArg((float) dt); adapt2Kernel->addArg(typeParticleCount); adapt2Kernel->addArg(typeAdaptationRate); adapt2Kernel->addArg(adaptedFriction); adapt2Kernel->addArg(dfdt); } cc.getIntegrationUtilities().setNextStepSize(dt); // Update the random force at the start of each segment. if (stepIndex%segmentLength == 0) { if (temperature != prevTemp) { vector thetaVec, thetadVec; QTBIntegratorUtilities::calculateSpectrum(temperature, friction, dt, numFreq, thetaVec, thetadVec, cc.getThreadPool()); thetad.upload(thetadVec, true); prevTemp = temperature; } cc.clearBuffer(dfdt); adapt1Kernel->execute(3*numAtoms*128, 128); adapt2Kernel->execute(numTypes*128, 128); noiseKernel->setArg(4, integration.prepareRandomNumbers(3*numAtoms*segmentLength/4+1)); noiseKernel->execute(3*numAtoms*128, 128); forceKernel->execute(3*numAtoms*128, 128); stepIndex = 0; } // Perform the integration. kernel2->setArg(3, stepIndex); kernel1->execute(numAtoms); integration.applyVelocityConstraints(integrator.getConstraintTolerance()); kernel2->execute(numAtoms); integration.applyConstraints(integrator.getConstraintTolerance()); kernel3->execute(numAtoms); integration.computeVirtualSites(); // Update the time and step count. cc.setTime(cc.getTime()+dt); cc.setStepCount(cc.getStepCount()+1); stepIndex++; cc.reorderAtoms(); // Reduce UI lag. flushPeriodically(cc); } double CommonIntegrateQTBStepKernel::computeKineticEnergy(ContextImpl& context, const QTBIntegrator& integrator) { return cc.getIntegrationUtilities().computeKineticEnergy(0.0); } void CommonIntegrateQTBStepKernel::getAdaptedFriction(ContextImpl& context, int particle, std::vector& friction) const { ASSERT_VALID_INDEX(particle, particleTypeVec); int type = particleTypeVec[particle]; friction.resize(numFreq); ContextSelector selector(cc); if (cc.getUseMixedPrecision() || cc.getUseDoublePrecision()) { vector adaptedFrictionVec; adaptedFriction.download(adaptedFrictionVec); for (int i = 0; i < numFreq; i++) friction[i] = adaptedFrictionVec[type*numFreq+i]; } else { vector adaptedFrictionVec; adaptedFriction.download(adaptedFrictionVec); for (int i = 0; i < numFreq; i++) friction[i] = adaptedFrictionVec[type*numFreq+i]; } } void CommonIntegrateQTBStepKernel::setAdaptedFriction(ContextImpl& context, int particle, const std::vector& friction) { ASSERT_VALID_INDEX(particle, particleTypeVec); int type = particleTypeVec[particle]; ContextSelector selector(cc); if (cc.getUseMixedPrecision() || cc.getUseDoublePrecision()) { vector adaptedFrictionVec; adaptedFriction.download(adaptedFrictionVec); for (int i = 0; i < numFreq; i++) adaptedFrictionVec[type*numFreq+i] = friction[i]; adaptedFriction.upload(adaptedFrictionVec); } else { vector adaptedFrictionVec; adaptedFriction.download(adaptedFrictionVec); for (int i = 0; i < numFreq; i++) adaptedFrictionVec[type*numFreq+i] = friction[i]; adaptedFriction.upload(adaptedFrictionVec); } } void CommonIntegrateQTBStepKernel::createCheckpoint(ContextImpl& context, ostream& stream) const { ContextSelector selector(cc); stream.write((char*) &stepIndex, sizeof(int)); vector f; noise.download(f); stream.write((char*) f.data(), sizeof(float)*f.size()); if (cc.getUseMixedPrecision() || cc.getUseDoublePrecision()) { vector d; randomForce.download(d); stream.write((char*) d.data(), sizeof(double)*d.size()); segmentVelocity.download(d); stream.write((char*) d.data(), sizeof(double)*d.size()); adaptedFriction.download(d); stream.write((char*) d.data(), sizeof(double)*d.size()); } else { randomForce.download(f); stream.write((char*) f.data(), sizeof(float)*f.size()); segmentVelocity.download(f); stream.write((char*) f.data(), sizeof(float)*f.size()); adaptedFriction.download(f); stream.write((char*) f.data(), sizeof(float)*f.size()); } } void CommonIntegrateQTBStepKernel::loadCheckpoint(ContextImpl& context, istream& stream) { ContextSelector selector(cc); stream.read((char*) &stepIndex, sizeof(int)); vector f(noise.getSize()); stream.read((char*) f.data(), sizeof(float)*noise.getSize()); noise.upload(f); if (cc.getUseMixedPrecision() || cc.getUseDoublePrecision()) { vector d; d.resize(randomForce.getSize()); stream.read((char*) d.data(), sizeof(double)*d.size()); randomForce.upload(d); d.resize(segmentVelocity.getSize()); stream.read((char*) d.data(), sizeof(double)*d.size()); segmentVelocity.upload(d); d.resize(adaptedFriction.getSize()); stream.read((char*) d.data(), sizeof(double)*d.size()); adaptedFriction.upload(d); } else { f.resize(randomForce.getSize()); stream.read((char*) f.data(), sizeof(float)*f.size()); randomForce.upload(f); f.resize(segmentVelocity.getSize()); stream.read((char*) f.data(), sizeof(float)*f.size()); segmentVelocity.upload(f); f.resize(adaptedFriction.getSize()); stream.read((char*) f.data(), sizeof(float)*f.size()); adaptedFriction.upload(f); } } string CommonIntegrateQTBStepKernel::createFFT(int size, int inputIndex, int& outputIndex, bool forward) { stringstream source; int stage = 0; int L = size; int m = 1; string sign = (forward ? "1" : "-1"); string mult = (forward ? "multiplyComplex" : "multiplyComplexConj"); // Factor size, generating an appropriate block of code for each factor. while (L > 1) { int input = (inputIndex+stage)%2; int output = 1-input; int radix; if (L%7 == 0) radix = 7; else if (L%5 == 0) radix = 5; else if (L%4 == 0) radix = 4; else if (L%3 == 0) radix = 3; else if (L%2 == 0) radix = 2; else throw OpenMMException("QTBIntegrator: Number of steps in a segment must be a multiple of powers of 2, 3, 5, and 7"); source<<"{\n"; L = L/radix; source<<"// Pass "<<(stage+1)<<" (radix "<