/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2011-2021 Stanford University and the Authors. * * Authors: Peter Eastman * * Contributors: * * * * Permission is hereby granted, free of charge, to any person obtaining a * * copy of this software and associated documentation files (the "Software"), * * to deal in the Software without restriction, including without limitation * * the rights to use, copy, modify, merge, publish, distribute, sublicense, * * and/or sell copies of the Software, and to permit persons to whom the * * Software is furnished to do so, subject to the following conditions: * * * * The above copyright notice and this permission notice shall be included in * * all copies or substantial portions of the Software. * * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * * USE OR OTHER DEALINGS IN THE SOFTWARE. * * -------------------------------------------------------------------------- */ #include "CommonRpmdKernels.h" #include "CommonRpmdKernelSources.h" #include "openmm/internal/ContextImpl.h" #include "openmm/common/ContextSelector.h" #include "openmm/common/IntegrationUtilities.h" #include "openmm/common/ExpressionUtilities.h" #include "openmm/common/NonbondedUtilities.h" #include "SimTKOpenMMRealType.h" using namespace OpenMM; using namespace std; /** * Select a size for an FFT that is a multiple of 2, 3, 5, and 7. */ static int findFFTDimension(int minimum) { if (minimum < 1) return 1; while (true) { // Attempt to factor the current value. int unfactored = minimum; for (int factor = 2; factor < 8; factor++) { while (unfactored > 1 && unfactored%factor == 0) unfactored /= factor; } if (unfactored == 1) return minimum; minimum++; } } void CommonIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) { cc.initializeContexts(); ContextSelector selector(cc); numCopies = integrator.getNumCopies(); numParticles = system.getNumParticles(); workgroupSize = numCopies; if (numCopies != findFFTDimension(numCopies)) throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5."); int paddedParticles = cc.getPaddedNumAtoms(); bool useDoublePrecision = (cc.getUseDoublePrecision() || cc.getUseMixedPrecision()); int elementSize = (useDoublePrecision ? sizeof(mm_double4) : sizeof(mm_float4)); forces.initialize(cc, numCopies*paddedParticles*3, "rpmdForces"); positions.initialize(cc, numCopies*paddedParticles, elementSize, "rpmdPositions"); velocities.initialize(cc, numCopies*paddedParticles, elementSize, "rpmdVelocities"); cc.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed()); // Fill in the posq and velm arrays with safe values to avoid a risk of nans. if (useDoublePrecision) { vector temp(positions.getSize()); for (int i = 0; i < positions.getSize(); i++) temp[i] = mm_double4(0, 0, 0, 0); positions.upload(temp); for (int i = 0; i < velocities.getSize(); i++) temp[i] = mm_double4(0, 0, 0, 1); velocities.upload(temp); } else { vector temp(positions.getSize()); for (int i = 0; i < positions.getSize(); i++) temp[i] = mm_float4(0, 0, 0, 0); positions.upload(temp); for (int i = 0; i < velocities.getSize(); i++) temp[i] = mm_float4(0, 0, 0, 1); velocities.upload(temp); } // Build a list of contractions. groupsNotContracted = -1; const map& contractions = integrator.getContractions(); int maxContractedCopies = 0; for (auto& c : contractions) { int group = c.first; int copies = c.second; if (group < 0 || group > 31) throw OpenMMException("RPMDIntegrator: Force group must be between 0 and 31"); if (copies < 0 || copies > numCopies) throw OpenMMException("RPMDIntegrator: Number of copies for contraction cannot be greater than the total number of copies being simulated"); if (copies != findFFTDimension(copies)) throw OpenMMException("RPMDIntegrator: Number of copies for contraction must be a multiple of powers of 2, 3, and 5."); if (copies != numCopies) { if (groupsByCopies.find(copies) == groupsByCopies.end()) { groupsByCopies[copies] = 1< maxContractedCopies) maxContractedCopies = copies; } else groupsByCopies[copies] |= 1< 0) { contractedForces.initialize(cc, maxContractedCopies*paddedParticles*3, "rpmdContractedForces"); contractedPositions.initialize(cc, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions"); } // Create kernels. map defines; defines["NUM_ATOMS"] = cc.intToString(cc.getNumAtoms()); defines["PADDED_NUM_ATOMS"] = cc.intToString(cc.getPaddedNumAtoms()); defines["NUM_COPIES"] = cc.intToString(numCopies); defines["THREAD_BLOCK_SIZE"] = cc.intToString(workgroupSize); defines["HBAR"] = cc.doubleToString(1.054571628e-34*AVOGADRO/(1000*1e-12)); defines["SCALE"] = cc.doubleToString(1.0/sqrt((double) numCopies)); defines["M_PI"] = cc.doubleToString(M_PI); map replacements; replacements["FFT_Q_FORWARD"] = createFFT(numCopies, "q", true); replacements["FFT_Q_BACKWARD"] = createFFT(numCopies, "q", false); replacements["FFT_V_FORWARD"] = createFFT(numCopies, "v", true); replacements["FFT_V_BACKWARD"] = createFFT(numCopies, "v", false); ComputeProgram program = cc.compileProgram(cc.replaceStrings(CommonRpmdKernelSources::rpmd, replacements), defines); pileKernel = program->createKernel("applyPileThermostat"); stepKernel = program->createKernel("integrateStep"); velocitiesKernel = program->createKernel("advanceVelocities"); copyToContextKernel = program->createKernel("copyDataToContext"); copyFromContextKernel = program->createKernel("copyDataFromContext"); translateKernel = program->createKernel("applyCellTranslations"); // Create kernels for doing contractions. for (auto& g : groupsByCopies) { int copies = g.first; replacements.clear(); replacements["NUM_CONTRACTED_COPIES"] = cc.intToString(copies); replacements["POS_SCALE"] = cc.doubleToString(1.0/numCopies); replacements["FORCE_SCALE"] = cc.doubleToString(0x100000000/(double) copies); replacements["FFT_Q_FORWARD"] = createFFT(numCopies, "q", true); replacements["FFT_Q_BACKWARD"] = createFFT(copies, "q", false); replacements["FFT_F_FORWARD"] = createFFT(copies, "f", true); replacements["FFT_F_BACKWARD"] = createFFT(numCopies, "f", false); program = cc.compileProgram(cc.replaceStrings(CommonRpmdKernelSources::rpmdContraction, replacements), defines); positionContractionKernels[copies] = program->createKernel("contractPositions"); forceContractionKernels[copies] = program->createKernel("contractForces"); } } void CommonIntegrateRPMDStepKernel::initializeKernels(ContextImpl& context) { hasInitializedKernels = true; pileKernel->addArg(velocities); pileKernel->addArg(cc.getIntegrationUtilities().getRandom()); pileKernel->addArg(); pileKernel->addArg(); pileKernel->addArg(); pileKernel->addArg(); stepKernel->addArg(positions); stepKernel->addArg(velocities); stepKernel->addArg(forces); stepKernel->addArg(); stepKernel->addArg(); velocitiesKernel->addArg(velocities); velocitiesKernel->addArg(forces); velocitiesKernel->addArg(); translateKernel->addArg(positions); translateKernel->addArg(cc.getPosq()); translateKernel->addArg(cc.getAtomIndexArray()); translateKernel->addArg(); copyToContextKernel->addArg(velocities); copyToContextKernel->addArg(cc.getVelm()); copyToContextKernel->addArg(); copyToContextKernel->addArg(cc.getPosq()); copyToContextKernel->addArg(cc.getAtomIndexArray()); copyToContextKernel->addArg(); copyFromContextKernel->addArg(cc.getLongForceBuffer()); copyFromContextKernel->addArg(); copyFromContextKernel->addArg(cc.getVelm()); copyFromContextKernel->addArg(velocities); copyFromContextKernel->addArg(cc.getPosq()); copyFromContextKernel->addArg(); copyFromContextKernel->addArg(cc.getAtomIndexArray()); copyFromContextKernel->addArg(); for (auto& g : groupsByCopies) { int copies = g.first; positionContractionKernels[copies]->addArg(positions); positionContractionKernels[copies]->addArg(contractedPositions); forceContractionKernels[copies]->addArg(forces); forceContractionKernels[copies]->addArg(contractedForces); } } void CommonIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) { ContextSelector selector(cc); if (!hasInitializedKernels) initializeKernels(context); IntegrationUtilities& integration = cc.getIntegrationUtilities(); // Loop over copies and compute the force on each one. if (!forcesAreValid) computeForces(context); // Apply the PILE-L thermostat. bool useDoublePrecision = (cc.getUseDoublePrecision() || cc.getUseMixedPrecision()); double dt = integrator.getStepSize(); pileKernel->setArg(2, integration.prepareRandomNumbers(numParticles*numCopies)); if (useDoublePrecision) { pileKernel->setArg(3, dt); pileKernel->setArg(4, integrator.getTemperature()*BOLTZ); pileKernel->setArg(5, integrator.getFriction()); stepKernel->setArg(3, dt); stepKernel->setArg(4, integrator.getTemperature()*BOLTZ); velocitiesKernel->setArg(2, dt); } else { pileKernel->setArg(3, (float) dt); pileKernel->setArg(4, (float) (integrator.getTemperature()*BOLTZ)); pileKernel->setArg(5, (float) integrator.getFriction()); stepKernel->setArg(3, (float) dt); stepKernel->setArg(4, (float) (integrator.getTemperature()*BOLTZ)); velocitiesKernel->setArg(2, (float) dt); } if (integrator.getApplyThermostat()) pileKernel->execute(numParticles*numCopies, workgroupSize); // Update positions and velocities. stepKernel->execute(numParticles*numCopies, workgroupSize); // Calculate forces based on the updated positions. computeForces(context); // Update velocities. velocitiesKernel->execute(numParticles*numCopies, workgroupSize); // Apply the PILE-L thermostat again. if (integrator.getApplyThermostat()) { pileKernel->setArg(2, integration.prepareRandomNumbers(numParticles*numCopies)); pileKernel->execute(numParticles*numCopies, workgroupSize); } // Update the time and step count. cc.setTime(cc.getTime()+dt); cc.setStepCount(cc.getStepCount()+1); cc.reorderAtoms(); if (cc.getAtomsWereReordered() && cc.getNonbondedUtilities().getUsePeriodic()) { // Atoms may have been translated into a different periodic box, so apply // the same translation to all the beads. translateKernel->setArg(3, numCopies-1); translateKernel->execute(cc.getNumAtoms()); } } void CommonIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { // Compute forces from all groups that didn't have a specified contraction. copyToContextKernel->setArg(2, positions); copyFromContextKernel->setArg(1, forces); copyFromContextKernel->setArg(5, positions); for (int i = 0; i < numCopies; i++) { copyToContextKernel->setArg(5, i); copyToContextKernel->execute(cc.getNumAtoms()); context.computeVirtualSites(); Vec3 initialBox[3]; context.getPeriodicBoxVectors(initialBox[0], initialBox[1], initialBox[2]); context.updateContextState(); Vec3 finalBox[3]; context.getPeriodicBoxVectors(finalBox[0], finalBox[1], finalBox[2]); if (initialBox[0] != finalBox[0] || initialBox[1] != finalBox[1] || initialBox[2] != finalBox[2]) throw OpenMMException("Standard barostats cannot be used with RPMDIntegrator. Use RPMDMonteCarloBarostat instead."); context.calcForcesAndEnergy(true, false, groupsNotContracted); copyFromContextKernel->setArg(7, i); copyFromContextKernel->execute(cc.getNumAtoms()); } // Now loop over contractions and compute forces from them. if (groupsByCopies.size() > 0) { copyToContextKernel->setArg(2, contractedPositions); copyFromContextKernel->setArg(1, contractedForces); copyFromContextKernel->setArg(5, contractedPositions); for (auto& g : groupsByCopies) { int copies = g.first; int groupFlags = g.second; // Find the contracted positions. positionContractionKernels[copies]->execute(numParticles*numCopies, workgroupSize); // Compute forces. for (int i = 0; i < copies; i++) { copyToContextKernel->setArg(5, i); copyToContextKernel->execute(cc.getNumAtoms()); context.computeVirtualSites(); context.calcForcesAndEnergy(true, false, groupFlags); copyFromContextKernel->setArg(7, i); copyFromContextKernel->execute(cc.getNumAtoms()); } // Apply the forces to the original copies. forceContractionKernels[copies]->execute(numParticles*numCopies, workgroupSize); } } if (groupsByCopies.size() > 0) { // Ensure the Context contains the positions from the last copy, since we'll assume that later. copyToContextKernel->setArg(2, positions); copyToContextKernel->setArg(5, numCopies-1); copyToContextKernel->execute(cc.getNumAtoms()); } } double CommonIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, const RPMDIntegrator& integrator) { return cc.getIntegrationUtilities().computeKineticEnergy(0); } void CommonIntegrateRPMDStepKernel::setPositions(int copy, const vector& pos) { if (!positions.isInitialized()) throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context"); if (pos.size() != numParticles) throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()"); // Adjust the positions based on the current cell offsets. const vector& order = cc.getAtomIndex(); Vec3 a, b, c; cc.getPeriodicBoxVectors(a, b, c); vector offsetPos(numParticles); for (int i = 0; i < numParticles; ++i) { mm_int4 offset = cc.getPosCellOffsets()[i]; offsetPos[order[i]] = pos[order[i]] + Vec3(offset.x*a[0], offset.y*b[1], offset.z*c[2]); } // Record the positions. ContextSelector selector(cc); if (cc.getUseDoublePrecision()) { vector posq(cc.getPaddedNumAtoms()); cc.getPosq().download(posq); for (int i = 0; i < numParticles; i++) posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w); positions.uploadSubArray(&posq[0], copy*cc.getPaddedNumAtoms(), numParticles); } else if (cc.getUseMixedPrecision()) { vector posqf(cc.getPaddedNumAtoms()); cc.getPosq().download(posqf); vector posq(cc.getPaddedNumAtoms()); for (int i = 0; i < numParticles; i++) posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w); positions.uploadSubArray(&posq[0], copy*cc.getPaddedNumAtoms(), numParticles); } else { vector posq(cc.getPaddedNumAtoms()); cc.getPosq().download(posq); for (int i = 0; i < numParticles; i++) posq[i] = mm_float4((float) offsetPos[i][0], (float) offsetPos[i][1], (float) offsetPos[i][2], posq[i].w); positions.uploadSubArray(&posq[0], copy*cc.getPaddedNumAtoms(), numParticles); } } void CommonIntegrateRPMDStepKernel::setVelocities(int copy, const vector& vel) { if (!velocities.isInitialized()) throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"); if (vel.size() != numParticles) throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()"); ContextSelector selector(cc); if (cc.getUseDoublePrecision() || cc.getUseMixedPrecision()) { vector velm(cc.getPaddedNumAtoms()); cc.getVelm().download(velm); for (int i = 0; i < numParticles; i++) velm[i] = mm_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w); velocities.uploadSubArray(&velm[0], copy*cc.getPaddedNumAtoms(), numParticles); } else { vector velm(cc.getPaddedNumAtoms()); cc.getVelm().download(velm); for (int i = 0; i < numParticles; i++) velm[i] = mm_float4((float) vel[i][0], (float) vel[i][1], (float) vel[i][2], velm[i].w); velocities.uploadSubArray(&velm[0], copy*cc.getPaddedNumAtoms(), numParticles); } } void CommonIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) { ContextSelector selector(cc); if (!hasInitializedKernels) initializeKernels(context); copyToContextKernel->setArg(2, positions); copyToContextKernel->setArg(5, copy); copyToContextKernel->execute(cc.getNumAtoms()); } string CommonIntegrateRPMDStepKernel::createFFT(int size, const string& variable, bool forward) { stringstream source; int stage = 0; int L = size; int m = 1; string sign = (forward ? "1.0f" : "-1.0f"); string multReal = (forward ? "multiplyComplexRealPart" : "multiplyComplexRealPartConj"); string multImag = (forward ? "multiplyComplexImagPart" : "multiplyComplexImagPartConj"); source<<"{\n"; source<<"LOCAL_ARG mixed3* real0 = "< 1) { int input = stage%2; int output = 1-input; int radix; if (L%5 == 0) radix = 5; else if (L%4 == 0) radix = 4; else if (L%3 == 0) radix = 3; else if (L%2 == 0) radix = 2; else throw OpenMMException("Illegal size for FFT: "+cc.intToString(size)); source<<"{\n"; L = L/radix; source<<"// Pass "<<(stage+1)<<" (radix "<