/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit. * * See https://openmm.org/development. * * * * Portions copyright (c) 2008-2025 Stanford University and the Authors. * * Authors: Peter Eastman, Evan Pretti * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . * * -------------------------------------------------------------------------- */ #include "openmm/Context.h" #include "openmm/internal/ConstantPotentialForceImpl.h" #include "openmm/common/BondedUtilities.h" #include "openmm/common/CommonCalcConstantPotentialForce.h" #include "openmm/common/CommonKernelUtilities.h" #include "openmm/common/ContextSelector.h" #include "openmm/common/NonbondedUtilities.h" #include "CommonKernelSources.h" #include "SimTKOpenMMRealType.h" #include #include #include #include #include #include "tnt_array2d.h" #include "jama_cholesky.h" using namespace OpenMM; using namespace std; class CommonCalcConstantPotentialForceKernel::ForceInfo : public ComputeForceInfo { public: ForceInfo(const ConstantPotentialForce& force, const vector& sysElec, const vector& electrodeParams) : force(force), sysElec(sysElec), electrodeParams(electrodeParams) { } bool areParticlesIdentical(int particle1, int particle2) { int elec1 = sysElec[particle1]; int elec2 = sysElec[particle2]; if (elec1 == -1 && elec2 == -1) { // Both particles are non-electrode; check their fixed charges. double charge1, charge2; force.getParticleParameters(particle1, charge1); force.getParticleParameters(particle2, charge2); return (charge1 == charge2); } if (elec1 == -1 || elec2 == -1) { // One particle is non-electrode (but not both, handled above). return false; } // Both particles are electrode particles. mm_double4 electrodeParams1 = electrodeParams[elec1]; mm_double4 electrodeParams2 = electrodeParams[elec2]; return electrodeParams1.x == electrodeParams2.x && electrodeParams1.y == electrodeParams2.y && electrodeParams1.z == electrodeParams2.z; } int getNumParticleGroups() { return force.getNumExceptions(); } void getParticlesInGroup(int index, vector& particles) { int particle1, particle2; double chargeProd; force.getExceptionParameters(index, particle1, particle2, chargeProd); particles.resize(2); particles[0] = particle1; particles[1] = particle2; } bool areGroupsIdentical(int group1, int group2) { int particle1, particle2; double chargeProd1, chargeProd2; force.getExceptionParameters(group1, particle1, particle2, chargeProd1); force.getExceptionParameters(group2, particle1, particle2, chargeProd2); return (chargeProd1 == chargeProd2); } private: const ConstantPotentialForce& force; const vector& sysElec; const vector& electrodeParams; }; class CommonCalcConstantPotentialForceKernel::ReorderListener : public ComputeContext::ReorderListener { public: ReorderListener(ComputeContext& cc, int numElectrodeParticles, int paddedProblemSize, const vector& sysToElec, const vector& elecToSys) : cc(cc), numElectrodeParticles(numElectrodeParticles), paddedProblemSize(paddedProblemSize), sysToElec(sysToElec), elecToSys(elecToSys) { numParticles = cc.getNumAtoms(); lastOrder.assign(cc.getAtomIndex().begin(), cc.getAtomIndex().end()); } void addChargeArray(ComputeArray& chargeArray) { chargeArrays.push_back(&chargeArray); } void execute() { // Reorder guess charges. if (chargeArrays.empty()) { return; } const vector& order = cc.getAtomIndex(); for (int index = 0; index < chargeArrays.size(); index++) { ComputeArray* chargeArray = chargeArrays[index]; vector charges(paddedProblemSize), swap(numElectrodeParticles); chargeArray->download(charges, true); for (int ii = 0; ii < numElectrodeParticles; ii++) { swap[sysToElec[lastOrder[elecToSys[ii]]]] = charges[ii]; } for (int ii = 0; ii < numElectrodeParticles; ii++) { charges[ii] = swap[sysToElec[order[elecToSys[ii]]]]; } chargeArray->upload(charges, true); } lastOrder.assign(order.begin(), order.end()); } private: ComputeContext& cc; int numParticles, numElectrodeParticles, paddedProblemSize; const vector& sysToElec; const vector& elecToSys; vector lastOrder; vector chargeArrays; }; class CommonCalcConstantPotentialForceKernel::InvalidatePostComputation : public ComputeContext::ForcePostComputation { public: InvalidatePostComputation(ComputeContext& cc, CommonConstantPotentialSolver* solver) : cc(cc), solver(solver) { } double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) { if(!cc.getForcesValid()) { solver->discardSavedSolution(); } return 0.0; } private: ComputeContext& cc; CommonConstantPotentialSolver* solver; }; CommonConstantPotentialSolver::CommonConstantPotentialSolver(ComputeContext& cc, int numParticles, int numElectrodeParticles, int paddedProblemSize) : numParticles(numParticles), numElectrodeParticles(numElectrodeParticles), paddedProblemSize(paddedProblemSize), valid(false), hasSavedSolution(false) { savedPositions.initialize(cc, cc.getPosq().getSize(), cc.getPosq().getElementSize(), "savedPositions"); savedCharges.initialize(cc, paddedProblemSize, cc.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "savedCharges"); checkSavedPositionsKernelResult.initialize(cc, 1, "checkSavedPositionsKernelResult"); } CommonConstantPotentialSolver::~CommonConstantPotentialSolver() { } void CommonConstantPotentialSolver::compileKernels(CommonCalcConstantPotentialForceKernel& kernel) { map defines; defines["NUM_PARTICLES"] = kernel.cc.intToString(numParticles); ComputeProgram program = kernel.cc.compileProgram(CommonKernelSources::constantPotentialSolver, defines); checkSavedPositionsKernel = program->createKernel("checkSavedPositions"); checkSavedPositionsKernel->addArg(kernel.cc.getPosq()); checkSavedPositionsKernel->addArg(savedPositions); checkSavedPositionsKernel->addArg(checkSavedPositionsKernelResult); } void CommonConstantPotentialSolver::invalidate() { valid = false; hasSavedSolution = false; } void CommonConstantPotentialSolver::discardSavedSolution() { hasSavedSolution = false; } void CommonConstantPotentialSolver::solve(CommonCalcConstantPotentialForceKernel& kernel) { // If box vectors or positions have not changed, and there is a solution // already computed, we can simply reload it instead of solving again. if (hasSavedSolution) { if (savedBoxVectors[0] != kernel.boxVectors[0] || savedBoxVectors[1] != kernel.boxVectors[1] || savedBoxVectors[2] != kernel.boxVectors[2]) { hasSavedSolution = false; } } if (hasSavedSolution) { kernel.cc.clearBuffer(checkSavedPositionsKernelResult); checkSavedPositionsKernel->execute(numParticles); int result; checkSavedPositionsKernelResult.download(&result); if (result) { hasSavedSolution = false; } } if (hasSavedSolution) { savedCharges.copyTo(kernel.electrodeCharges); kernel.mustUpdateElectrodeCharges = true; return; } solveImpl(kernel); hasSavedSolution = true; savedBoxVectors[0] = kernel.boxVectors[0]; savedBoxVectors[1] = kernel.boxVectors[1]; savedBoxVectors[2] = kernel.boxVectors[2]; kernel.cc.getPosq().copyTo(savedPositions); kernel.electrodeCharges.copyTo(savedCharges); } void CommonConstantPotentialSolver::getGuessChargeArrays(vector& arrays) { arrays.clear(); } CommonConstantPotentialMatrixSolver::CommonConstantPotentialMatrixSolver(ComputeContext& cc, int numParticles, int numElectrodeParticles, int paddedProblemSize) : CommonConstantPotentialSolver(cc, numParticles, numElectrodeParticles, paddedProblemSize) { int elementSize = cc.getUseDoublePrecision() ? sizeof(double) : sizeof(float); electrodePosData.initialize(cc, numElectrodeParticles, cc.getPosq().getElementSize(), "electrodePosData"); capacitance.initialize(cc, (size_t) paddedProblemSize * paddedProblemSize, elementSize, "capacitance"); constraintVector.initialize(cc, numElectrodeParticles, elementSize, "constraintVector"); checkSavedElectrodePositionsKernelResult.initialize(cc, 1, "checkSavedElectrodePositionsKernelResult"); } void CommonConstantPotentialMatrixSolver::compileKernels(CommonCalcConstantPotentialForceKernel& kernel) { CommonConstantPotentialSolver::compileKernels(kernel); map defines; defines["NUM_PARTICLES"] = kernel.cc.intToString(numParticles); defines["NUM_ELECTRODE_PARTICLES"] = kernel.cc.intToString(numElectrodeParticles); defines["THREAD_BLOCK_SIZE"] = kernel.cc.intToString(kernel.maxThreadBlockSize); defines["CHUNK_SIZE"] = kernel.cc.intToString(kernel.chunkSize); defines["CHUNK_COUNT"] = kernel.cc.intToString(kernel.chunkCount); defines["PADDED_PROBLEM_SIZE"] = kernel.cc.intToString(kernel.paddedProblemSize); if (kernel.useChargeConstraint) { defines["USE_CHARGE_CONSTRAINT"] = "1"; } ComputeProgram program = kernel.cc.compileProgram(CommonKernelSources::constantPotentialMatrixSolver, defines); checkSavedElectrodePositionsKernel = program->createKernel("checkSavedElectrodePositions"); checkSavedElectrodePositionsKernel->addArg(kernel.cc.getPosq()); checkSavedElectrodePositionsKernel->addArg(electrodePosData); checkSavedElectrodePositionsKernel->addArg(kernel.elecToSys); checkSavedElectrodePositionsKernel->addArg(checkSavedElectrodePositionsKernelResult); saveElectrodePositionsKernel = program->createKernel("saveElectrodePositions"); saveElectrodePositionsKernel->addArg(kernel.cc.getPosq()); saveElectrodePositionsKernel->addArg(electrodePosData); saveElectrodePositionsKernel->addArg(kernel.elecToSys); solveKernel = program->createKernel("solve"); solveKernel->addArg(kernel.electrodeCharges); solveKernel->addArg(kernel.chargeDerivatives); solveKernel->addArg(capacitance); if (kernel.useChargeConstraint) { solveKernel->addArg(constraintVector); solveKernel->addArg(); // chargeTarget } } void CommonConstantPotentialMatrixSolver::solveImpl(CommonCalcConstantPotentialForceKernel& kernel) { ensureValid(kernel); // Zero electrode charges and get derivatives at zero charge. kernel.cc.clearBuffer(kernel.electrodeCharges); kernel.mustUpdateElectrodeCharges = true; kernel.doDerivatives(); // Solve for electrode charges directly using capacitance matrix and // calculated derivatives. if (kernel.useChargeConstraint) { if (kernel.cc.getUseDoublePrecision()) { solveKernel->setArg(4, kernel.chargeTarget); } else { solveKernel->setArg(4, (float) kernel.chargeTarget); } } solveKernel->execute(kernel.maxThreadBlockSize, kernel.maxThreadBlockSize); kernel.mustUpdateElectrodeCharges = true; } void CommonConstantPotentialMatrixSolver::ensureValid(CommonCalcConstantPotentialForceKernel& kernel) { // Initializes or updates the precomputed capacitance matrix if this is its // first use or electrode parameters have changed since its initialization. // Check for changes to box vectors or electrode positions that might // invalidate a matrix that is currently marked valid. if (valid) { if (boxVectors[0] != kernel.boxVectors[0] || boxVectors[1] != kernel.boxVectors[1] || boxVectors[2] != kernel.boxVectors[2]) { valid = false; } } if (valid) { kernel.cc.clearBuffer(checkSavedElectrodePositionsKernelResult); checkSavedElectrodePositionsKernel->execute(numElectrodeParticles); int result; checkSavedElectrodePositionsKernelResult.download(&result); if (result) { valid = false; } } if (valid) { return; } // We must have a valid neighbor list before populating the matrix. kernel.ensureValidNeighborList(); // Store the current box vectors and electrode positions before updating the // capacitance matrix. valid = true; boxVectors[0] = kernel.boxVectors[0]; boxVectors[1] = kernel.boxVectors[1]; boxVectors[2] = kernel.boxVectors[2]; saveElectrodePositionsKernel->execute(numElectrodeParticles); TNT::Array2D A(paddedProblemSize, paddedProblemSize); vector dUdQ0(numElectrodeParticles); vector dUdQ(numElectrodeParticles); // Get derivatives when all electrode charges are zeroed. kernel.cc.clearBuffer(kernel.electrodeCharges); kernel.mustUpdateElectrodeCharges = true; kernel.doDerivatives(); kernel.chargeDerivatives.download(dUdQ0, true); vector electrodeCharges(paddedProblemSize); for (int ii = 0; ii < numElectrodeParticles; ii++) { // Get derivatives when one electrode charge is set. electrodeCharges[ii] = 1.0; kernel.electrodeCharges.upload(electrodeCharges, true); kernel.mustUpdateElectrodeCharges = true; kernel.doDerivatives(); kernel.chargeDerivatives.download(dUdQ, true); electrodeCharges[ii] = 0.0; // Set matrix elements, subtracting zero charge derivatives so that the // matrix will end up being the (charge-independent) Hessian. for (int jj = 0; jj < ii; jj++) { A[ii][jj] = A[jj][ii] = dUdQ[jj] - dUdQ0[jj]; } A[ii][ii] = dUdQ[ii] - dUdQ0[ii]; for (int jj = numElectrodeParticles; jj < paddedProblemSize; jj++) { A[ii][jj] = A[jj][ii] = 0.0; } } for (int ii = numElectrodeParticles; ii < paddedProblemSize; ii++) { for (int jj = numElectrodeParticles; jj < paddedProblemSize; jj++) { A[ii][jj] = ii == jj ? 1.0 : 0.0; } } // Compute Cholesky decomposition representation of the inverse. JAMA::Cholesky choleskyInverse(A); if (!choleskyInverse.is_spd()) { throw OpenMMException("Electrode matrix not positive definite"); } // Load the results of the Cholesky decomposition into a buffer. TNT::Array2D choleskyLower = choleskyInverse.getL(); vector hostCapacitance(capacitance.getSize()); size_t index = 0; for (int ii = 0; ii < paddedProblemSize; ii++) { double scale = 1.0 / choleskyLower[ii][ii]; // Load the lower triangle. for (int jj = 0; jj < ii; jj++) { hostCapacitance[index++] = choleskyLower[ii][jj] * scale; } // Load the reciprocal of the diagonal element. hostCapacitance[index++] = scale; // Load the transpose of the lower triangle into the upper triangle. for (int jj = ii + 1; jj < paddedProblemSize; jj++) { hostCapacitance[index++] = choleskyLower[jj][ii] * scale; } } capacitance.upload(hostCapacitance, true); // Precompute the appropriate scaling vector to enforce constant total // charge if requested. The vector is parallel to one obtained by solving // Aq = b for all q_i = 1 (ensuring the constrained charges will actually be // the correct constrained minimum of the quadratic form for the energy), // and is scaled so that adding it to a vector of charges increases the // total charge by 1 (making it easy to calculate the necessary offset). if (kernel.useChargeConstraint) { TNT::Array1D solution = choleskyInverse.solve(TNT::Array1D(paddedProblemSize, 1.0)); vector hostConstraintVector(static_cast(solution), static_cast(solution) + numElectrodeParticles); double constraintScaleInv = 0.0; for (int ii = 0; ii < numElectrodeParticles; ii++) { constraintScaleInv += hostConstraintVector[ii]; } double constraintScale = 1.0 / constraintScaleInv; for (int ii = 0; ii < numElectrodeParticles; ii++) { hostConstraintVector[ii] *= constraintScale; } constraintVector.upload(hostConstraintVector, true); } } CommonConstantPotentialCGSolver::CommonConstantPotentialCGSolver(ComputeContext& cc, int numParticles, int numElectrodeParticles, int paddedProblemSize, bool precond) : CommonConstantPotentialSolver(cc, numParticles, numElectrodeParticles, paddedProblemSize), precondRequested(precond) { int elementSize = cc.getUseDoublePrecision() ? sizeof(double) : sizeof(float); threadBlockCount = cc.getNumThreadBlocks(); threadBlockSize = min(cc.getMaxThreadBlockSize(), cc.computeThreadBlockSize(max(5 * elementSize, 4 * elementSize + (int) sizeof(double)))); q.initialize(cc, paddedProblemSize, elementSize, "q"); grad.initialize(cc, numElectrodeParticles, elementSize, "grad"); projGrad.initialize(cc, numElectrodeParticles, elementSize, "projGrad"); precGrad.initialize(cc, numElectrodeParticles, elementSize, "precGrad"); qStep.initialize(cc, paddedProblemSize, elementSize, "qStep"); gradStep.initialize(cc, numElectrodeParticles, elementSize, "gradStep"); grad0.initialize(cc, numElectrodeParticles, elementSize, "grad0"); qLast.initialize(cc, paddedProblemSize, elementSize, "qLast"); blockSums1.initialize(cc, threadBlockCount, 5 * elementSize, "blockSums1"); blockSums2.initialize(cc, 1, 3 * elementSize, "blockSums2"); convergedResult.initialize(cc, 1, sizeof(int), "convergedResult"); if (precondRequested) { // If double precision is supported, this will hold double values; // otherwise, it will hold mm_float2 values. precondVector.initialize(cc, numElectrodeParticles, sizeof(double), "precondVector"); } convergedDownloadStartEvent = cc.createEvent(); convergedDownloadFinishEvent = cc.createEvent(); convergedDownloadQueue = cc.createQueue(); cc.clearBuffer(qLast); } void CommonConstantPotentialCGSolver::compileKernels(CommonCalcConstantPotentialForceKernel& kernel) { CommonConstantPotentialSolver::compileKernels(kernel); map defines; defines["ERROR_TARGET"] = kernel.cc.doubleToString(kernel.cgErrorTol * kernel.cgErrorTol * numElectrodeParticles); defines["NUM_ELECTRODE_PARTICLES"] = kernel.cc.intToString(numElectrodeParticles); defines["THREAD_BLOCK_SIZE"] = kernel.cc.intToString(threadBlockSize); defines["THREAD_BLOCK_COUNT"] = kernel.cc.intToString(threadBlockCount); if (kernel.useChargeConstraint) { defines["USE_CHARGE_CONSTRAINT"] = "1"; } if (precondRequested) { defines["PRECOND_REQUESTED"] = "1"; } ComputeProgram program = kernel.cc.compileProgram(CommonKernelSources::constantPotentialCGSolver, defines); solveInitializeStep1Kernel = program->createKernel("solveInitializeStep1"); solveInitializeStep1Kernel->addArg(kernel.electrodeCharges); solveInitializeStep1Kernel->addArg(qLast); if (kernel.useChargeConstraint) { solveInitializeStep1Kernel->addArg(); // chargeTarget } solveInitializeStep2Kernel = program->createKernel("solveInitializeStep2"); solveInitializeStep2Kernel->addArg(kernel.chargeDerivatives); solveInitializeStep2Kernel->addArg(grad); solveInitializeStep2Kernel->addArg(projGrad); solveInitializeStep2Kernel->addArg(convergedResult); solveInitializeStep3Kernel = program->createKernel("solveInitializeStep3"); solveInitializeStep3Kernel->addArg(kernel.electrodeCharges); solveInitializeStep3Kernel->addArg(kernel.chargeDerivatives); solveInitializeStep3Kernel->addArg(grad); solveInitializeStep3Kernel->addArg(projGrad); solveInitializeStep3Kernel->addArg(precGrad); solveInitializeStep3Kernel->addArg(qStep); solveInitializeStep3Kernel->addArg(grad0); if (precondRequested) { solveInitializeStep3Kernel->addArg(precondVector); solveInitializeStep3Kernel->addArg(); // precondActivated } solveLoopStep1Kernel = program->createKernel("solveLoopStep1"); solveLoopStep1Kernel->addArg(kernel.chargeDerivatives); solveLoopStep1Kernel->addArg(q); solveLoopStep1Kernel->addArg(grad); solveLoopStep1Kernel->addArg(qStep); solveLoopStep1Kernel->addArg(gradStep); solveLoopStep1Kernel->addArg(grad0); solveLoopStep1Kernel->addArg(blockSums1); solveLoopStep2Kernel = program->createKernel("solveLoopStep2"); solveLoopStep2Kernel->addArg(blockSums1); solveLoopStep2Kernel->addArg(convergedResult); solveLoopStep3Kernel = program->createKernel("solveLoopStep3"); solveLoopStep3Kernel->addArg(q); solveLoopStep3Kernel->addArg(grad); solveLoopStep3Kernel->addArg(qStep); solveLoopStep3Kernel->addArg(gradStep); solveLoopStep3Kernel->addArg(blockSums1); solveLoopStep3Kernel->addArg(convergedResult); if (kernel.useChargeConstraint) { solveLoopStep3Kernel->addArg(); // chargeTarget } solveLoopStep4Kernel = program->createKernel("solveLoopStep4"); solveLoopStep4Kernel->addArg(grad); solveLoopStep4Kernel->addArg(projGrad); solveLoopStep4Kernel->addArg(precGrad); solveLoopStep4Kernel->addArg(gradStep); solveLoopStep4Kernel->addArg(blockSums2); solveLoopStep4Kernel->addArg(convergedResult); if (precondRequested) { solveLoopStep4Kernel->addArg(precondVector); solveLoopStep4Kernel->addArg(); // precondActivated } solveLoopStep5Kernel = program->createKernel("solveLoopStep5"); solveLoopStep5Kernel->addArg(kernel.electrodeCharges); solveLoopStep5Kernel->addArg(precGrad); solveLoopStep5Kernel->addArg(qStep); solveLoopStep5Kernel->addArg(blockSums1); solveLoopStep5Kernel->addArg(blockSums2); solveLoopStep5Kernel->addArg(convergedResult); } void CommonConstantPotentialCGSolver::solveImpl(CommonCalcConstantPotentialForceKernel& kernel) { ensureValid(kernel); if (kernel.useChargeConstraint) { if (kernel.cc.getUseDoublePrecision()) { solveInitializeStep1Kernel->setArg(2, kernel.chargeTarget); solveLoopStep3Kernel->setArg(6, kernel.chargeTarget); } else { solveInitializeStep1Kernel->setArg(2, (float) kernel.chargeTarget); solveLoopStep3Kernel->setArg(6, (float) kernel.chargeTarget); } } if (precondRequested) { solveInitializeStep3Kernel->setArg(8, (int) precondActivated); solveLoopStep4Kernel->setArg(7, (int) precondActivated); } int converged; // Evaluate the initial gradient Aq - b. solveInitializeStep1Kernel->execute(threadBlockSize, threadBlockSize); kernel.mustUpdateElectrodeCharges = true; kernel.doDerivatives(); // Check for convergence at the initial guess charges. solveInitializeStep2Kernel->execute(threadBlockSize, threadBlockSize); convergedResult.download(&converged); if (converged) { return; } // Save the current charges, then evaluate the gradient with zero charges // (-b) so that we can later compute Ap as (Ap - b) - (-b). kernel.electrodeCharges.copyTo(q); kernel.cc.clearBuffer(kernel.electrodeCharges); kernel.mustUpdateElectrodeCharges = true; kernel.doDerivatives(); // Prepare for conjugate gradient iterations. solveInitializeStep3Kernel->execute(threadBlockSize, threadBlockSize); kernel.cc.clearBuffer(blockSums1); // Perform conjugate gradient iterations. int* convergedPinned = (int*) kernel.cc.getPinnedBuffer(); for (int iter = 0; iter <= numElectrodeParticles; iter++) { // Evaluate the matrix-vector product A qStep. kernel.mustUpdateElectrodeCharges = true; if (iter > 0) { // This is a subsequent iteration; check for convergence. We can // start clearing buffers for the derivatives while we wait for the // flag to finish being copied from the device back to the host. kernel.initDoDerivatives(); kernel.initPmeExecute(); convergedDownloadFinishEvent->wait(); converged = *convergedPinned; if (converged) { break; } kernel.doDerivatives(false); } else { // This is the first iteration; evaluate derivatives normally. kernel.doDerivatives(); } solveLoopStep1Kernel->execute(numElectrodeParticles, threadBlockSize); solveLoopStep2Kernel->execute(threadBlockSize, threadBlockSize); solveLoopStep3Kernel->execute(numElectrodeParticles, threadBlockSize); // Periodically recompute the gradient vector instead of updating it to // reduce the accumulation of roundoff error. if (iter != 0 && iter % 32 == 0) { kernel.mustUpdateElectrodeCharges = true; kernel.doDerivatives(); kernel.chargeDerivatives.copyTo(grad); } solveLoopStep4Kernel->execute(threadBlockSize, threadBlockSize); convergedDownloadStartEvent->enqueue(); kernel.cc.setCurrentQueue(convergedDownloadQueue); convergedDownloadStartEvent->queueWait(convergedDownloadQueue); convergedResult.download(convergedPinned, false); convergedDownloadFinishEvent->enqueue(); kernel.cc.restoreDefaultQueue(); solveLoopStep5Kernel->execute(numElectrodeParticles, threadBlockSize); if (iter == numElectrodeParticles) { // No more iterations are allowed: download the convergence flag. convergedDownloadFinishEvent->wait(); converged = *convergedPinned; } } if (!converged) { throw OpenMMException("Constant potential conjugate gradient iterations not converged"); } // Store the final charges. q.copyTo(kernel.electrodeCharges); kernel.mustUpdateElectrodeCharges = true; } void CommonConstantPotentialCGSolver::getGuessChargeArrays(vector& arrays) { CommonConstantPotentialSolver::getGuessChargeArrays(arrays); arrays.push_back(&qLast); } void CommonConstantPotentialCGSolver::ensureValid(CommonCalcConstantPotentialForceKernel& kernel) { // Initializes or updates information for a preconditioner for the conjugate // gradient method if this is its first use or electrode parameters have // changed since its initialization. // No action is required if the box vectors have not changed. if (valid && boxVectors[0] == kernel.boxVectors[0] && boxVectors[1] == kernel.boxVectors[1] && boxVectors[2] == kernel.boxVectors[2]) { return; } valid = true; boxVectors[0] = kernel.boxVectors[0]; boxVectors[1] = kernel.boxVectors[1]; boxVectors[2] = kernel.boxVectors[2]; precondActivated = false; if (precondRequested) { // If electrode self-energy contributions differ between electrodes, a // preconditioner may help convergence; otherwise, it provides no // benefit and may slow convergence due to roundoff error. for (int ie = 1; ie < kernel.numElectrodes; ie++) { // Note hostElectrodeParams[1].w has the scale for electrode 0, etc. if (kernel.hostElectrodeParams[ie + 1].w != kernel.hostElectrodeParams[ie].w) { precondActivated = true; break; } } } float pmeTerm = 0.0f; if (precondActivated) { // Save all positions and charges. vector posqSave(kernel.cc.getPaddedNumAtoms()); vector qSave; kernel.cc.getPosq().download(posqSave, true); if (!kernel.usePosqCharges) { qSave.resize(kernel.cc.getPaddedNumAtoms()); kernel.charges.download(qSave, true); } vector posqCopy(posqSave); vector qCopy(qSave); // Zero all charges. if (kernel.usePosqCharges) { for (int i = 0; i < numParticles; i++) { posqCopy[i].w = 0.0; } } else { for (int i = 0; i < numParticles; i++) { qCopy[i] = 0.0; } } // Place a unit charge at the origin. int i0 = kernel.hostElecToSys[0]; posqCopy[i0] = mm_double4(0.0, 0.0, 0.0, 1.0); if (!kernel.usePosqCharges) { qCopy[i0] = 1.0; } kernel.cc.getPosq().upload(posqCopy, true); if (!kernel.usePosqCharges) { kernel.charges.upload(qCopy, true); } // Perform a reference PME calculation with a single charge at the // origin to find the constant offset on the preconditioner diagonal due // to the PME calculation. This will actually vary slightly with // position but only due to finite accuracy of the PME splines, so it is // fine to assume it will be constant for the preconditioner. kernel.cc.clearBuffer(kernel.chargeDerivativesFixed); kernel.pmeShouldSort = true; kernel.pmeExecute(false, false, true); kernel.pmeShouldSort = true; vector derivatives(numElectrodeParticles); kernel.chargeDerivativesFixed.download(derivatives); double pmeTerm = derivatives[0] / (double) 0x100000000; // Restore all positions and charges. kernel.cc.getPosq().upload(posqSave, true); if (!kernel.usePosqCharges) { kernel.charges.upload(qSave, true); } // The diagonal has a contribution from reciprocal space, Ewald // self-interaction, Ewald neutralizing plasma, Gaussian // self-interaction, and Thomas-Fermi contributions. double plasmaScale = CommonCalcConstantPotentialForceKernel::PLASMA_SCALE / (boxVectors[0][0] * boxVectors[1][1] * boxVectors[2][2] * kernel.ewaldAlpha * kernel.ewaldAlpha); vector hostPrecondVector(numElectrodeParticles); double precondScaleInv = 0.0; for (int ii = 0; ii < numElectrodeParticles; ii++) { hostPrecondVector[ii] = 1.0 / (2.0 * (kernel.hostElectrodeParams[kernel.hostElecElec[ii] + 1].w - plasmaScale) + pmeTerm); precondScaleInv += hostPrecondVector[ii]; } double precondScale = 1.0 / precondScaleInv; for (int ii = 0; ii < numElectrodeParticles; ii++) { hostPrecondVector[ii] *= precondScale; } if (kernel.cc.getSupportsDoublePrecision()) { precondVector.upload(hostPrecondVector); } else { vector hostPrecondVectorSplit(numElectrodeParticles); for (int ii = 0; ii < numElectrodeParticles; ii++) { float const x = (float) hostPrecondVector[ii]; hostPrecondVectorSplit[ii].x = x; hostPrecondVectorSplit[ii].y = (float) (hostPrecondVector[ii] - (double) x); } precondVector.upload(hostPrecondVectorSplit); } } } const double CommonCalcConstantPotentialForceKernel::SELF_ALPHA_SCALE = ONE_4PI_EPS0 / sqrt(PI_M); const double CommonCalcConstantPotentialForceKernel::SELF_ETA_SCALE = ONE_4PI_EPS0 / sqrt(2.0 * PI_M); const double CommonCalcConstantPotentialForceKernel::SELF_TF_SCALE = 1.0 / (2.0 * EPSILON0); const double CommonCalcConstantPotentialForceKernel::PLASMA_SCALE = 1.0 / (8.0 * EPSILON0); CommonCalcConstantPotentialForceKernel::~CommonCalcConstantPotentialForceKernel() { ContextSelector selector(cc); if (solver != NULL) { delete solver; } } void CommonCalcConstantPotentialForceKernel::commonInitialize(const System& system, const ConstantPotentialForce& force, bool deviceIsCpu, bool useFixedPointChargeSpreading) { ContextSelector selector(cc); if (cc.getNumContexts() > 1) { throw OpenMMException("ConstantPotentialForce does not support using multiple devices"); } forceGroup = force.getForceGroup(); maxThreadBlockSize = cc.getMaxThreadBlockSize(); int elementSize = cc.getUseDoublePrecision() ? sizeof(double) : sizeof(float); this->deviceIsCpu = deviceIsCpu; this->useFixedPointChargeSpreading = useFixedPointChargeSpreading; this->usePosqCharges = cc.requestPosqCharges(); int forceIndex; for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex) { } string prefix = "constantPotential" + cc.intToString(forceIndex) + "_"; // Get particle parameters. numParticles = force.getNumParticles(); setCharges.resize(numParticles); for (int i = 0; i < numParticles; i++) { force.getParticleParameters(i, setCharges[i]); } // Get exceptions and identify "1-4" exceptions (those that don't zero the // charge product). for (int i = 0; i < force.getNumExceptions(); i++) { int particle1, particle2; double chargeProd; force.getExceptionParameters(i, particle1, particle2, chargeProd); exclusions.push_back(pair(particle1, particle2)); if (chargeProd != 0.0) { exceptionIndex[i] = exceptions.size(); exceptions.push_back(i); } } // Get a list of all exclusions per particle, including exclusions for // self-interaction. vector > exclusionList(numParticles); for (int i = 0; i < numParticles; i++) { exclusionList[i].push_back(i); } for (auto exclusion : exclusions) { exclusionList[exclusion.first].push_back(exclusion.second); exclusionList[exclusion.second].push_back(exclusion.first); } // Get nonbonded parameters. cutoff = force.getCutoffDistance(); ConstantPotentialForceImpl::calcPMEParameters(system, force, ewaldAlpha, gridSizeX, gridSizeY, gridSizeZ); gridSizeX = cc.findLegalFFTDimension(gridSizeX); gridSizeY = cc.findLegalFFTDimension(gridSizeY); gridSizeZ = cc.findLegalFFTDimension(gridSizeZ); // Get electrode parameters. sysToElec will be a map from system particle // indices to electrode particle indices (or -1 if the particle is not an // electrode particle), while elecToSys will be a map from electrode // particle indices to system particle indices. sysElec will be a map from // system particle indices to electrode indices (or -1 if the particle is // not an electrode particle), while elecElec will be a map from electrode // particle indices to electrode indices. Precompute and store electrode // parameters for electrodes [-1, numElectrodes - 1] as [0, numElectrodes] // in hostElectrodeParams for convenient lookup: // x: potential // y: gaussianWidth // z: thomasFermiScale // w: Precomputed self-energy scale numElectrodes = force.getNumElectrodes(); hostSysToElec.resize(cc.getPaddedNumAtoms(), -1); hostSysElec.resize(cc.getPaddedNumAtoms(), -1); hostElectrodeParams.resize(numElectrodes + 1); for (int ie = -1; ie < numElectrodes; ie++) { double potential = 0.0; double gaussianWidth = 0.0; double thomasFermiScale = 0.0; double selfScale = -SELF_ALPHA_SCALE * ewaldAlpha; if (ie != -1) { set electrodeParticles; force.getElectrodeParameters(ie, electrodeParticles, potential, gaussianWidth, thomasFermiScale); for (int i : electrodeParticles) { hostSysToElec[i] = hostElecToSys.size(); hostSysElec[i] = ie; hostElecToSys.push_back(i); hostElecElec.push_back(ie); } selfScale += SELF_ETA_SCALE / gaussianWidth + SELF_TF_SCALE * thomasFermiScale; } hostElectrodeParams[ie + 1] = mm_double4(potential, gaussianWidth, thomasFermiScale, selfScale); } numElectrodeParticles = hostElecToSys.size(); hasElectrodes = (numElectrodeParticles != 0); chunkSize = deviceIsCpu ? 1 : 32; chunkCount = (numElectrodeParticles + chunkSize - 1) / chunkSize; paddedProblemSize = chunkCount * chunkSize; // Clear charges on electrode particles. for (int ii = 0; ii < numElectrodeParticles; ii++) { setCharges[hostElecToSys[ii]] = 0.0; } // If using a charge constraint, set the charge target to be that on the // electrode particles (so that the overall charge is constrained correctly // if the non-electrolyte particles are non-neutral). useChargeConstraint = force.getUseChargeConstraint(); if (useChargeConstraint) { chargeTarget = force.getChargeConstraintTarget(); for (int i = 0; i < numParticles; i++) { chargeTarget -= setCharges[i]; } } // Save external field. force.getExternalField(externalField); // Set up PME. pmeSetup(); // Create bonded force for exclusion parameters. int numExclusions = force.getNumExceptions(); if (numExclusions > 0) { exclusionScales.initialize(cc, numExclusions, elementSize, "exclusionScales"); vector > hostExclusionAtoms(numExclusions, vector(2)); vector hostExclusionScales(numExclusions); for (int i = 0; i < numExclusions; i++) { hostExclusionAtoms[i][0] = exclusions[i].first; hostExclusionAtoms[i][1] = exclusions[i].second; hostExclusionScales[i] = ONE_4PI_EPS0 * setCharges[exclusions[i].first] * setCharges[exclusions[i].second]; } exclusionScales.upload(hostExclusionScales, true); map replacements; replacements["APPLY_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0"; replacements["EWALD_ALPHA"] = cc.doubleToString(ewaldAlpha); replacements["PARAMS"] = cc.getBondedUtilities().addArgument(exclusionScales, "real"); replacements["TWO_OVER_SQRT_PI"] = cc.doubleToString(2.0 / sqrt(M_PI)); cc.getBondedUtilities().addInteraction(hostExclusionAtoms, cc.replaceStrings(CommonKernelSources::constantPotentialExclusions, replacements), forceGroup); } // Upload 1-4 exception parameters and create bonded force. int numExceptions = exceptions.size(); if (numExceptions > 0) { exceptionScales.initialize(cc, numExceptions, elementSize, "exceptionScales"); vector > hostExceptionAtoms(numExceptions, vector(2)); vector hostExceptionScales(numExceptions); for (int i = 0; i < numExceptions; i++) { double chargeProd; force.getExceptionParameters(exceptions[i], hostExceptionAtoms[i][0], hostExceptionAtoms[i][1], chargeProd); hostExceptionScales[i] = ONE_4PI_EPS0 * chargeProd; } exceptionScales.upload(hostExceptionScales, true); map replacements; replacements["APPLY_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0"; replacements["PARAMS"] = cc.getBondedUtilities().addArgument(exceptionScales, "real"); cc.getBondedUtilities().addInteraction(hostExceptionAtoms, cc.replaceStrings(CommonKernelSources::constantPotentialExceptions, replacements), forceGroup); } // Upload parameters for electrodes. electrodeParams.initialize(cc, numElectrodes + 1, 4 * elementSize, "electrodeParams"); electrodeParams.upload(hostElectrodeParams, true); // Initialize lookup tables for constant potential on the device. sysToElec.initialize(cc, cc.getPaddedNumAtoms(), "sysToElec"); sysElec.initialize(cc, cc.getPaddedNumAtoms(), "sysElec"); sysToElec.upload(hostSysToElec); sysElec.upload(hostSysElec); if (hasElectrodes) { elecToSys.initialize(cc, numElectrodeParticles, "elecToSys"); elecElec.initialize(cc, numElectrodeParticles, "elecElec"); elecToSys.upload(hostElecToSys); elecElec.upload(hostElecElec); } // Initialize nonbonded force. map nonbondedReplacements; nonbondedReplacements["EWALD_ALPHA"] = cc.doubleToString(ewaldAlpha); nonbondedReplacements["ONE_4PI_EPS0"] = cc.doubleToString(ONE_4PI_EPS0); nonbondedReplacements["TWO_OVER_SQRT_PI"] = cc.doubleToString(2.0 / sqrt(M_PI)); if (usePosqCharges) { nonbondedReplacements["CHARGE1"] = "posq1.w"; nonbondedReplacements["CHARGE2"] = "posq2.w"; } else { nonbondedReplacements["CHARGE1"] = prefix + "charge1"; nonbondedReplacements["CHARGE2"] = prefix + "charge2"; cc.getNonbondedUtilities().addParameter(ComputeParameterInfo(charges, prefix + "charge", "real", 1)); } nonbondedReplacements["SYSELEC1"] = prefix + "sysElec1"; nonbondedReplacements["SYSELEC2"] = prefix + "sysElec2"; cc.getNonbondedUtilities().addParameter(ComputeParameterInfo(sysElec, prefix + "sysElec", "int", 1)); nonbondedReplacements["PARAMS"] = prefix + "params"; cc.getNonbondedUtilities().addArgument(ComputeParameterInfo(electrodeParams, prefix + "params", "real", 4)); cc.getNonbondedUtilities().addInteraction(true, true, true, force.getCutoffDistance(), exclusionList, cc.replaceStrings(CommonKernelSources::constantPotentialCoulombEnergyForces, nonbondedReplacements), force.getForceGroup(), true, false); // Initialize the constant potential solver. method = force.getConstantPotentialMethod(); cgErrorTol = force.getCGErrorTolerance(); if (method == ConstantPotentialForce::Matrix) { if (hasElectrodes) { solver = new CommonConstantPotentialMatrixSolver(cc, numParticles, numElectrodeParticles, paddedProblemSize); } } else if (method == ConstantPotentialForce::CG) { if (hasElectrodes) { solver = new CommonConstantPotentialCGSolver(cc, numParticles, numElectrodeParticles, paddedProblemSize, force.getUsePreconditioner()); } } else { throw OpenMMException("internal error: invalid constant potential method"); } // Upload fixed charges and initial guesses for electrode charges. hostNonElectrodeCharges = setCharges; hostNonElectrodeCharges.resize(cc.getPaddedNumAtoms()); nonElectrodeCharges.initialize(cc, cc.getPaddedNumAtoms(), elementSize, "nonElectrodeCharges"); nonElectrodeCharges.upload(hostNonElectrodeCharges, true); if (hasElectrodes) { hostElectrodeCharges.resize(paddedProblemSize); electrodeCharges.initialize(cc, paddedProblemSize, elementSize, "electrodeCharges"); electrodeCharges.upload(hostElectrodeCharges, true); chargeDerivatives.initialize(cc, numElectrodeParticles, elementSize, "chargeDerivatives"); chargeDerivativesFixed.initialize(cc, numElectrodeParticles, "chargeDerivativesFixed"); } // nonElectrodeCharges holds all fixed charges, and all electrode charges in // nonElectrodeCharges should be zeroed. electrodeCharges holds all current // electrode charges. charges holds charges to actually use for evaluation // if usePosqCharges is false; otherwise, those charges will be in posq. // The mustUpdate flags indicate that something has changed and charges or // posq must be updated before energy/force/derivative evaluation. charges.initialize(cc, cc.getPaddedNumAtoms(), elementSize, "charges"); mustUpdateNonElectrodeCharges = true; mustUpdateElectrodeCharges = hasElectrodes; totalChargeBuffer.initialize(cc, 1, elementSize, "totalChargeBuffer"); info = new ForceInfo(force, hostSysElec, hostElectrodeParams); cc.addForce(info); // Initialize cell offsets for finite field computation. posCellOffsets.initialize(cc, cc.getPaddedNumAtoms(), "posCellOffsets"); hostPosCellOffsets = cc.getPosCellOffsets(); posCellOffsets.upload(hostPosCellOffsets); // Create a reorder listener to swap electrode guess charges. ReorderListener* listener = new ReorderListener(cc, numElectrodeParticles, paddedProblemSize, hostSysToElec, hostElecToSys); if (hasElectrodes) { vector guessChargeArrays; solver->getGuessChargeArrays(guessChargeArrays); for (ComputeArray* guessChargeArray : guessChargeArrays) { listener->addChargeArray(*guessChargeArray); } } cc.addReorderListener(listener); // Create a post computation object to avoid caching solutions if forces // are invalid (e.g., due to the neighbor list needing to be resized). if (hasElectrodes) { cc.addPostComputation(new InvalidatePostComputation(cc, solver)); } } void CommonCalcConstantPotentialForceKernel::copyParametersToContext(ContextImpl& context, const ConstantPotentialForce& force, int firstParticle, int lastParticle, int firstException, int lastException, int firstElectrode, int lastElectrode) { ContextSelector selector(cc); // Get particle parameters. if (force.getNumParticles() != numParticles) { throw OpenMMException("updateParametersInContext: The number of particles has changed"); } if (firstParticle <= lastParticle) { for (int i = firstParticle; i <= lastParticle; i++) { if (hostSysElec[i] == -1) { force.getParticleParameters(i, setCharges[i]); hostNonElectrodeCharges[i] = setCharges[i]; } } if (cc.getUseDoublePrecision()) { nonElectrodeCharges.uploadSubArray(&hostNonElectrodeCharges[firstParticle], firstParticle, lastParticle - firstParticle + 1); } else { vector hostNonElectrodeChargesFloat(lastParticle - firstParticle + 1); for (int i = firstParticle; i <= lastParticle; i++) { hostNonElectrodeChargesFloat[i - firstParticle] = (float) hostNonElectrodeCharges[i]; } nonElectrodeCharges.uploadSubArray(&hostNonElectrodeChargesFloat[0], firstParticle, lastParticle - firstParticle + 1); } mustUpdateNonElectrodeCharges = true; } // Check exceptions. int numExclusions = force.getNumExceptions(); int numExceptions = exceptions.size(); vector checkExceptions; for (int i = 0; i < numExclusions; i++) { int particle1, particle2; double chargeProd; force.getExceptionParameters(i, particle1, particle2, chargeProd); if (exceptionIndex.find(i) == exceptionIndex.end()) { if (chargeProd != 0.0) { throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed"); } } else { checkExceptions.push_back(i); } } if (checkExceptions.size() != numExceptions) { throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed"); } // Upload exclusion parameters. if (numExclusions > 0 && (firstParticle <= lastParticle || firstException <= lastException)) { vector hostExclusionScales(numExclusions); for (int i = 0; i < numExclusions; i++) { hostExclusionScales[i] = ONE_4PI_EPS0 * setCharges[exclusions[i].first] * setCharges[exclusions[i].second]; } exclusionScales.upload(hostExclusionScales, true); } // Upload exception parameters. if (numExceptions > 0 && firstException <= lastException) { vector hostExceptionScales(numExceptions); for (int i = 0; i < numExceptions; i++) { int particle1, particle2; double chargeProd; force.getExceptionParameters(exceptions[i], particle1, particle2, chargeProd); hostExceptionScales[i] = ONE_4PI_EPS0 * chargeProd; } exceptionScales.upload(hostExceptionScales, true); } // Get electrode parameters. set allElectrodeParticles; for (int ie = 0; ie < force.getNumElectrodes(); ie++) { set electrodeParticles; double potential; double gaussianWidth; double thomasFermiScale; force.getElectrodeParameters(ie, electrodeParticles, potential, gaussianWidth, thomasFermiScale); for (int i : electrodeParticles) { if (hostSysElec[i] != ie) { throw OpenMMException("updateParametersInContext: The electrode assignment of a particle has changed"); } allElectrodeParticles.insert(i); } if (ie >= firstElectrode && ie <= lastElectrode) { double selfScale = SELF_ETA_SCALE / gaussianWidth + SELF_TF_SCALE * thomasFermiScale - SELF_ALPHA_SCALE * ewaldAlpha; hostElectrodeParams[ie + 1] = mm_double4(potential, gaussianWidth, thomasFermiScale, selfScale); } } if (allElectrodeParticles.size() != numElectrodeParticles) { throw OpenMMException("updateParametersInContext: The electrode state of a particle has changed"); } if (firstElectrode <= lastElectrode) { electrodeParams.upload(hostElectrodeParams, true); } // Update charge target. if (useChargeConstraint) { chargeTarget = force.getChargeConstraintTarget(); for (int i = 0; i < numParticles; i++) { chargeTarget -= setCharges[i]; } } // Update external field. force.getExternalField(externalField); cc.invalidateMolecules(info, firstParticle <= lastParticle || firstElectrode <= lastElectrode, firstException <= lastException); if (solver != NULL) { solver->discardSavedSolution(); if (firstElectrode <= lastElectrode) { solver->invalidate(); } } } void CommonCalcConstantPotentialForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const { alpha = ewaldAlpha; nx = gridSizeX; ny = gridSizeY; nz = gridSizeZ; } double CommonCalcConstantPotentialForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { ContextSelector selector(cc); ensureInitialized(context); cc.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]); setKernelInputs(includeEnergy, includeForces); pmeShouldSort = true; if (solver != NULL) { solver->solve(*this); } return doEnergyForces(includeForces, includeEnergy); } void CommonCalcConstantPotentialForceKernel::getCharges(ContextImpl& context, vector& chargesOut) { ContextSelector selector(cc); ensureInitialized(context); ensureValidNeighborList(); cc.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]); setKernelInputs(false, false); pmeShouldSort = true; if (solver != NULL) { solver->solve(*this); } // Preserve fixed charges exactly and load solved values of fluctuating // charges. chargesOut = setCharges; if (hasElectrodes) { const vector& order = cc.getAtomIndex(); electrodeCharges.download(hostElectrodeCharges, true); for (int ii = 0; ii < numElectrodeParticles; ii++) { chargesOut[order[hostElecToSys[ii]]] = hostElectrodeCharges[ii]; } } } void CommonCalcConstantPotentialForceKernel::ensureInitialized(ContextImpl& context) { if (hasInitializedKernel) { return; } NonbondedUtilities& nb = cc.getNonbondedUtilities(); map defines; defines["CUTOFF"] = cc.doubleToString(cutoff); defines["CUTOFF_SQUARED"] = cc.doubleToString(cutoff * cutoff); defines["EWALD_ALPHA"] = cc.doubleToString(ewaldAlpha); defines["ONE_4PI_EPS0"] = cc.doubleToString(ONE_4PI_EPS0); defines["NUM_PARTICLES"] = cc.intToString(numParticles); defines["NUM_ELECTRODE_PARTICLES"] = cc.intToString(numElectrodeParticles); defines["NUM_EXCLUSION_TILES"] = cc.intToString(nb.getExclusionTiles().getSize()); defines["PADDED_NUM_ATOMS"] = cc.intToString(cc.getPaddedNumAtoms()); defines["PLASMA_SCALE"] = cc.doubleToString(PLASMA_SCALE); defines["THREAD_BLOCK_SIZE"] = cc.intToString(maxThreadBlockSize); defines["TILE_SIZE"] = cc.intToString(ComputeContext::TileSize); defines["WORK_GROUP_SIZE"] = cc.intToString(nb.getForceThreadBlockSize()); if (usePosqCharges) { defines["USE_POSQ_CHARGES"] = "1"; } if (deviceIsCpu) { defines["DEVICE_IS_CPU"] = "1"; } ComputeProgram program = cc.compileProgram(CommonKernelSources::constantPotential, defines); updateNonElectrodeChargesKernel = program->createKernel("updateNonElectrodeCharges"); updateNonElectrodeChargesKernel->addArg(cc.getPosq()); updateNonElectrodeChargesKernel->addArg(charges); updateNonElectrodeChargesKernel->addArg(nonElectrodeCharges); updateNonElectrodeChargesKernel->addArg(sysElec); if (hasElectrodes) { updateElectrodeChargesKernel = program->createKernel("updateElectrodeCharges"); updateElectrodeChargesKernel->addArg(cc.getPosq()); updateElectrodeChargesKernel->addArg(charges); updateElectrodeChargesKernel->addArg(electrodeCharges); updateElectrodeChargesKernel->addArg(elecToSys); } getTotalChargeKernel = program->createKernel("getTotalCharge"); getTotalChargeKernel->addArg(cc.getPosq()); getTotalChargeKernel->addArg(charges); getTotalChargeKernel->addArg(totalChargeBuffer); evaluateSelfEnergyForcesKernel = program->createKernel("evaluateSelfEnergyForces"); evaluateSelfEnergyForcesKernel->addArg(cc.getPosq()); evaluateSelfEnergyForcesKernel->addArg(charges); evaluateSelfEnergyForcesKernel->addArg(sysElec); evaluateSelfEnergyForcesKernel->addArg(electrodeParams); evaluateSelfEnergyForcesKernel->addArg(totalChargeBuffer); evaluateSelfEnergyForcesKernel->addArg(posCellOffsets); evaluateSelfEnergyForcesKernel->addArg(); // periodicBoxVecX evaluateSelfEnergyForcesKernel->addArg(); // periodicBoxVecY evaluateSelfEnergyForcesKernel->addArg(); // periodicBoxVecZ evaluateSelfEnergyForcesKernel->addArg(); // externalField evaluateSelfEnergyForcesKernel->addArg(cc.getEnergyBuffer()); evaluateSelfEnergyForcesKernel->addArg(cc.getLongForceBuffer()); if (hasElectrodes) { evaluateDirectDerivativesKernel = program->createKernel("evaluateDirectDerivatives"); evaluateDirectDerivativesKernel->addArg(cc.getPosq()); evaluateDirectDerivativesKernel->addArg(charges); evaluateDirectDerivativesKernel->addArg(sysToElec); evaluateDirectDerivativesKernel->addArg(sysElec); evaluateDirectDerivativesKernel->addArg(electrodeParams); evaluateDirectDerivativesKernel->addArg(chargeDerivativesFixed); evaluateDirectDerivativesKernel->addArg(); // periodicBoxSize evaluateDirectDerivativesKernel->addArg(); // invPeriodicBoxSize evaluateDirectDerivativesKernel->addArg(); // periodicBoxVecX evaluateDirectDerivativesKernel->addArg(); // periodicBoxVecY evaluateDirectDerivativesKernel->addArg(); // periodicBoxVecZ evaluateDirectDerivativesKernel->addArg(nb.getExclusionTiles()); evaluateDirectDerivativesKernel->addArg(nb.getInteractingTiles()); evaluateDirectDerivativesKernel->addArg(nb.getInteractionCount()); evaluateDirectDerivativesKernel->addArg(nb.getBlockCenters()); evaluateDirectDerivativesKernel->addArg(nb.getBlockBoundingBoxes()); evaluateDirectDerivativesKernel->addArg(nb.getInteractingAtoms()); evaluateDirectDerivativesKernel->addArg(); // maxTiles finishDerivativesKernel = program->createKernel("finishDerivatives"); finishDerivativesKernel->addArg(cc.getPosq()); finishDerivativesKernel->addArg(charges); finishDerivativesKernel->addArg(elecToSys); finishDerivativesKernel->addArg(elecElec); finishDerivativesKernel->addArg(electrodeParams); finishDerivativesKernel->addArg(totalChargeBuffer); finishDerivativesKernel->addArg(posCellOffsets); finishDerivativesKernel->addArg(); // periodicBoxVecX finishDerivativesKernel->addArg(); // periodicBoxVecY finishDerivativesKernel->addArg(); // periodicBoxVecZ finishDerivativesKernel->addArg(); // externalField finishDerivativesKernel->addArg(chargeDerivatives); finishDerivativesKernel->addArg(chargeDerivativesFixed); } pmeCompileKernels(); if (solver != NULL) { solver->compileKernels(*this); } hasInitializedKernel = true; } double CommonCalcConstantPotentialForceKernel::doEnergyForces(bool includeForces, bool includeEnergy) { if (mustUpdateNonElectrodeCharges) { updateNonElectrodeChargesKernel->execute(numParticles); mustUpdateNonElectrodeCharges = false; } if (mustUpdateElectrodeCharges) { if (hasElectrodes) { updateElectrodeChargesKernel->execute(numElectrodeParticles); } mustUpdateElectrodeCharges = false; } pmeExecute(includeEnergy, includeForces, false); // Ewald neutralizing plasma and per-particle energy. if (includeEnergy || includeForces) { getTotalChargeKernel->execute(maxThreadBlockSize, maxThreadBlockSize); if (cc.getUseDoublePrecision()) { evaluateSelfEnergyForcesKernel->setArg(9, mm_double4(externalField[0], externalField[1], externalField[2], 0)); } else { evaluateSelfEnergyForcesKernel->setArg(9, mm_float4((float) externalField[0], (float) externalField[1], (float) externalField[2], 0)); } evaluateSelfEnergyForcesKernel->execute(numParticles); } return 0.0; } void CommonCalcConstantPotentialForceKernel::initDoDerivatives() { if (mustUpdateNonElectrodeCharges) { updateNonElectrodeChargesKernel->execute(numParticles); mustUpdateNonElectrodeCharges = false; } if (mustUpdateElectrodeCharges) { if (hasElectrodes) { updateElectrodeChargesKernel->execute(numElectrodeParticles); } mustUpdateElectrodeCharges = false; } cc.clearBuffer(chargeDerivatives); cc.clearBuffer(chargeDerivativesFixed); } void CommonCalcConstantPotentialForceKernel::doDerivatives(bool init) { if (init) { initDoDerivatives(); } pmeExecute(false, false, true, init); NonbondedUtilities& nb = cc.getNonbondedUtilities(); evaluateDirectDerivativesKernel->execute(nb.getNumForceThreadBlocks() * nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize()); // Ewald neutralizing plasma and per-particle derivatives. getTotalChargeKernel->execute(maxThreadBlockSize, maxThreadBlockSize); finishDerivativesKernel->execute(numElectrodeParticles); } void CommonCalcConstantPotentialForceKernel::pmeSetup() { pmeDefines["PME_ORDER"] = cc.intToString(PmeOrder); pmeDefines["NUM_ATOMS"] = cc.intToString(numParticles); pmeDefines["PADDED_NUM_ATOMS"] = cc.intToString(cc.getPaddedNumAtoms()); pmeDefines["NUM_INDICES"] = cc.intToString(numElectrodeParticles); pmeDefines["RECIP_EXP_FACTOR"] = cc.doubleToString(M_PI * M_PI / (ewaldAlpha * ewaldAlpha)); pmeDefines["GRID_SIZE_X"] = cc.intToString(gridSizeX); pmeDefines["GRID_SIZE_Y"] = cc.intToString(gridSizeY); pmeDefines["GRID_SIZE_Z"] = cc.intToString(gridSizeZ); pmeDefines["EPSILON_FACTOR"] = cc.doubleToString(sqrt(ONE_4PI_EPS0)); pmeDefines["M_PI"] = cc.doubleToString(M_PI); if (deviceIsCpu) { pmeDefines["DEVICE_IS_CPU"] = "1"; } if (useFixedPointChargeSpreading) { pmeDefines["USE_FIXED_POINT_CHARGE_SPREADING"] = "1"; } // Create required data structures. int elementSize = (cc.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int gridElements = gridSizeX*gridSizeY*gridSizeZ; pmeGrid1.initialize(cc, gridElements, 2*elementSize, "pmeGrid1"); pmeGrid2.initialize(cc, gridElements, 2*elementSize, "pmeGrid2"); pmeBsplineModuliX.initialize(cc, gridSizeX, elementSize, "pmeBsplineModuliX"); pmeBsplineModuliY.initialize(cc, gridSizeY, elementSize, "pmeBsplineModuliY"); pmeBsplineModuliZ.initialize(cc, gridSizeZ, elementSize, "pmeBsplineModuliZ"); pmeAtomGridIndex.initialize(cc, numParticles, "pmeAtomGridIndex"); int energyElementSize = (cc.getUseDoublePrecision() || cc.getUseMixedPrecision() ? sizeof(double) : sizeof(float)); sort = cc.createSort(new SortTrait(), cc.getNumAtoms()); fft = cc.createFFT(gridSizeX, gridSizeY, gridSizeZ, true); // Initialize the b-spline moduli. int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ); vector data(PmeOrder); vector ddata(PmeOrder); vector bsplines_data(maxSize); data[PmeOrder-1] = 0.0; data[1] = 0.0; data[0] = 1.0; for (int i = 3; i < PmeOrder; i++) { double div = 1.0/(i-1.0); data[i-1] = 0.0; for (int j = 1; j < (i-1); j++) data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]); data[0] = div*data[0]; } // Differentiate. ddata[0] = -data[0]; for (int i = 1; i < PmeOrder; i++) ddata[i] = data[i-1]-data[i]; double div = 1.0/(PmeOrder-1); data[PmeOrder-1] = 0.0; for (int i = 1; i < (PmeOrder-1); i++) data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]); data[0] = div*data[0]; for (int i = 0; i < maxSize; i++) bsplines_data[i] = 0.0; for (int i = 1; i <= PmeOrder; i++) bsplines_data[i] = data[i-1]; // Evaluate the actual bspline moduli for X/Y/Z. for (int dim = 0; dim < 3; dim++) { int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ); vector moduli(ndata); for (int i = 0; i < ndata; i++) { double sc = 0.0; double ss = 0.0; for (int j = 0; j < ndata; j++) { double arg = (2.0*M_PI*i*j)/ndata; sc += bsplines_data[j]*cos(arg); ss += bsplines_data[j]*sin(arg); } moduli[i] = sc*sc+ss*ss; } for (int i = 0; i < ndata; i++) if (moduli[i] < 1.0e-7) moduli[i] = (moduli[(i-1+ndata)%ndata]+moduli[(i+1)%ndata])*0.5; if (dim == 0) pmeBsplineModuliX.upload(moduli, true); else if (dim == 1) pmeBsplineModuliY.upload(moduli, true); else pmeBsplineModuliZ.upload(moduli, true); } } void CommonCalcConstantPotentialForceKernel::pmeCompileKernels() { map replacements; replacements["CHARGE"] = (usePosqCharges ? "pos.w" : "charges[atom]"); ComputeProgram program = cc.compileProgram(cc.replaceStrings(CommonKernelSources::pme, replacements), pmeDefines); pmeGridIndexKernel = program->createKernel("findAtomGridIndex"); pmeGridIndexKernel->addArg(cc.getPosq()); pmeGridIndexKernel->addArg(pmeAtomGridIndex); for (int i = 0; i < 8; i++) { pmeGridIndexKernel->addArg(); } pmeSpreadChargeKernel = program->createKernel("gridSpreadCharge"); pmeSpreadChargeKernel->addArg(cc.getPosq()); if (useFixedPointChargeSpreading) { pmeSpreadChargeKernel->addArg(pmeGrid2); } else { pmeSpreadChargeKernel->addArg(pmeGrid1); } for (int i = 0; i < 8; i++) { pmeSpreadChargeKernel->addArg(); } pmeSpreadChargeKernel->addArg(pmeAtomGridIndex); pmeSpreadChargeKernel->addArg(charges); pmeConvolutionKernel = program->createKernel("reciprocalConvolution"); pmeConvolutionKernel->addArg(pmeGrid2); pmeConvolutionKernel->addArg(pmeBsplineModuliX); pmeConvolutionKernel->addArg(pmeBsplineModuliY); pmeConvolutionKernel->addArg(pmeBsplineModuliZ); for (int i = 0; i < 3; i++) { pmeConvolutionKernel->addArg(); } pmeEvalEnergyKernel = program->createKernel("gridEvaluateEnergy"); pmeEvalEnergyKernel->addArg(pmeGrid2); pmeEvalEnergyKernel->addArg(cc.getEnergyBuffer()); pmeEvalEnergyKernel->addArg(pmeBsplineModuliX); pmeEvalEnergyKernel->addArg(pmeBsplineModuliY); pmeEvalEnergyKernel->addArg(pmeBsplineModuliZ); for (int i = 0; i < 3; i++) { pmeEvalEnergyKernel->addArg(); } pmeInterpolateForceKernel = program->createKernel("gridInterpolateForce"); pmeInterpolateForceKernel->addArg(cc.getPosq()); pmeInterpolateForceKernel->addArg(cc.getLongForceBuffer()); pmeInterpolateForceKernel->addArg(pmeGrid1); for (int i = 0; i < 8; i++) { pmeInterpolateForceKernel->addArg(); } pmeInterpolateForceKernel->addArg(pmeAtomGridIndex); pmeInterpolateForceKernel->addArg(charges); if (hasElectrodes) { pmeInterpolateChargeDerivativesKernel = program->createKernel("gridInterpolateChargeDerivatives"); pmeInterpolateChargeDerivativesKernel->addArg(cc.getPosq()); pmeInterpolateChargeDerivativesKernel->addArg(chargeDerivativesFixed); pmeInterpolateChargeDerivativesKernel->addArg(pmeGrid1); for (int i = 0; i < 8; i++) { pmeInterpolateChargeDerivativesKernel->addArg(); } pmeInterpolateChargeDerivativesKernel->addArg(elecToSys); } if (useFixedPointChargeSpreading) { pmeFinishSpreadChargeKernel = program->createKernel("finishSpreadCharge"); pmeFinishSpreadChargeKernel->addArg(pmeGrid2); pmeFinishSpreadChargeKernel->addArg(pmeGrid1); } } void CommonCalcConstantPotentialForceKernel::initPmeExecute() { if (useFixedPointChargeSpreading) { cc.clearBuffer(pmeGrid2); } else { cc.clearBuffer(pmeGrid1); } } void CommonCalcConstantPotentialForceKernel::pmeExecute(bool includeEnergy, bool includeForces, bool includeChargeDerivatives, bool init) { if (init) { initPmeExecute(); } if (pmeShouldSort) { pmeGridIndexKernel->execute(cc.getNumAtoms()); sort->sort(pmeAtomGridIndex); pmeShouldSort = false; } pmeSpreadChargeKernel->execute(cc.getNumAtoms()); if (useFixedPointChargeSpreading) { pmeFinishSpreadChargeKernel->execute(gridSizeX*gridSizeY*gridSizeZ); } fft->execFFT(pmeGrid1, pmeGrid2, true); if (includeEnergy) { pmeEvalEnergyKernel->execute(gridSizeX*gridSizeY*gridSizeZ); } if (includeForces || includeChargeDerivatives) { pmeConvolutionKernel->execute(gridSizeX*gridSizeY*gridSizeZ); fft->execFFT(pmeGrid2, pmeGrid1, false); if (includeForces) { if (deviceIsCpu) { pmeInterpolateForceKernel->execute(cc.getNumThreadBlocks(), 1); } else { pmeInterpolateForceKernel->execute(cc.getNumAtoms()); } } if (includeChargeDerivatives && hasElectrodes) { if (deviceIsCpu) { pmeInterpolateChargeDerivativesKernel->execute(cc.getNumThreadBlocks(), 1); } else { pmeInterpolateChargeDerivativesKernel->execute(numElectrodeParticles); } } } } void CommonCalcConstantPotentialForceKernel::setKernelInputs(bool includeEnergy, bool includeForces) { double determinant = boxVectors[0][0] * boxVectors[1][1] * boxVectors[2][2]; double scale = 1.0 / determinant; mm_double4 recipBoxVectors[3]; recipBoxVectors[0] = mm_double4(boxVectors[1][1] * boxVectors[2][2] * scale, 0, 0, 0); recipBoxVectors[1] = mm_double4(-boxVectors[1][0] * boxVectors[2][2] * scale, boxVectors[0][0] * boxVectors[2][2] * scale, 0, 0); recipBoxVectors[2] = mm_double4((boxVectors[1][0] * boxVectors[2][1] - boxVectors[1][1] * boxVectors[2][0]) * scale, -boxVectors[0][0] * boxVectors[2][1] * scale, boxVectors[0][0] * boxVectors[1][1] * scale, 0); mm_float4 recipBoxVectorsFloat[3]; for (int i = 0; i < 3; i++) { recipBoxVectorsFloat[i] = mm_float4((float) recipBoxVectors[i].x, (float) recipBoxVectors[i].y, (float) recipBoxVectors[i].z, 0); } setPeriodicBoxArgs(cc, pmeGridIndexKernel, 2); setPeriodicBoxArgs(cc, pmeSpreadChargeKernel, 2); if (includeEnergy || includeForces) { if (cc.getUseDoublePrecision()) { evaluateSelfEnergyForcesKernel->setArg(6, mm_double4(boxVectors[0][0], boxVectors[0][1], boxVectors[0][2], 0)); evaluateSelfEnergyForcesKernel->setArg(7, mm_double4(boxVectors[1][0], boxVectors[1][1], boxVectors[1][2], 0)); evaluateSelfEnergyForcesKernel->setArg(8, mm_double4(boxVectors[2][0], boxVectors[2][1], boxVectors[2][2], 0)); } else { evaluateSelfEnergyForcesKernel->setArg(6, mm_float4((float) boxVectors[0][0], (float) boxVectors[0][1], (float) boxVectors[0][2], 0)); evaluateSelfEnergyForcesKernel->setArg(7, mm_float4((float) boxVectors[1][0], (float) boxVectors[1][1], (float) boxVectors[1][2], 0)); evaluateSelfEnergyForcesKernel->setArg(8, mm_float4((float) boxVectors[2][0], (float) boxVectors[2][1], (float) boxVectors[2][2], 0)); } } if (includeForces) { setPeriodicBoxArgs(cc, pmeInterpolateForceKernel, 3); } if (hasElectrodes) { setPeriodicBoxArgs(cc, pmeInterpolateChargeDerivativesKernel, 3); setPeriodicBoxArgs(cc, evaluateDirectDerivativesKernel, 6); evaluateDirectDerivativesKernel->setArg(17, (unsigned int) cc.getNonbondedUtilities().getInteractingTiles().getSize()); if (cc.getUseDoublePrecision()) { finishDerivativesKernel->setArg(7, mm_double4(boxVectors[0][0], boxVectors[0][1], boxVectors[0][2], 0)); finishDerivativesKernel->setArg(8, mm_double4(boxVectors[1][0], boxVectors[1][1], boxVectors[1][2], 0)); finishDerivativesKernel->setArg(9, mm_double4(boxVectors[2][0], boxVectors[2][1], boxVectors[2][2], 0)); finishDerivativesKernel->setArg(10, mm_double4(externalField[0], externalField[1], externalField[2], 0)); } else { finishDerivativesKernel->setArg(7, mm_float4((float) boxVectors[0][0], (float) boxVectors[0][1], (float) boxVectors[0][2], 0)); finishDerivativesKernel->setArg(8, mm_float4((float) boxVectors[1][0], (float) boxVectors[1][1], (float) boxVectors[1][2], 0)); finishDerivativesKernel->setArg(9, mm_float4((float) boxVectors[2][0], (float) boxVectors[2][1], (float) boxVectors[2][2], 0)); finishDerivativesKernel->setArg(10, mm_float4((float) externalField[0], (float) externalField[1], (float) externalField[2], 0)); } } if (cc.getUseDoublePrecision()) { pmeGridIndexKernel->setArg(7, recipBoxVectors[0]); pmeGridIndexKernel->setArg(8, recipBoxVectors[1]); pmeGridIndexKernel->setArg(9, recipBoxVectors[2]); pmeSpreadChargeKernel->setArg(7, recipBoxVectors[0]); pmeSpreadChargeKernel->setArg(8, recipBoxVectors[1]); pmeSpreadChargeKernel->setArg(9, recipBoxVectors[2]); if (includeEnergy) { pmeEvalEnergyKernel->setArg(5, recipBoxVectors[0]); pmeEvalEnergyKernel->setArg(6, recipBoxVectors[1]); pmeEvalEnergyKernel->setArg(7, recipBoxVectors[2]); } if (includeForces || hasElectrodes) { pmeConvolutionKernel->setArg(4, recipBoxVectors[0]); pmeConvolutionKernel->setArg(5, recipBoxVectors[1]); pmeConvolutionKernel->setArg(6, recipBoxVectors[2]); if (includeForces) { pmeInterpolateForceKernel->setArg(8, recipBoxVectors[0]); pmeInterpolateForceKernel->setArg(9, recipBoxVectors[1]); pmeInterpolateForceKernel->setArg(10, recipBoxVectors[2]); } if (hasElectrodes) { pmeInterpolateChargeDerivativesKernel->setArg(8, recipBoxVectors[0]); pmeInterpolateChargeDerivativesKernel->setArg(9, recipBoxVectors[1]); pmeInterpolateChargeDerivativesKernel->setArg(10, recipBoxVectors[2]); } } } else { pmeGridIndexKernel->setArg(7, recipBoxVectorsFloat[0]); pmeGridIndexKernel->setArg(8, recipBoxVectorsFloat[1]); pmeGridIndexKernel->setArg(9, recipBoxVectorsFloat[2]); pmeSpreadChargeKernel->setArg(7, recipBoxVectorsFloat[0]); pmeSpreadChargeKernel->setArg(8, recipBoxVectorsFloat[1]); pmeSpreadChargeKernel->setArg(9, recipBoxVectorsFloat[2]); if (includeEnergy) { pmeEvalEnergyKernel->setArg(5, recipBoxVectorsFloat[0]); pmeEvalEnergyKernel->setArg(6, recipBoxVectorsFloat[1]); pmeEvalEnergyKernel->setArg(7, recipBoxVectorsFloat[2]); } if (includeForces || hasElectrodes) { pmeConvolutionKernel->setArg(4, recipBoxVectorsFloat[0]); pmeConvolutionKernel->setArg(5, recipBoxVectorsFloat[1]); pmeConvolutionKernel->setArg(6, recipBoxVectorsFloat[2]); if (includeForces) { pmeInterpolateForceKernel->setArg(8, recipBoxVectorsFloat[0]); pmeInterpolateForceKernel->setArg(9, recipBoxVectorsFloat[1]); pmeInterpolateForceKernel->setArg(10, recipBoxVectorsFloat[2]); } if (hasElectrodes) { pmeInterpolateChargeDerivativesKernel->setArg(8, recipBoxVectorsFloat[0]); pmeInterpolateChargeDerivativesKernel->setArg(9, recipBoxVectorsFloat[1]); pmeInterpolateChargeDerivativesKernel->setArg(10, recipBoxVectorsFloat[2]); } } } const vector& newPosCellOffsets = cc.getPosCellOffsets(); bool mustUpdatePosCellOffsets = false; for (int i = 0; i < numParticles; i++) { mm_int4 oldOffset = hostPosCellOffsets[i]; mm_int4 newOffset = newPosCellOffsets[i]; if (newOffset.x != oldOffset.x || newOffset.y != oldOffset.y || newOffset.z != oldOffset.z) { mustUpdatePosCellOffsets = true; break; } } if (mustUpdatePosCellOffsets) { hostPosCellOffsets.assign(newPosCellOffsets.begin(), newPosCellOffsets.end()); posCellOffsets.upload(hostPosCellOffsets); } } void CommonCalcConstantPotentialForceKernel::ensureValidNeighborList() { // Save the forcesValid flag since we use it to monitor the neighbor list build. bool oldForcesValid = cc.getForcesValid(); do { // If we need to try to build the neighbor list again (i.e., it needs to be made bigger), // getForcesValid() will return false after computeInteractions() completes. cc.setForcesValid(true); cc.getNonbondedUtilities().prepareInteractions(1 << forceGroup); cc.getNonbondedUtilities().computeInteractions(1 << forceGroup, false, false); } while(!cc.getForcesValid()); if (hasElectrodes) { evaluateDirectDerivativesKernel->setArg(17, (unsigned int) cc.getNonbondedUtilities().getInteractingTiles().getSize()); } // Restore the old value of the flag. cc.setForcesValid(oldForcesValid); }