/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2009 Stanford University and the Authors. * * Authors: Peter Eastman * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . * * -------------------------------------------------------------------------- */ #include "OpenCLIntegrationUtilities.h" #include "OpenCLArray.h" #include "OpenCLKernelSources.h" #include "openmm/HarmonicAngleForce.h" #include "quern.h" #include "OpenCLExpressionUtilities.h" #include #include #include #include using namespace OpenMM; using namespace std; struct OpenCLIntegrationUtilities::ShakeCluster { int centralID; int peripheralID[3]; int size; bool valid; double distance; double centralInvMass, peripheralInvMass; ShakeCluster() : valid(true) { } ShakeCluster(int centralID, double invMass) : centralID(centralID), centralInvMass(invMass), size(0), valid(true) { } void addAtom(int id, double dist, double invMass) { if (size == 3 || (size > 0 && abs(dist-distance)/distance > 1e-8) || (size > 0 && abs(invMass-peripheralInvMass)/peripheralInvMass > 1e-8)) valid = false; else { peripheralID[size++] = id; distance = dist; peripheralInvMass = invMass; } } void markInvalid(map& allClusters, vector& invalidForShake) { valid = false; invalidForShake[centralID] = true; for (int i = 0; i < size; i++) { invalidForShake[peripheralID[i]] = true; map::iterator otherCluster = allClusters.find(peripheralID[i]); if (otherCluster != allClusters.end() && otherCluster->second.valid) otherCluster->second.markInvalid(allClusters, invalidForShake); } } }; struct OpenCLIntegrationUtilities::ConstraintOrderer : public binary_function { const vector& atom1; const vector& atom2; const vector& constraints; ConstraintOrderer(const vector& atom1, const vector& atom2, const vector& constraints) : atom1(atom1), atom2(atom2), constraints(constraints) { } bool operator()(int x, int y) { int ix = constraints[x]; int iy = constraints[y]; if (atom1[ix] != atom1[iy]) return atom1[ix] < atom1[iy]; return atom2[ix] < atom2[iy]; } }; OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, const System& system) : context(context), posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL), random(NULL), randomSeed(NULL), randomPos(0), stepSize(NULL), ccmaAtoms(NULL), ccmaDistance(NULL), ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL), ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConvergedBuffer(NULL), hasInitializedConstraintKernels(false) { // Create workspace arrays. posDelta = new OpenCLArray(context, context.getPaddedNumAtoms(), "posDelta"); stepSize = new OpenCLArray(context, 1, "stepSize", true); stepSize->set(0, mm_float2(0.0f, 0.0f)); stepSize->upload(); // Create kernels for enforcing constraints. cl::Program settleProgram = context.createProgram(OpenCLKernelSources::settle); settleKernel = cl::Kernel(settleProgram, "applySettle"); cl::Program shakeProgram = context.createProgram(OpenCLKernelSources::shakeHydrogens); shakeKernel = cl::Kernel(shakeProgram, "applyShakeToHydrogens"); // Record the set of constraints and how many constraints each atom is involved in. int numConstraints = system.getNumConstraints(); vector atom1(numConstraints); vector atom2(numConstraints); vector distance(numConstraints); vector constraintCount(context.getNumAtoms(), 0); for (int i = 0; i < numConstraints; i++) { system.getConstraintParameters(i, atom1[i], atom2[i], distance[i]); constraintCount[atom1[i]]++; constraintCount[atom2[i]]++; } // Identify clusters of three atoms that can be treated with SETTLE. First, for every // atom that might be part of such a cluster, make a list of the two other atoms it is // connected to. int numAtoms = system.getNumParticles(); vector > settleConstraints(numAtoms); for (int i = 0; i < (int)atom1.size(); i++) { if (constraintCount[atom1[i]] == 2 && constraintCount[atom2[i]] == 2) { settleConstraints[atom1[i]][atom2[i]] = (float) distance[i]; settleConstraints[atom2[i]][atom1[i]] = (float) distance[i]; } } // Now remove the ones that don't actually form closed loops of three atoms. vector settleClusters; for (int i = 0; i < (int)settleConstraints.size(); i++) { if (settleConstraints[i].size() == 2) { int partner1 = settleConstraints[i].begin()->first; int partner2 = (++settleConstraints[i].begin())->first; if (settleConstraints[partner1].size() != 2 || settleConstraints[partner2].size() != 2 || settleConstraints[partner1].find(partner2) == settleConstraints[partner1].end()) settleConstraints[i].clear(); else if (i < partner1 && i < partner2) settleClusters.push_back(i); } else settleConstraints[i].clear(); } // Record the SETTLE clusters. vector isShakeAtom(numAtoms, false); if (settleClusters.size() > 0) { vector atoms; vector params; for (int i = 0; i < (int) settleClusters.size(); i++) { int atom1 = settleClusters[i]; int atom2 = settleConstraints[atom1].begin()->first; int atom3 = (++settleConstraints[atom1].begin())->first; float dist12 = settleConstraints[atom1].find(atom2)->second; float dist13 = settleConstraints[atom1].find(atom3)->second; float dist23 = settleConstraints[atom2].find(atom3)->second; if (dist12 == dist13) { // atom1 is the central atom atoms.push_back(mm_int4(atom1, atom2, atom3, 0)); params.push_back(mm_float2(dist12, dist23)); } else if (dist12 == dist23) { // atom2 is the central atom atoms.push_back(mm_int4(atom2, atom1, atom3, 0)); params.push_back(mm_float2(dist12, dist13)); } else if (dist13 == dist23) { // atom3 is the central atom atoms.push_back(mm_int4(atom3, atom1, atom2, 0)); params.push_back(mm_float2(dist13, dist12)); } else throw OpenMMException("Two of the three distances constrained with SETTLE must be the same."); isShakeAtom[atom1] = true; isShakeAtom[atom2] = true; isShakeAtom[atom3] = true; } settleAtoms = new OpenCLArray(context, atoms.size(), "settleAtoms"); settleParams = new OpenCLArray(context, params.size(), "settleParams"); settleAtoms->upload(atoms); settleParams->upload(params); } // Find clusters consisting of a central atom with up to three peripheral atoms. map clusters; vector invalidForShake(numAtoms, false); for (int i = 0; i < (int) atom1.size(); i++) { if (isShakeAtom[atom1[i]]) continue; // This is being taken care of with SETTLE. // Determine which is the central atom. bool firstIsCentral; if (constraintCount[atom1[i]] > 1) firstIsCentral = true; else if (constraintCount[atom2[i]] > 1) firstIsCentral = false; else if (atom1[i] < atom2[i]) firstIsCentral = true; else firstIsCentral = false; int centralID, peripheralID; if (firstIsCentral) { centralID = atom1[i]; peripheralID = atom2[i]; } else { centralID = atom2[i]; peripheralID = atom1[i]; } // Add it to the cluster. if (clusters.find(centralID) == clusters.end()) { clusters[centralID] = ShakeCluster(centralID, 1.0/system.getParticleMass(centralID)); } ShakeCluster& cluster = clusters[centralID]; cluster.addAtom(peripheralID, distance[i], 1.0/system.getParticleMass(peripheralID)); if (constraintCount[peripheralID] != 1 || invalidForShake[atom1[i]] || invalidForShake[atom2[i]]) { cluster.markInvalid(clusters, invalidForShake); map::iterator otherCluster = clusters.find(peripheralID); if (otherCluster != clusters.end() && otherCluster->second.valid) otherCluster->second.markInvalid(clusters, invalidForShake); } } int validShakeClusters = 0; for (map::iterator iter = clusters.begin(); iter != clusters.end(); ++iter) { ShakeCluster& cluster = iter->second; if (cluster.valid) { cluster.valid = !invalidForShake[cluster.centralID] && cluster.size == constraintCount[cluster.centralID]; for (int i = 0; i < cluster.size; i++) if (invalidForShake[cluster.peripheralID[i]]) cluster.valid = false; if (cluster.valid) ++validShakeClusters; } } // Record the SHAKE clusters. if (validShakeClusters > 0) { vector atoms; vector params; int index = 0; for (map::const_iterator iter = clusters.begin(); iter != clusters.end(); ++iter) { const ShakeCluster& cluster = iter->second; if (!cluster.valid) continue; atoms.push_back(mm_int4(cluster.centralID, cluster.peripheralID[0], (cluster.size > 1 ? cluster.peripheralID[1] : -1), (cluster.size > 2 ? cluster.peripheralID[2] : -1))); params.push_back(mm_float4((cl_float) cluster.centralInvMass, (cl_float) (0.5/(cluster.centralInvMass+cluster.peripheralInvMass)), (cl_float) (cluster.distance*cluster.distance), (cl_float) cluster.peripheralInvMass)); isShakeAtom[cluster.centralID] = true; isShakeAtom[cluster.peripheralID[0]] = true; if (cluster.size > 1) isShakeAtom[cluster.peripheralID[1]] = true; if (cluster.size > 2) isShakeAtom[cluster.peripheralID[2]] = true; ++index; } shakeAtoms = new OpenCLArray(context, atoms.size(), "shakeAtoms"); shakeParams = new OpenCLArray(context, params.size(), "shakeParams"); shakeAtoms->upload(atoms); shakeParams->upload(params); } // Find connected constraints for CCMA. vector ccmaConstraints; for (unsigned i = 0; i < atom1.size(); i++) if (!isShakeAtom[atom1[i]]) ccmaConstraints.push_back(i); // Record the connections between constraints. int numCCMA = (int) ccmaConstraints.size(); if (numCCMA > 0) { vector > atomConstraints(context.getNumAtoms()); for (int i = 0; i < numCCMA; i++) { atomConstraints[atom1[ccmaConstraints[i]]].push_back(i); atomConstraints[atom2[ccmaConstraints[i]]].push_back(i); } vector > linkedConstraints(numCCMA); for (unsigned atom = 0; atom < atomConstraints.size(); atom++) { for (unsigned i = 0; i < atomConstraints[atom].size(); i++) for (unsigned j = 0; j < i; j++) { int c1 = atomConstraints[atom][i]; int c2 = atomConstraints[atom][j]; linkedConstraints[c1].push_back(c2); linkedConstraints[c2].push_back(c1); } } int maxLinks = 0; for (unsigned i = 0; i < linkedConstraints.size(); i++) maxLinks = max(maxLinks, (int) linkedConstraints[i].size()); int maxAtomConstraints = 0; for (unsigned i = 0; i < atomConstraints.size(); i++) maxAtomConstraints = max(maxAtomConstraints, (int) atomConstraints[i].size()); // Compute the constraint coupling matrix vector > atomAngles(numAtoms); HarmonicAngleForce const* angleForce = NULL; for (int i = 0; i < system.getNumForces() && angleForce == NULL; i++) angleForce = dynamic_cast(&system.getForce(i)); if (angleForce != NULL) for (int i = 0; i < angleForce->getNumAngles(); i++) { int particle1, particle2, particle3; double angle, k; angleForce->getAngleParameters(i, particle1, particle2, particle3, angle, k); atomAngles[particle2].push_back(i); } vector > > matrix(numCCMA); for (int j = 0; j < numCCMA; j++) { for (int k = 0; k < numCCMA; k++) { if (j == k) { matrix[j].push_back(pair(j, 1.0)); continue; } double scale; int cj = ccmaConstraints[j]; int ck = ccmaConstraints[k]; int atomj0 = atom1[cj]; int atomj1 = atom2[cj]; int atomk0 = atom1[ck]; int atomk1 = atom2[ck]; int atoma, atomb, atomc; double imj0 = 1.0/system.getParticleMass(atomj0); double imj1 = 1.0/system.getParticleMass(atomj1); if (atomj0 == atomk0) { atoma = atomj1; atomb = atomj0; atomc = atomk1; scale = imj0/(imj0+imj1); } else if (atomj1 == atomk1) { atoma = atomj0; atomb = atomj1; atomc = atomk0; scale = imj1/(imj0+imj1); } else if (atomj0 == atomk1) { atoma = atomj1; atomb = atomj0; atomc = atomk0; scale = imj0/(imj0+imj1); } else if (atomj1 == atomk0) { atoma = atomj0; atomb = atomj1; atomc = atomk1; scale = imj1/(imj0+imj1); } else continue; // These constraints are not connected. // Look for a third constraint forming a triangle with these two. bool foundConstraint = false; for (int m = 0; m < numCCMA; m++) { int other = ccmaConstraints[m]; if ((atom1[other] == atoma && atom2[other] == atomc) || (atom1[other] == atomc && atom2[other] == atoma)) { double d1 = distance[cj]; double d2 = distance[ck]; double d3 = distance[other]; matrix[j].push_back(pair(k, scale*(d1*d1+d2*d2-d3*d3)/(2.0*d1*d2))); foundConstraint = true; break; } } if (!foundConstraint && angleForce != NULL) { // We didn't find one, so look for an angle force field term. const vector& angleCandidates = atomAngles[atomb]; for (vector::const_iterator iter = angleCandidates.begin(); iter != angleCandidates.end(); iter++) { int particle1, particle2, particle3; double angle, ka; angleForce->getAngleParameters(*iter, particle1, particle2, particle3, angle, ka); if ((particle1 == atoma && particle3 == atomc) || (particle3 == atoma && particle1 == atomc)) { matrix[j].push_back(pair(k, scale*cos(angle))); break; } } } } } // Invert it using QR. vector matrixRowStart; vector matrixColIndex; vector matrixValue; for (int i = 0; i < numCCMA; i++) { matrixRowStart.push_back(matrixValue.size()); for (int j = 0; j < (int) matrix[i].size(); j++) { pair element = matrix[i][j]; matrixColIndex.push_back(element.first); matrixValue.push_back(element.second); } } matrixRowStart.push_back(matrixValue.size()); int *qRowStart, *qColIndex, *rRowStart, *rColIndex; double *qValue, *rValue; int result = QUERN_compute_qr(numCCMA, numCCMA, &matrixRowStart[0], &matrixColIndex[0], &matrixValue[0], NULL, &qRowStart, &qColIndex, &qValue, &rRowStart, &rColIndex, &rValue); vector rhs(numCCMA); matrix.clear(); matrix.resize(numCCMA); for (int i = 0; i < numCCMA; i++) { // Extract column i of the inverse matrix. for (int j = 0; j < numCCMA; j++) rhs[j] = (i == j ? 1.0 : 0.0); result = QUERN_multiply_with_q_transpose(numCCMA, qRowStart, qColIndex, qValue, &rhs[0]); result = QUERN_solve_with_r(numCCMA, rRowStart, rColIndex, rValue, &rhs[0], &rhs[0]); for (int j = 0; j < numCCMA; j++) { double value = rhs[j]*distance[ccmaConstraints[i]]/distance[ccmaConstraints[j]]; if (abs(value) > 0.1) matrix[j].push_back(pair(i, value)); } } QUERN_free_result(qRowStart, qColIndex, qValue); QUERN_free_result(rRowStart, rColIndex, rValue); int maxRowElements = 0; for (unsigned i = 0; i < matrix.size(); i++) maxRowElements = max(maxRowElements, (int) matrix[i].size()); maxRowElements++; // Sort the constraints. vector constraintOrder(numCCMA); for (int i = 0; i < numCCMA; ++i) constraintOrder[i] = i; sort(constraintOrder.begin(), constraintOrder.end(), ConstraintOrderer(atom1, atom2, ccmaConstraints)); vector inverseOrder(numCCMA); for (int i = 0; i < numCCMA; ++i) inverseOrder[constraintOrder[i]] = i; for (int i = 0; i < (int)matrix.size(); ++i) for (int j = 0; j < (int)matrix[i].size(); ++j) matrix[i][j].first = inverseOrder[matrix[i][j].first]; // Record the CCMA data structures. ccmaAtoms = new OpenCLArray(context, numCCMA, "CcmaAtoms"); ccmaDistance = new OpenCLArray(context, numCCMA, "CcmaDistance"); ccmaAtomConstraints = new OpenCLArray(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints"); ccmaNumAtomConstraints = new OpenCLArray(context, numAtoms, "CcmaAtomConstraintsIndex"); ccmaDelta1 = new OpenCLArray(context, numCCMA, "CcmaDelta1"); ccmaDelta2 = new OpenCLArray(context, numCCMA, "CcmaDelta2"); ccmaConverged = new OpenCLArray(context, 2, "CcmaConverged"); ccmaConvergedBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, 2*sizeof(cl_int)); ccmaConvergedMemory = (cl_int*) context.getQueue().enqueueMapBuffer(*ccmaConvergedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, 2*sizeof(cl_int)); ccmaReducedMass = new OpenCLArray(context, numCCMA, "CcmaReducedMass"); ccmaConstraintMatrixColumn = new OpenCLArray(context, numCCMA*maxRowElements, "ConstraintMatrixColumn"); ccmaConstraintMatrixValue = new OpenCLArray(context, numCCMA*maxRowElements, "ConstraintMatrixValue"); vector atomsVec(ccmaAtoms->getSize()); vector distanceVec(ccmaDistance->getSize()); vector atomConstraintsVec(ccmaAtomConstraints->getSize()); vector numAtomConstraintsVec(ccmaNumAtomConstraints->getSize()); vector reducedMassVec(ccmaReducedMass->getSize()); vector constraintMatrixColumnVec(ccmaConstraintMatrixColumn->getSize()); vector constraintMatrixValueVec(ccmaConstraintMatrixValue->getSize()); for (int i = 0; i < numCCMA; i++) { int index = constraintOrder[i]; int c = ccmaConstraints[index]; atomsVec[i].x = atom1[c]; atomsVec[i].y = atom2[c]; distanceVec[i].w = (float) distance[c]; reducedMassVec[i] = (float) (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c]))); for (unsigned int j = 0; j < matrix[index].size(); j++) { constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first; constraintMatrixValueVec[i+j*numCCMA] = (float) matrix[index][j].second; } constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA; } for (unsigned int i = 0; i < atomConstraints.size(); i++) { numAtomConstraintsVec[i] = atomConstraints[i].size(); for (unsigned int j = 0; j < atomConstraints[i].size(); j++) { bool forward = (atom1[ccmaConstraints[atomConstraints[i][j]]] == i); atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1); } } ccmaAtoms->upload(atomsVec); ccmaDistance->upload(distanceVec); ccmaAtomConstraints->upload(atomConstraintsVec); ccmaNumAtomConstraints->upload(numAtomConstraintsVec); ccmaReducedMass->upload(reducedMassVec); ccmaConstraintMatrixColumn->upload(constraintMatrixColumnVec); ccmaConstraintMatrixValue->upload(constraintMatrixValueVec); // Create the CCMA kernels. map defines; defines["NUM_CONSTRAINTS"] = OpenCLExpressionUtilities::intToString(numCCMA); defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(numAtoms); cl::Program ccmaProgram = context.createProgram(OpenCLKernelSources::ccma, defines); ccmaDirectionsKernel = cl::Kernel(ccmaProgram, "computeConstraintDirections"); ccmaForceKernel = cl::Kernel(ccmaProgram, "computeConstraintForce"); ccmaMultiplyKernel = cl::Kernel(ccmaProgram, "multiplyByConstraintMatrix"); ccmaUpdateKernel = cl::Kernel(ccmaProgram, "updateAtomPositions"); } } OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() { if (posDelta != NULL) delete posDelta; if (settleAtoms != NULL) delete settleAtoms; if (settleParams != NULL) delete settleParams; if (shakeAtoms != NULL) delete shakeAtoms; if (shakeParams != NULL) delete shakeParams; if (random != NULL) delete random; if (randomSeed != NULL) delete randomSeed; if (stepSize != NULL) delete stepSize; if (ccmaAtoms != NULL) delete ccmaAtoms; if (ccmaDistance != NULL) delete ccmaDistance; if (ccmaReducedMass != NULL) delete ccmaReducedMass; if (ccmaAtomConstraints != NULL) delete ccmaAtomConstraints; if (ccmaNumAtomConstraints != NULL) delete ccmaNumAtomConstraints; if (ccmaConstraintMatrixColumn != NULL) delete ccmaConstraintMatrixColumn; if (ccmaConstraintMatrixValue != NULL) delete ccmaConstraintMatrixValue; if (ccmaDelta1 != NULL) delete ccmaDelta1; if (ccmaDelta2 != NULL) delete ccmaDelta2; if (ccmaConverged != NULL) delete ccmaConverged; if (ccmaConvergedBuffer != NULL) delete ccmaConvergedBuffer; } void OpenCLIntegrationUtilities::applyConstraints(double tol) { if (settleAtoms != NULL) { if (!hasInitializedConstraintKernels) { settleKernel.setArg(0, settleAtoms->getSize()); settleKernel.setArg(2, context.getPosq().getDeviceBuffer()); settleKernel.setArg(3, posDelta->getDeviceBuffer()); settleKernel.setArg(4, posDelta->getDeviceBuffer()); settleKernel.setArg(5, context.getVelm().getDeviceBuffer()); settleKernel.setArg(6, settleAtoms->getDeviceBuffer()); settleKernel.setArg(7, settleParams->getDeviceBuffer()); } settleKernel.setArg(1, (cl_float) tol); context.executeKernel(settleKernel, settleAtoms->getSize()); } if (shakeAtoms != NULL) { if (!hasInitializedConstraintKernels) { shakeKernel.setArg(0, shakeAtoms->getSize()); shakeKernel.setArg(2, context.getPosq().getDeviceBuffer()); shakeKernel.setArg(3, posDelta->getDeviceBuffer()); shakeKernel.setArg(4, posDelta->getDeviceBuffer()); shakeKernel.setArg(5, shakeAtoms->getDeviceBuffer()); shakeKernel.setArg(6, shakeParams->getDeviceBuffer()); } shakeKernel.setArg(1, (cl_float) tol); context.executeKernel(shakeKernel, shakeAtoms->getSize()); } if (ccmaAtoms != NULL) { if (!hasInitializedConstraintKernels) { ccmaDirectionsKernel.setArg(0, ccmaAtoms->getDeviceBuffer()); ccmaDirectionsKernel.setArg(1, ccmaDistance->getDeviceBuffer()); ccmaDirectionsKernel.setArg(2, context.getPosq().getDeviceBuffer()); ccmaForceKernel.setArg(0, ccmaAtoms->getDeviceBuffer()); ccmaForceKernel.setArg(1, ccmaDistance->getDeviceBuffer()); ccmaForceKernel.setArg(2, posDelta->getDeviceBuffer()); ccmaForceKernel.setArg(3, ccmaReducedMass->getDeviceBuffer()); ccmaForceKernel.setArg(4, ccmaDelta1->getDeviceBuffer()); ccmaForceKernel.setArg(5, ccmaConverged->getDeviceBuffer()); ccmaMultiplyKernel.setArg(0, ccmaDelta1->getDeviceBuffer()); ccmaMultiplyKernel.setArg(1, ccmaDelta2->getDeviceBuffer()); ccmaMultiplyKernel.setArg(2, ccmaConstraintMatrixColumn->getDeviceBuffer()); ccmaMultiplyKernel.setArg(3, ccmaConstraintMatrixValue->getDeviceBuffer()); ccmaMultiplyKernel.setArg(4, ccmaConverged->getDeviceBuffer()); ccmaUpdateKernel.setArg(0, ccmaNumAtomConstraints->getDeviceBuffer()); ccmaUpdateKernel.setArg(1, ccmaAtomConstraints->getDeviceBuffer()); ccmaUpdateKernel.setArg(2, ccmaDistance->getDeviceBuffer()); ccmaUpdateKernel.setArg(3, posDelta->getDeviceBuffer()); ccmaUpdateKernel.setArg(4, context.getVelm().getDeviceBuffer()); ccmaUpdateKernel.setArg(5, ccmaDelta1->getDeviceBuffer()); ccmaUpdateKernel.setArg(6, ccmaDelta2->getDeviceBuffer()); ccmaUpdateKernel.setArg(7, ccmaConverged->getDeviceBuffer()); } ccmaForceKernel.setArg(6, (cl_float) tol); context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize()); const int checkInterval = 4; cl::Event event; for (int i = 0; i < 150; i++) { ccmaForceKernel.setArg(7, i); if (i == 0) { ccmaConvergedMemory[0] = 1; ccmaConvergedMemory[1] = 0; context.getQueue().enqueueWriteBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), ccmaConvergedMemory); } context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize()); if ((i+1)%checkInterval == 0) context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), ccmaConvergedMemory, NULL, &event); ccmaMultiplyKernel.setArg(5, i); context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize()); ccmaUpdateKernel.setArg(8, i); context.executeKernel(ccmaUpdateKernel, context.getNumAtoms()); if ((i+1)%checkInterval == 0) { event.wait(); if (ccmaConvergedMemory[i%2]) break; } } } hasInitializedConstraintKernels = true; } void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) { if (random != NULL) { if (randomNumberSeed != lastSeed) throw OpenMMException("OpenCLIntegrationUtilities::initRandomNumberGenerator(): Requested two different values for the random number seed"); return; } // Create the random number arrays. lastSeed = randomNumberSeed; random = new OpenCLArray(context, 32*context.getPaddedNumAtoms(), "random"); randomSeed = new OpenCLArray(context, context.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed"); randomPos = random->getSize(); // Use a quick and dirty RNG to pick seeds for the real random number generator. vector seed(randomSeed->getSize()); unsigned int r = randomNumberSeed; for (int i = 0; i < randomSeed->getSize(); i++) { seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF; seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF; seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF; seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF; } randomSeed->upload(seed); // Create the kernel. cl::Program randomProgram = context.createProgram(OpenCLKernelSources::random); randomKernel = cl::Kernel(randomProgram, "generateRandomNumbers"); } int OpenCLIntegrationUtilities::prepareRandomNumbers(int numValues) { if (randomPos+numValues <= random->getSize()) { int oldPos = randomPos; randomPos += numValues; return oldPos; } if (numValues > random->getSize()) { delete random; random = new OpenCLArray(context, numValues, "random"); } randomKernel.setArg(0, random->getSize()); randomKernel.setArg(1, random->getDeviceBuffer()); randomKernel.setArg(2, randomSeed->getDeviceBuffer()); context.executeKernel(randomKernel, random->getSize()); randomPos = numValues; return 0; }