CudaIntegrationUtilities.cpp 39.5 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
Peter Eastman's avatar
Peter Eastman committed
9
 * Portions copyright (c) 2009-2018 Stanford University and the Authors.      *
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "CudaIntegrationUtilities.h"
#include "CudaArray.h"
#include "CudaKernelSources.h"
30
#include "openmm/internal/OSRngSeed.h"
31
32
33
34
#include "openmm/HarmonicAngleForce.h"
#include "openmm/VirtualSite.h"
#include "quern.h"
#include "CudaExpressionUtilities.h"
35
#include "ReferenceCCMAAlgorithm.h"
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <map>

using namespace OpenMM;
using namespace std;

#define CHECK_RESULT(result) CHECK_RESULT2(result, errorMessage);
#define CHECK_RESULT2(result, prefix) \
    if (result != CUDA_SUCCESS) { \
        std::stringstream m; \
        m<<prefix<<": "<<context.getErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
        throw OpenMMException(m.str());\
    }

struct CudaIntegrationUtilities::ShakeCluster {
    int centralID;
    int peripheralID[3];
    int size;
    bool valid;
    double distance;
    double centralInvMass, peripheralInvMass;
    ShakeCluster() : valid(true) {
    }
    ShakeCluster(int centralID, double invMass) : centralID(centralID), centralInvMass(invMass), size(0), valid(true) {
    }
    void addAtom(int id, double dist, double invMass) {
        if (size == 3 || (size > 0 && abs(dist-distance)/distance > 1e-8) || (size > 0 && abs(invMass-peripheralInvMass)/peripheralInvMass > 1e-8))
            valid = false;
        else {
            peripheralID[size++] = id;
            distance = dist;
            peripheralInvMass = invMass;
        }
    }
    void markInvalid(map<int, ShakeCluster>& allClusters, vector<bool>& invalidForShake)
    {
        valid = false;
        invalidForShake[centralID] = true;
        for (int i = 0; i < size; i++) {
            invalidForShake[peripheralID[i]] = true;
            map<int, ShakeCluster>::iterator otherCluster = allClusters.find(peripheralID[i]);
            if (otherCluster != allClusters.end() && otherCluster->second.valid)
                otherCluster->second.markInvalid(allClusters, invalidForShake);
        }
    }
};

struct CudaIntegrationUtilities::ConstraintOrderer : public binary_function<int, int, bool> {
    const vector<int>& atom1;
    const vector<int>& atom2;
    const vector<int>& constraints;
    ConstraintOrderer(const vector<int>& atom1, const vector<int>& atom2, const vector<int>& constraints) : atom1(atom1), atom2(atom2), constraints(constraints) {
    }
    bool operator()(int x, int y) {
        int ix = constraints[x];
        int iy = constraints[y];
        if (atom1[ix] != atom1[iy])
            return atom1[ix] < atom1[iy];
        return atom2[ix] < atom2[iy];
    }
};

CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const System& system) : context(context),
101
        randomPos(0), ccmaConvergedMemory(NULL) {
102
103
    // Create workspace arrays.

104
    lastStepSize = make_double2(0.0, 0.0);
105
    if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
Peter Eastman's avatar
Peter Eastman committed
106
107
108
109
110
        posDelta.initialize<double4>(context, context.getPaddedNumAtoms(), "posDelta");
        vector<double4> deltas(posDelta.getSize(), make_double4(0.0, 0.0, 0.0, 0.0));
        posDelta.upload(deltas);
        stepSize.initialize<double2>(context, 1, "stepSize");
        stepSize.upload(&lastStepSize);
111
112
    }
    else {
Peter Eastman's avatar
Peter Eastman committed
113
114
115
116
        posDelta.initialize<float4>(context, context.getPaddedNumAtoms(), "posDelta");
        vector<float4> deltas(posDelta.getSize(), make_float4(0.0f, 0.0f, 0.0f, 0.0f));
        posDelta.upload(deltas);
        stepSize.initialize<float2>(context, 1, "stepSize");
117
        float2 lastStepSizeFloat = make_float2(0.0f, 0.0f);
Peter Eastman's avatar
Peter Eastman committed
118
        stepSize.upload(&lastStepSizeFloat);
119
120
121
122
    }

    // Record the set of constraints and how many constraints each atom is involved in.

123
124
125
    vector<int> atom1;
    vector<int> atom2;
    vector<double> distance;
126
    vector<int> constraintCount(context.getNumAtoms(), 0);
127
128
129
130
131
132
133
134
135
136
137
    for (int i = 0; i < system.getNumConstraints(); i++) {
        int p1, p2;
        double d;
        system.getConstraintParameters(i, p1, p2, d);
        if (system.getParticleMass(p1) != 0 || system.getParticleMass(p2) != 0) {
            atom1.push_back(p1);
            atom2.push_back(p2);
            distance.push_back(d);
            constraintCount[p1]++;
            constraintCount[p2]++;
        }
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
    }

    // Identify clusters of three atoms that can be treated with SETTLE.  First, for every
    // atom that might be part of such a cluster, make a list of the two other atoms it is
    // connected to.

    int numAtoms = system.getNumParticles();
    vector<map<int, float> > settleConstraints(numAtoms);
    for (int i = 0; i < (int)atom1.size(); i++) {
        if (constraintCount[atom1[i]] == 2 && constraintCount[atom2[i]] == 2) {
            settleConstraints[atom1[i]][atom2[i]] = (float) distance[i];
            settleConstraints[atom2[i]][atom1[i]] = (float) distance[i];
        }
    }

    // Now remove the ones that don't actually form closed loops of three atoms.

    vector<int> settleClusters;
    for (int i = 0; i < (int)settleConstraints.size(); i++) {
        if (settleConstraints[i].size() == 2) {
            int partner1 = settleConstraints[i].begin()->first;
            int partner2 = (++settleConstraints[i].begin())->first;
            if (settleConstraints[partner1].size() != 2 || settleConstraints[partner2].size() != 2 ||
                    settleConstraints[partner1].find(partner2) == settleConstraints[partner1].end())
                settleConstraints[i].clear();
            else if (i < partner1 && i < partner2)
                settleClusters.push_back(i);
        }
        else
            settleConstraints[i].clear();
    }

    // Record the SETTLE clusters.

    vector<bool> isShakeAtom(numAtoms, false);
    if (settleClusters.size() > 0) {
        vector<int4> atoms;
        vector<float2> params;
        for (int i = 0; i < (int) settleClusters.size(); i++) {
            int atom1 = settleClusters[i];
            int atom2 = settleConstraints[atom1].begin()->first;
            int atom3 = (++settleConstraints[atom1].begin())->first;
            float dist12 = settleConstraints[atom1].find(atom2)->second;
            float dist13 = settleConstraints[atom1].find(atom3)->second;
            float dist23 = settleConstraints[atom2].find(atom3)->second;
            if (dist12 == dist13) {
                // atom1 is the central atom
                atoms.push_back(make_int4(atom1, atom2, atom3, 0));
                params.push_back(make_float2(dist12, dist23));
            }
            else if (dist12 == dist23) {
                // atom2 is the central atom
                atoms.push_back(make_int4(atom2, atom1, atom3, 0));
                params.push_back(make_float2(dist12, dist13));
            }
            else if (dist13 == dist23) {
                // atom3 is the central atom
                atoms.push_back(make_int4(atom3, atom1, atom2, 0));
                params.push_back(make_float2(dist13, dist12));
            }
            else
199
                continue; // We can't handle this with SETTLE
200
201
202
203
            isShakeAtom[atom1] = true;
            isShakeAtom[atom2] = true;
            isShakeAtom[atom3] = true;
        }
204
        if (atoms.size() > 0) {
Peter Eastman's avatar
Peter Eastman committed
205
206
207
208
            settleAtoms.initialize<int4>(context, atoms.size(), "settleAtoms");
            settleParams.initialize<float2>(context, params.size(), "settleParams");
            settleAtoms.upload(atoms);
            settleParams.upload(params);
209
        }
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
    }

    // Find clusters consisting of a central atom with up to three peripheral atoms.

    map<int, ShakeCluster> clusters;
    vector<bool> invalidForShake(numAtoms, false);
    for (int i = 0; i < (int) atom1.size(); i++) {
        if (isShakeAtom[atom1[i]])
            continue; // This is being taken care of with SETTLE.

        // Determine which is the central atom.

        bool firstIsCentral;
        if (constraintCount[atom1[i]] > 1)
            firstIsCentral = true;
        else if (constraintCount[atom2[i]] > 1)
            firstIsCentral = false;
        else if (atom1[i] < atom2[i])
            firstIsCentral = true;
        else
            firstIsCentral = false;
        int centralID, peripheralID;
        if (firstIsCentral) {
            centralID = atom1[i];
            peripheralID = atom2[i];
        }
        else {
            centralID = atom2[i];
            peripheralID = atom1[i];
        }

        // Add it to the cluster.

        if (clusters.find(centralID) == clusters.end()) {
            clusters[centralID] = ShakeCluster(centralID, 1.0/system.getParticleMass(centralID));
        }
        ShakeCluster& cluster = clusters[centralID];
        cluster.addAtom(peripheralID, distance[i], 1.0/system.getParticleMass(peripheralID));
        if (constraintCount[peripheralID] != 1 || invalidForShake[atom1[i]] || invalidForShake[atom2[i]]) {
            cluster.markInvalid(clusters, invalidForShake);
            map<int, ShakeCluster>::iterator otherCluster = clusters.find(peripheralID);
            if (otherCluster != clusters.end() && otherCluster->second.valid)
                otherCluster->second.markInvalid(clusters, invalidForShake);
        }
    }
    int validShakeClusters = 0;
    for (map<int, ShakeCluster>::iterator iter = clusters.begin(); iter != clusters.end(); ++iter) {
        ShakeCluster& cluster = iter->second;
        if (cluster.valid) {
            cluster.valid = !invalidForShake[cluster.centralID] && cluster.size == constraintCount[cluster.centralID];
            for (int i = 0; i < cluster.size; i++)
                if (invalidForShake[cluster.peripheralID[i]])
                    cluster.valid = false;
            if (cluster.valid)
                ++validShakeClusters;
        }
    }

    // Record the SHAKE clusters.

    if (validShakeClusters > 0) {
        vector<int4> atoms;
        vector<float4> params;
        int index = 0;
        for (map<int, ShakeCluster>::const_iterator iter = clusters.begin(); iter != clusters.end(); ++iter) {
            const ShakeCluster& cluster = iter->second;
            if (!cluster.valid)
                continue;
            atoms.push_back(make_int4(cluster.centralID, cluster.peripheralID[0], (cluster.size > 1 ? cluster.peripheralID[1] : -1), (cluster.size > 2 ? cluster.peripheralID[2] : -1)));
            params.push_back(make_float4((float) cluster.centralInvMass, (float) (0.5/(cluster.centralInvMass+cluster.peripheralInvMass)), (float) (cluster.distance*cluster.distance), (float) cluster.peripheralInvMass));
            isShakeAtom[cluster.centralID] = true;
            isShakeAtom[cluster.peripheralID[0]] = true;
            if (cluster.size > 1)
                isShakeAtom[cluster.peripheralID[1]] = true;
            if (cluster.size > 2)
                isShakeAtom[cluster.peripheralID[2]] = true;
            ++index;
        }
Peter Eastman's avatar
Peter Eastman committed
288
289
290
291
        shakeAtoms.initialize<int4>(context, atoms.size(), "shakeAtoms");
        shakeParams.initialize<float4>(context, params.size(), "shakeParams");
        shakeAtoms.upload(atoms);
        shakeParams.upload(params);
292
293
294
295
296
297
298
299
300
301
302
303
304
    }

    // Find connected constraints for CCMA.

    vector<int> ccmaConstraints;
    for (unsigned i = 0; i < atom1.size(); i++)
        if (!isShakeAtom[atom1[i]])
            ccmaConstraints.push_back(i);

    // Record the connections between constraints.

    int numCCMA = (int) ccmaConstraints.size();
    if (numCCMA > 0) {
305
306
307
        // Record information needed by ReferenceCCMAAlgorithm.
        
        vector<pair<int, int> > refIndices(numCCMA);
peastman's avatar
peastman committed
308
        vector<double> refDistance(numCCMA);
309
        for (int i = 0; i < numCCMA; i++) {
310
311
312
            int index = ccmaConstraints[i];
            refIndices[i] = make_pair(atom1[index], atom2[index]);
            refDistance[i] = distance[index];
313
        }
peastman's avatar
peastman committed
314
        vector<double> refMasses(numAtoms);
315
        for (int i = 0; i < numAtoms; ++i)
peastman's avatar
peastman committed
316
            refMasses[i] = system.getParticleMass(i);
317
318
319
320
321
322
323
324
325
326
327

        // Look up angles for CCMA.
        
        vector<ReferenceCCMAAlgorithm::AngleInfo> angles;
        for (int i = 0; i < system.getNumForces(); i++) {
            const HarmonicAngleForce* force = dynamic_cast<const HarmonicAngleForce*>(&system.getForce(i));
            if (force != NULL) {
                for (int j = 0; j < force->getNumAngles(); j++) {
                    int atom1, atom2, atom3;
                    double angle, k;
                    force->getAngleParameters(j, atom1, atom2, atom3, angle, k);
peastman's avatar
peastman committed
328
                    angles.push_back(ReferenceCCMAAlgorithm::AngleInfo(atom1, atom2, atom3, angle));
329
330
331
                }
            }
        }
332
333
334
335
336
        
        // Create a ReferenceCCMAAlgorithm.  It will build and invert the constraint matrix for us.
        
        ReferenceCCMAAlgorithm ccma(numAtoms, numCCMA, refIndices, refDistance, refMasses, angles, 0.1);
        vector<vector<pair<int, double> > > matrix = ccma.getMatrix();
337
338
339
340
341
        int maxRowElements = 0;
        for (unsigned i = 0; i < matrix.size(); i++)
            maxRowElements = max(maxRowElements, (int) matrix[i].size());
        maxRowElements++;

342
343
344
345
346
347
348
349
350
351
352
        // Build the list of constraints for each atom.

        vector<vector<int> > atomConstraints(context.getNumAtoms());
        for (int i = 0; i < numCCMA; i++) {
            atomConstraints[atom1[ccmaConstraints[i]]].push_back(i);
            atomConstraints[atom2[ccmaConstraints[i]]].push_back(i);
        }
        int maxAtomConstraints = 0;
        for (unsigned i = 0; i < atomConstraints.size(); i++)
            maxAtomConstraints = max(maxAtomConstraints, (int) atomConstraints[i].size());

353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
        // Sort the constraints.

        vector<int> constraintOrder(numCCMA);
        for (int i = 0; i < numCCMA; ++i)
            constraintOrder[i] = i;
        sort(constraintOrder.begin(), constraintOrder.end(), ConstraintOrderer(atom1, atom2, ccmaConstraints));
        vector<int> inverseOrder(numCCMA);
        for (int i = 0; i < numCCMA; ++i)
            inverseOrder[constraintOrder[i]] = i;
        for (int i = 0; i < (int)matrix.size(); ++i)
            for (int j = 0; j < (int)matrix[i].size(); ++j)
                matrix[i][j].first = inverseOrder[matrix[i][j].first];

        // Record the CCMA data structures.

Peter Eastman's avatar
Peter Eastman committed
368
369
370
371
372
        ccmaAtoms.initialize<int2>(context, numCCMA, "CcmaAtoms");
        ccmaAtomConstraints.initialize<int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
        ccmaNumAtomConstraints.initialize<int>(context, numAtoms, "CcmaAtomConstraintsIndex");
        ccmaConstraintMatrixColumn.initialize<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
        ccmaConverged.initialize<int>(context, 2, "ccmaConverged");
Peter Eastman's avatar
Peter Eastman committed
373
374
        CHECK_RESULT2(cuMemHostAlloc((void**) &ccmaConvergedMemory, sizeof(int), CU_MEMHOSTALLOC_DEVICEMAP), "Error allocating pinned memory");
        CHECK_RESULT2(cuMemHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
Peter Eastman's avatar
Peter Eastman committed
375
376
377
378
        vector<int2> atomsVec(ccmaAtoms.getSize());
        vector<int> atomConstraintsVec(ccmaAtomConstraints.getSize());
        vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints.getSize());
        vector<int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn.getSize());
Peter Eastman's avatar
Peter Eastman committed
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
        int elementSize = (context.getUseDoublePrecision() || context.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
        ccmaDistance.initialize(context, numCCMA, 4*elementSize, "CcmaDistance");
        ccmaDelta1.initialize(context, numCCMA, elementSize, "CcmaDelta1");
        ccmaDelta2.initialize(context, numCCMA, elementSize, "CcmaDelta2");
        ccmaReducedMass.initialize(context, numCCMA, elementSize, "CcmaReducedMass");
        ccmaConstraintMatrixValue.initialize(context, numCCMA*maxRowElements, elementSize, "ConstraintMatrixValue");
        vector<double4> distanceVec(ccmaDistance.getSize());
        vector<double> reducedMassVec(ccmaReducedMass.getSize());
        vector<double> constraintMatrixValueVec(ccmaConstraintMatrixValue.getSize());
        for (int i = 0; i < numCCMA; i++) {
            int index = constraintOrder[i];
            int c = ccmaConstraints[index];
            atomsVec[i].x = atom1[c];
            atomsVec[i].y = atom2[c];
            distanceVec[i].w = distance[c];
            reducedMassVec[i] = (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c])));
            for (unsigned int j = 0; j < matrix[index].size(); j++) {
                constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first;
                constraintMatrixValueVec[i+j*numCCMA] = matrix[index][j].second;
398
            }
Peter Eastman's avatar
Peter Eastman committed
399
            constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA;
400
        }
Peter Eastman's avatar
Peter Eastman committed
401
402
403
        ccmaDistance.upload(distanceVec, true);
        ccmaReducedMass.upload(reducedMassVec, true);
        ccmaConstraintMatrixValue.upload(constraintMatrixValueVec, true);
404
405
406
407
408
409
410
        for (unsigned int i = 0; i < atomConstraints.size(); i++) {
            numAtomConstraintsVec[i] = atomConstraints[i].size();
            for (unsigned int j = 0; j < atomConstraints[i].size(); j++) {
                bool forward = (atom1[ccmaConstraints[atomConstraints[i][j]]] == i);
                atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
            }
        }
Peter Eastman's avatar
Peter Eastman committed
411
412
413
414
        ccmaAtoms.upload(atomsVec);
        ccmaAtomConstraints.upload(atomConstraintsVec);
        ccmaNumAtomConstraints.upload(numAtomConstraintsVec);
        ccmaConstraintMatrixColumn.upload(constraintMatrixColumnVec);
415
416
417
418
419
    }
    
    // Build the list of virtual sites.
    
    vector<int4> vsite2AvgAtomVec;
420
    vector<double2> vsite2AvgWeightVec;
421
    vector<int4> vsite3AvgAtomVec;
422
    vector<double4> vsite3AvgWeightVec;
423
    vector<int4> vsiteOutOfPlaneAtomVec;
424
    vector<double4> vsiteOutOfPlaneWeightVec;
425
426
427
428
429
    vector<int> vsiteLocalCoordsIndexVec;
    vector<int> vsiteLocalCoordsAtomVec;
    vector<int> vsiteLocalCoordsStartVec;
    vector<double> vsiteLocalCoordsWeightVec;
    vector<double4> vsiteLocalCoordsPosVec;
430
431
432
433
434
435
436
    for (int i = 0; i < numAtoms; i++) {
        if (system.isVirtualSite(i)) {
            if (dynamic_cast<const TwoParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
                // A two particle average.
                
                const TwoParticleAverageSite& site = dynamic_cast<const TwoParticleAverageSite&>(system.getVirtualSite(i));
                vsite2AvgAtomVec.push_back(make_int4(i, site.getParticle(0), site.getParticle(1), 0));
437
                vsite2AvgWeightVec.push_back(make_double2(site.getWeight(0), site.getWeight(1)));
438
439
440
441
442
443
            }
            else if (dynamic_cast<const ThreeParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
                // A three particle average.
                
                const ThreeParticleAverageSite& site = dynamic_cast<const ThreeParticleAverageSite&>(system.getVirtualSite(i));
                vsite3AvgAtomVec.push_back(make_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2)));
444
                vsite3AvgWeightVec.push_back(make_double4(site.getWeight(0), site.getWeight(1), site.getWeight(2), 0.0));
445
446
447
448
449
450
            }
            else if (dynamic_cast<const OutOfPlaneSite*>(&system.getVirtualSite(i)) != NULL) {
                // An out of plane site.
                
                const OutOfPlaneSite& site = dynamic_cast<const OutOfPlaneSite&>(system.getVirtualSite(i));
                vsiteOutOfPlaneAtomVec.push_back(make_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2)));
451
                vsiteOutOfPlaneWeightVec.push_back(make_double4(site.getWeight12(), site.getWeight13(), site.getWeightCross(), 0.0));
452
            }
453
            else if (dynamic_cast<const LocalCoordinatesSite*>(&system.getVirtualSite(i)) != NULL) {
454
                // A local coordinates site.
455
456
                
                const LocalCoordinatesSite& site = dynamic_cast<const LocalCoordinatesSite&>(system.getVirtualSite(i));
457
458
459
460
461
462
463
464
465
466
467
468
469
                int numParticles = site.getNumParticles();
                vector<double> origin, x, y;
                site.getOriginWeights(origin);
                site.getXWeights(x);
                site.getYWeights(y);
                vsiteLocalCoordsIndexVec.push_back(i);
                vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
                for (int j = 0; j < numParticles; j++) {
                    vsiteLocalCoordsAtomVec.push_back(site.getParticle(j));
                    vsiteLocalCoordsWeightVec.push_back(origin[j]);
                    vsiteLocalCoordsWeightVec.push_back(x[j]);
                    vsiteLocalCoordsWeightVec.push_back(y[j]);
                }
470
                Vec3 pos = site.getLocalPosition();
471
                vsiteLocalCoordsPosVec.push_back(make_double4(pos[0], pos[1], pos[2], 0.0));
472
            }
473
474
        }
    }
475
    vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
476
477
478
    int num2Avg = vsite2AvgAtomVec.size();
    int num3Avg = vsite3AvgAtomVec.size();
    int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
479
    int numLocalCoords = vsiteLocalCoordsPosVec.size();
Peter Eastman's avatar
Peter Eastman committed
480
481
482
483
484
485
    vsite2AvgAtoms.initialize<int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
    vsite3AvgAtoms.initialize<int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
    vsiteOutOfPlaneAtoms.initialize<int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
    vsiteLocalCoordsIndex.initialize<int>(context, max(1, (int) vsiteLocalCoordsIndexVec.size()), "vsiteLocalCoordsIndex");
    vsiteLocalCoordsAtoms.initialize<int>(context, max(1, (int) vsiteLocalCoordsAtomVec.size()), "vsiteLocalCoordsAtoms");
    vsiteLocalCoordsStartIndex.initialize<int>(context, max(1, (int) vsiteLocalCoordsStartVec.size()), "vsiteLocalCoordsStartIndex");
486
    if (num2Avg > 0)
Peter Eastman's avatar
Peter Eastman committed
487
        vsite2AvgAtoms.upload(vsite2AvgAtomVec);
488
    if (num3Avg > 0)
Peter Eastman's avatar
Peter Eastman committed
489
        vsite3AvgAtoms.upload(vsite3AvgAtomVec);
490
    if (numOutOfPlane > 0)
Peter Eastman's avatar
Peter Eastman committed
491
        vsiteOutOfPlaneAtoms.upload(vsiteOutOfPlaneAtomVec);
492
    if (numLocalCoords > 0) {
Peter Eastman's avatar
Peter Eastman committed
493
494
495
        vsiteLocalCoordsIndex.upload(vsiteLocalCoordsIndexVec);
        vsiteLocalCoordsAtoms.upload(vsiteLocalCoordsAtomVec);
        vsiteLocalCoordsStartIndex.upload(vsiteLocalCoordsStartVec);
496
    }
Peter Eastman's avatar
Peter Eastman committed
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
    int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
    vsite2AvgWeights.initialize(context, max(1, num2Avg), 2*elementSize, "vsite2AvgWeights");
    vsite3AvgWeights.initialize(context, max(1, num3Avg), 4*elementSize, "vsite3AvgWeights");
    vsiteOutOfPlaneWeights.initialize(context, max(1, numOutOfPlane), 4*elementSize, "vsiteOutOfPlaneWeights");
    vsiteLocalCoordsWeights.initialize(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), elementSize, "vsiteLocalCoordsWeights");
    vsiteLocalCoordsPos.initialize(context, max(1, (int) vsiteLocalCoordsPosVec.size()), 4*elementSize, "vsiteLocalCoordsPos");
    if (num2Avg > 0)
        vsite2AvgWeights.upload(vsite2AvgWeightVec, true);
    if (num3Avg > 0)
        vsite3AvgWeights.upload(vsite3AvgWeightVec, true);
    if (numOutOfPlane > 0)
        vsiteOutOfPlaneWeights.upload(vsiteOutOfPlaneWeightVec, true);
    if (numLocalCoords > 0) {
        vsiteLocalCoordsWeights.upload(vsiteLocalCoordsWeightVec, true);
        vsiteLocalCoordsPos.upload(vsiteLocalCoordsPosVec, true);
512
513
    }

514
    // Create the kernels used by this class.
515
516

    map<string, string> defines;
517
518
    defines["NUM_CCMA_CONSTRAINTS"] = context.intToString(numCCMA);
    defines["NUM_ATOMS"] = context.intToString(numAtoms);
519
520
521
    defines["NUM_2_AVERAGE"] = context.intToString(num2Avg);
    defines["NUM_3_AVERAGE"] = context.intToString(num3Avg);
    defines["NUM_OUT_OF_PLANE"] = context.intToString(numOutOfPlane);
522
    defines["NUM_LOCAL_COORDS"] = context.intToString(numLocalCoords);
523
    defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
524
525
526
527
528
529
530
531
532
533
534
535
536
    CUmodule module = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::integrationUtilities, defines);
    settlePosKernel = context.getKernel(module, "applySettleToPositions");
    settleVelKernel = context.getKernel(module, "applySettleToVelocities");
    shakePosKernel = context.getKernel(module, "applyShakeToPositions");
    shakeVelKernel = context.getKernel(module, "applyShakeToVelocities");
    ccmaDirectionsKernel = context.getKernel(module, "computeCCMAConstraintDirections");
    ccmaPosForceKernel = context.getKernel(module, "computeCCMAPositionConstraintForce");
    ccmaVelForceKernel = context.getKernel(module, "computeCCMAVelocityConstraintForce");
    ccmaMultiplyKernel = context.getKernel(module, "multiplyByCCMAConstraintMatrix");
    ccmaUpdateKernel = context.getKernel(module, "updateCCMAAtomPositions");
    CHECK_RESULT2(cuEventCreate(&ccmaEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for CCMA");
    vsitePositionKernel = context.getKernel(module, "computeVirtualSites");
    vsiteForceKernel = context.getKernel(module, "distributeVirtualSiteForces");
537
    numVsites = num2Avg+num3Avg+numOutOfPlane+numLocalCoords;
538
    randomKernel = context.getKernel(module, "generateRandomNumbers");
539
    timeShiftKernel = context.getKernel(module, "timeShiftVelocities");
540
541
542
}

CudaIntegrationUtilities::~CudaIntegrationUtilities() {
543
    context.setAsCurrent();
Peter Eastman's avatar
Peter Eastman committed
544
545
    if (ccmaConvergedMemory != NULL)
        cuMemFreeHost(ccmaConvergedMemory);
546
547
}

548
549
550
551
void CudaIntegrationUtilities::setNextStepSize(double size) {
    if (size != lastStepSize.x || size != lastStepSize.y) {
        lastStepSize = make_double2(size, size);
        if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
Peter Eastman's avatar
Peter Eastman committed
552
            stepSize.upload(&lastStepSize);
553
554
        else {
            float2 lastStepSizeFloat = make_float2((float) size, (float) size);
Peter Eastman's avatar
Peter Eastman committed
555
            stepSize.upload(&lastStepSizeFloat);
556
557
558
559
560
561
        }
    }
}

double CudaIntegrationUtilities::getLastStepSize() {
    if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
Peter Eastman's avatar
Peter Eastman committed
562
        stepSize.download(&lastStepSize);
563
564
    else {
        float2 lastStepSizeFloat;
Peter Eastman's avatar
Peter Eastman committed
565
        stepSize.download(&lastStepSizeFloat);
566
567
568
569
570
        lastStepSize = make_double2(lastStepSizeFloat.x, lastStepSizeFloat.y);
    }
    return lastStepSize.y;
}

571
572
573
574
575
576
577
578
579
void CudaIntegrationUtilities::applyConstraints(double tol) {
    applyConstraints(false, tol);
}

void CudaIntegrationUtilities::applyVelocityConstraints(double tol) {
    applyConstraints(true, tol);
}

void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double tol) {
580
    CUfunction settleKernel, shakeKernel, ccmaForceKernel;
581
582
583
584
585
586
587
588
589
590
591
    if (constrainVelocities) {
        settleKernel = settleVelKernel;
        shakeKernel = shakeVelKernel;
        ccmaForceKernel = ccmaVelForceKernel;
    }
    else {
        settleKernel = settlePosKernel;
        shakeKernel = shakePosKernel;
        ccmaForceKernel = ccmaPosForceKernel;
    }
    float floatTol = (float) tol;
592
593
    void* tolPointer = (context.getUseDoublePrecision() || context.getUseMixedPrecision() ? (void*) &tol : (void*) &floatTol);
    CUdeviceptr posCorrection = (context.getUseMixedPrecision() ? context.getPosqCorrection().getDevicePointer() : 0);
Peter Eastman's avatar
Peter Eastman committed
594
595
    if (settleAtoms.isInitialized()) {
        int numClusters = settleAtoms.getSize();
596
        void* args[] = {&numClusters, tolPointer, &context.getPosq().getDevicePointer(), &posCorrection,
Peter Eastman's avatar
Peter Eastman committed
597
598
599
                &posDelta.getDevicePointer(), &context.getVelm().getDevicePointer(),
                &settleAtoms.getDevicePointer(), &settleParams.getDevicePointer()};
        context.executeKernel(settleKernel, args, settleAtoms.getSize());
600
    }
Peter Eastman's avatar
Peter Eastman committed
601
602
    if (shakeAtoms.isInitialized()) {
        int numClusters = shakeAtoms.getSize();
603
        void* args[] = {&numClusters, tolPointer, &context.getPosq().getDevicePointer(), &posCorrection,
Peter Eastman's avatar
Peter Eastman committed
604
605
606
                constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta.getDevicePointer(),
                &shakeAtoms.getDevicePointer(), &shakeParams.getDevicePointer()};
        context.executeKernel(shakeKernel, args, shakeAtoms.getSize());
607
    }
Peter Eastman's avatar
Peter Eastman committed
608
609
610
    if (ccmaAtoms.isInitialized()) {
        void* directionsArgs[] = {&ccmaAtoms.getDevicePointer(), &ccmaDistance.getDevicePointer(), &context.getPosq().getDevicePointer(), &posCorrection, &ccmaConverged.getDevicePointer()};
        context.executeKernel(ccmaDirectionsKernel, directionsArgs, ccmaAtoms.getSize());
611
        int i;
Peter Eastman's avatar
Peter Eastman committed
612
613
614
        void* forceArgs[] = {&ccmaAtoms.getDevicePointer(), &ccmaDistance.getDevicePointer(),
                constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta.getDevicePointer(),
                &ccmaReducedMass.getDevicePointer(), &ccmaDelta1.getDevicePointer(), &ccmaConverged.getDevicePointer(),
Peter Eastman's avatar
Peter Eastman committed
615
                &ccmaConvergedDeviceMemory, tolPointer, &i};
Peter Eastman's avatar
Peter Eastman committed
616
617
618
619
620
621
        void* multiplyArgs[] = {&ccmaDelta1.getDevicePointer(), &ccmaDelta2.getDevicePointer(),
                &ccmaConstraintMatrixColumn.getDevicePointer(), &ccmaConstraintMatrixValue.getDevicePointer(), &ccmaConverged.getDevicePointer(), &i};
        void* updateArgs[] = {&ccmaNumAtomConstraints.getDevicePointer(), &ccmaAtomConstraints.getDevicePointer(), &ccmaDistance.getDevicePointer(),
                constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta.getDevicePointer(),
                &context.getVelm().getDevicePointer(), &ccmaDelta1.getDevicePointer(), &ccmaDelta2.getDevicePointer(),
                &ccmaConverged.getDevicePointer(), &i};
622
        const int checkInterval = 4;
Peter Eastman's avatar
Peter Eastman committed
623
        ccmaConvergedMemory[0] = 0;
624
        for (i = 0; i < 150; i++) {
Peter Eastman's avatar
Peter Eastman committed
625
            context.executeKernel(ccmaForceKernel, forceArgs, ccmaAtoms.getSize());
Peter Eastman's avatar
Peter Eastman committed
626
            if ((i+1)%checkInterval == 0)
627
                CHECK_RESULT2(cuEventRecord(ccmaEvent, 0), "Error recording event for CCMA");
Peter Eastman's avatar
Peter Eastman committed
628
            context.executeKernel(ccmaMultiplyKernel, multiplyArgs, ccmaAtoms.getSize());
629
630
631
            context.executeKernel(ccmaUpdateKernel, updateArgs, context.getNumAtoms());
            if ((i+1)%checkInterval == 0) {
                CHECK_RESULT2(cuEventSynchronize(ccmaEvent), "Error synchronizing on event for CCMA");
Peter Eastman's avatar
Peter Eastman committed
632
                if (ccmaConvergedMemory[0])
633
634
635
636
                    break;
            }
        }
    }
637
638
639
}

void CudaIntegrationUtilities::computeVirtualSites() {
640
    if (numVsites > 0) {
641
        CUdeviceptr posCorrection = (context.getUseMixedPrecision() ? context.getPosqCorrection().getDevicePointer() : 0);
Peter Eastman's avatar
Peter Eastman committed
642
643
644
645
646
647
        void* args[] = {&context.getPosq().getDevicePointer(), &posCorrection, &vsite2AvgAtoms.getDevicePointer(), &vsite2AvgWeights.getDevicePointer(),
                &vsite3AvgAtoms.getDevicePointer(), &vsite3AvgWeights.getDevicePointer(),
                &vsiteOutOfPlaneAtoms.getDevicePointer(), &vsiteOutOfPlaneWeights.getDevicePointer(),
                &vsiteLocalCoordsIndex.getDevicePointer(), &vsiteLocalCoordsAtoms.getDevicePointer(),
                &vsiteLocalCoordsWeights.getDevicePointer(), &vsiteLocalCoordsPos.getDevicePointer(),
                &vsiteLocalCoordsStartIndex.getDevicePointer()};
648
649
        context.executeKernel(vsitePositionKernel, args, numVsites);
    }
650
651
652
}

void CudaIntegrationUtilities::distributeForcesFromVirtualSites() {
653
    if (numVsites > 0) {
654
655
        CUdeviceptr posCorrection = (context.getUseMixedPrecision() ? context.getPosqCorrection().getDevicePointer() : 0);
        void* args[] = {&context.getPosq().getDevicePointer(), &posCorrection, &context.getForce().getDevicePointer(),
Peter Eastman's avatar
Peter Eastman committed
656
657
658
659
660
661
                &vsite2AvgAtoms.getDevicePointer(), &vsite2AvgWeights.getDevicePointer(),
                &vsite3AvgAtoms.getDevicePointer(), &vsite3AvgWeights.getDevicePointer(),
                &vsiteOutOfPlaneAtoms.getDevicePointer(), &vsiteOutOfPlaneWeights.getDevicePointer(),
                &vsiteLocalCoordsIndex.getDevicePointer(), &vsiteLocalCoordsAtoms.getDevicePointer(),
                &vsiteLocalCoordsWeights.getDevicePointer(), &vsiteLocalCoordsPos.getDevicePointer(),
                &vsiteLocalCoordsStartIndex.getDevicePointer()};
662
663
        context.executeKernel(vsiteForceKernel, args, numVsites);
    }
664
}
665
666

void CudaIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) {
Peter Eastman's avatar
Peter Eastman committed
667
    if (random.isInitialized()) {
668
669
670
671
672
673
674
675
        if (randomNumberSeed != lastSeed)
           throw OpenMMException("CudaIntegrationUtilities::initRandomNumberGenerator(): Requested two different values for the random number seed");
        return;
    }

    // Create the random number arrays.

    lastSeed = randomNumberSeed;
Peter Eastman's avatar
Peter Eastman committed
676
677
678
    random.initialize<float4>(context, 4*context.getPaddedNumAtoms(), "random");
    randomSeed.initialize<int4>(context, context.getNumThreadBlocks()*CudaContext::ThreadBlockSize, "randomSeed");
    randomPos = random.getSize();
679
680
681

    // Use a quick and dirty RNG to pick seeds for the real random number generator.

Peter Eastman's avatar
Peter Eastman committed
682
    vector<int4> seed(randomSeed.getSize());
683
    unsigned int r = randomNumberSeed;
684
    if (r == 0) r = (unsigned int) osrngseed();
Peter Eastman's avatar
Peter Eastman committed
685
    for (int i = 0; i < randomSeed.getSize(); i++) {
686
687
688
689
690
        seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
        seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
        seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
        seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
    }
Peter Eastman's avatar
Peter Eastman committed
691
    randomSeed.upload(seed);
692
693
694
}

int CudaIntegrationUtilities::prepareRandomNumbers(int numValues) {
Peter Eastman's avatar
Peter Eastman committed
695
    if (randomPos+numValues <= random.getSize()) {
696
697
698
699
        int oldPos = randomPos;
        randomPos += numValues;
        return oldPos;
    }
Peter Eastman's avatar
Peter Eastman committed
700
701
702
703
704
    if (numValues > random.getSize())
        random.resize(numValues);
    int size = random.getSize();
    void* args[] = {&size, &random.getDevicePointer(), &randomSeed.getDevicePointer()};
    context.executeKernel(randomKernel, args, random.getSize());
705
706
707
708
709
    randomPos = numValues;
    return 0;
}

void CudaIntegrationUtilities::createCheckpoint(ostream& stream) {
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
    size_t numChains = noseHooverChainState.size();
    bool useDouble = context.getUseDoublePrecision() || context.getUseMixedPrecision();
    stream.write((char*) &numChains, sizeof(size_t));
    for (auto &chainState: noseHooverChainState){
        int chainID = chainState.first;
        size_t chainLength = chainState.second.getSize();
        stream.write((char*) &chainID, sizeof(int));
        stream.write((char*) &chainLength, sizeof(size_t));
        if (useDouble) {
            vector<double2> stateVec;
            chainState.second.download(stateVec);
            stream.write((char*) stateVec.data(), sizeof(double2)*chainLength);
        } else {
            vector<float2> stateVec;
            chainState.second.download(stateVec);
            stream.write((char*) stateVec.data(), sizeof(float2)*chainLength);
        }
    }
Peter Eastman's avatar
Peter Eastman committed
728
    if (!random.isInitialized()) 
Yutong Zhao's avatar
Yutong Zhao committed
729
        return;
730
731
    stream.write((char*) &randomPos, sizeof(int));
    vector<float4> randomVec;
Peter Eastman's avatar
Peter Eastman committed
732
733
    random.download(randomVec);
    stream.write((char*) &randomVec[0], sizeof(float4)*random.getSize());
734
    vector<int4> randomSeedVec;
Peter Eastman's avatar
Peter Eastman committed
735
736
    randomSeed.download(randomSeedVec);
    stream.write((char*) &randomSeedVec[0], sizeof(int4)*randomSeed.getSize());
737
738
739
}

void CudaIntegrationUtilities::loadCheckpoint(istream& stream) {
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
    size_t numChains, chainLength;
    bool useDouble = context.getUseDoublePrecision() || context.getUseMixedPrecision();
    stream.read((char*) &numChains, sizeof(size_t));
    noseHooverChainState.clear();
    for (size_t i=0; i<numChains; i++){
        int chainID;
        stream.read((char*) &chainID, sizeof(int));
        stream.read((char*) &chainLength, sizeof(size_t));
        if (useDouble) {
            noseHooverChainState[chainID] = CudaArray();
            noseHooverChainState[chainID].initialize<double2>(context, chainLength, "chainState" + std::to_string(chainID));
            std::vector<double2> stateVec(chainLength);
            stream.read((char*) &stateVec[0], sizeof(double2)*chainLength);
            noseHooverChainState[chainID].upload(stateVec);
        } else {
            noseHooverChainState[chainID] = CudaArray();
            noseHooverChainState[chainID].initialize<float2>(context, chainLength, "chainState" + std::to_string(chainID));
            std::vector<float2> stateVec(chainLength);
            stream.read((char*) &stateVec[0], sizeof(float2)*chainLength);
            noseHooverChainState[chainID].upload(stateVec);
        }
    }
Peter Eastman's avatar
Peter Eastman committed
762
    if (!random.isInitialized()) 
Yutong Zhao's avatar
Yutong Zhao committed
763
        return;
764
    stream.read((char*) &randomPos, sizeof(int));
Peter Eastman's avatar
Peter Eastman committed
765
766
767
768
769
770
    vector<float4> randomVec(random.getSize());
    stream.read((char*) &randomVec[0], sizeof(float4)*random.getSize());
    random.upload(randomVec);
    vector<int4> randomSeedVec(randomSeed.getSize());
    stream.read((char*) &randomSeedVec[0], sizeof(int4)*randomSeed.getSize());
    randomSeed.upload(randomSeedVec);
771
}
772
773
774

double CudaIntegrationUtilities::computeKineticEnergy(double timeShift) {
    int numParticles = context.getNumAtoms();
775
776
777
778
779
780
    if (timeShift != 0) {
        float timeShiftFloat = (float) timeShift;
        void* timeShiftPtr = (context.getUseDoublePrecision() ? (void*) &timeShift : (void*) &timeShiftFloat);

        // Copy the velocities into the posDelta array while we temporarily modify them.

Peter Eastman's avatar
Peter Eastman committed
781
        context.getVelm().copyTo(posDelta);
782
783
784
785
786
787
788
789
790
791

        // Apply the time shift.

        void* args[] = {&context.getVelm().getDevicePointer(), &context.getForce().getDevicePointer(), timeShiftPtr};
        context.executeKernel(timeShiftKernel, args, numParticles);
        applyConstraints(true, 1e-4);
    }
    
    // Compute the kinetic energy.
    
792
793
794
795
796
797
    double energy = 0.0;
    if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
        vector<double4> velm;
        context.getVelm().download(velm);
        for (int i = 0; i < numParticles; i++) {
            double4 v = velm[i];
798
            if (v.w != 0)
799
800
801
802
803
804
805
                energy += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
        }
    }
    else {
        vector<float4> velm;
        context.getVelm().download(velm);
        for (int i = 0; i < numParticles; i++) {
806
807
            float4 v = velm[i];
            if (v.w != 0)
808
809
810
                energy += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
        }
    }
811
812
813
    // Restore the velocities.
    
    if (timeShift != 0)
Peter Eastman's avatar
Peter Eastman committed
814
        posDelta.copyTo(context.getVelm());
815
816
    return 0.5*energy;
}
817

818
std::map<int, CudaArray>& CudaIntegrationUtilities::getNoseHooverChainState(){
819
820
        return noseHooverChainState;
};