CudaKernels.cpp 33.2 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
9
 * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
10
11
12
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
13
14
15
16
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
17
 *                                                                            *
18
19
20
21
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
22
 *                                                                            *
23
24
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
25
26
27
 * -------------------------------------------------------------------------- */

#include "CudaKernels.h"
28
#include "openmm/LangevinIntegrator.h"
29
30
#include "openmm/Context.h"
#include "openmm/internal/ContextImpl.h"
31
#include "kernels/gputypes.h"
32
#include "kernels/cudaKernels.h"
33
34
35
36
37
38
39
#include <cmath>

extern "C" int gpuSetConstants( gpuContext gpu );

using namespace OpenMM;
using namespace std;

40
41
42
43
void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
}

void CudaCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context) {
44
    _gpuContext* gpu = data.gpu;
45
    if (data.nonbondedMethod != NO_CUTOFF && data.computeForceCount%100 == 0)
46
        gpuReorderAtoms(gpu);
47
    data.computeForceCount++;
48
    kClearForces(gpu);
49
50
51
52
}

void CudaCalcForcesAndEnergyKernel::finishForceComputation(ContextImpl& context) {
    _gpuContext* gpu = data.gpu;
53
    if (gpu->bIncludeGBSA) {
54
        gpu->bRecalculateBornRadii = true;
55
56
57
58
        kCalculateCDLJObcGbsaForces1(gpu);
        kReduceObcGbsaBornForces(gpu);
        kCalculateObcGbsaForces2(gpu);
    }
59
    else if (data.hasNonbonded)
60
        kCalculateCDLJForces(gpu);
61
62
    if (data.hasCustomNonbonded)
        kCalculateCustomNonbondedForces(gpu, data.hasNonbonded);
63
    kCalculateLocalForces(gpu);
64
    kReduceForces(gpu);
65
66
}

67
void CudaCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context) {
68
    _gpuContext* gpu = data.gpu;
69
70
71
72
    if (data.nonbondedMethod != NO_CUTOFF && data.stepCount%100 == 0)
        gpuReorderAtoms(gpu);
    data.stepCount++;
    kClearEnergy(gpu);
73
74
75
76
}

double CudaCalcForcesAndEnergyKernel::finishEnergyComputation(ContextImpl& context) {
    _gpuContext* gpu = data.gpu;
77
78
79
80
81
    if (gpu->bIncludeGBSA) {
        gpu->bRecalculateBornRadii = true;
        kCalculateCDLJObcGbsaForces1(gpu);
        kReduceObcGbsaBornForces(gpu);
        kCalculateObcGbsaForces2(gpu);
82
    }
83
84
85
86
87
88
89
90
    else if (data.hasNonbonded)
        kCalculateCDLJForces(gpu);
    if (data.hasCustomNonbonded)
        kCalculateCustomNonbondedForces(gpu, data.hasNonbonded);
    kCalculateLocalForces(gpu);
    if (gpu->bIncludeGBSA)
        kReduceBornSumAndForces(gpu);
    return kReduceEnergy(gpu)+data.ewaldSelfEnergy;
91
92
}

93
void CudaUpdateStateDataKernel::initialize(const System& system) {
94
95
}

96
double CudaUpdateStateDataKernel::getTime(const ContextImpl& context) const {
97
98
99
    return data.time;
}

100
void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
101
102
103
    data.time = time;
}

104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector<Vec3>& positions) {
    _gpuContext* gpu = data.gpu;
    gpu->psPosq4->Download();
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    positions.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
        float4 pos = (*gpu->psPosq4)[i];
        int3 offset = gpu->posCellOffsets[i];
        positions[order[i]] = Vec3(pos.x-offset.x*gpu->sim.periodicBoxSizeX, pos.y-offset.y*gpu->sim.periodicBoxSizeY, pos.z-offset.z*gpu->sim.periodicBoxSizeZ);
    }
}

void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector<Vec3>& positions) {
    _gpuContext* gpu = data.gpu;
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    for (int i = 0; i < numParticles; ++i) {
        float4& pos = (*gpu->psPosq4)[i];
        const Vec3& p = positions[order[i]];
        pos.x = p[0];
        pos.y = p[1];
        pos.z = p[2];
    }
    gpu->psPosq4->Upload();
    for (int i = 0; i < gpu->posCellOffsets.size(); i++)
        gpu->posCellOffsets[i] = make_int3(0, 0, 0);
}

void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector<Vec3>& velocities) {
    _gpuContext* gpu = data.gpu;
    gpu->psVelm4->Download();
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    velocities.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
        float4 vel = (*gpu->psVelm4)[i];
        velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
    }
}

void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities) {
    _gpuContext* gpu = data.gpu;
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    for (int i = 0; i < numParticles; ++i) {
        float4& vel = (*gpu->psVelm4)[i];
        const Vec3& v = velocities[order[i]];
        vel.x = v[0];
        vel.y = v[1];
        vel.z = v[2];
    }
    gpu->psVelm4->Upload();
}

void CudaUpdateStateDataKernel::getForces(ContextImpl& context, std::vector<Vec3>& forces) {
    _gpuContext* gpu = data.gpu;
    int* order = gpu->psAtomIndex->_pSysData;
    gpu->psForce4->Download();
    int numParticles = context.getSystem().getNumParticles();
    forces.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
        float4 force = (*gpu->psForce4)[i];
        forces[order[i]] = Vec3(force.x, force.y, force.z);
    }
}

171
172
173
174
175
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
}

void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
    data.hasBonds = true;
176
    numBonds = force.getNumBonds();
Peter Eastman's avatar
Peter Eastman committed
177
178
    vector<int> particle1(numBonds);
    vector<int> particle2(numBonds);
179
180
181
182
    vector<float> length(numBonds);
    vector<float> k(numBonds);
    for (int i = 0; i < numBonds; i++) {
        double lengthValue, kValue;
Peter Eastman's avatar
Peter Eastman committed
183
        force.getBondParameters(i, particle1[i], particle2[i], lengthValue, kValue);
184
185
186
        length[i] = (float) lengthValue;
        k[i] = (float) kValue;
    }
Peter Eastman's avatar
Peter Eastman committed
187
    gpuSetBondParameters(data.gpu, particle1, particle2, length, k);
188
189
}

190
void CudaCalcHarmonicBondForceKernel::executeForces(ContextImpl& context) {
191
192
}

193
double CudaCalcHarmonicBondForceKernel::executeEnergy(ContextImpl& context) {
194
195
196
197
198
199
200
201
    return 0.0;
}

CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
}

void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
    data.hasAngles = true;
202
    numAngles = force.getNumAngles();
203
    const float RadiansToDegrees = (float) (180.0/3.14159265);
Peter Eastman's avatar
Peter Eastman committed
204
205
206
    vector<int> particle1(numAngles);
    vector<int> particle2(numAngles);
    vector<int> particle3(numAngles);
207
208
209
210
    vector<float> angle(numAngles);
    vector<float> k(numAngles);
    for (int i = 0; i < numAngles; i++) {
        double angleValue, kValue;
Peter Eastman's avatar
Peter Eastman committed
211
        force.getAngleParameters(i, particle1[i], particle2[i], particle3[i], angleValue, kValue);
212
213
        angle[i] = (float) (angleValue*RadiansToDegrees);
        k[i] = (float) kValue;
214
    }
Peter Eastman's avatar
Peter Eastman committed
215
    gpuSetBondAngleParameters(data.gpu, particle1, particle2, particle3, angle, k);
216
}
217

218
void CudaCalcHarmonicAngleForceKernel::executeForces(ContextImpl& context) {
219
220
}

221
double CudaCalcHarmonicAngleForceKernel::executeEnergy(ContextImpl& context) {
222
223
224
225
226
227
228
229
230
    return 0.0;
}

CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
}

void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
    data.hasPeriodicTorsions = true;
    numTorsions = force.getNumTorsions();
231
    const float RadiansToDegrees = (float)(180.0/3.14159265);
Peter Eastman's avatar
Peter Eastman committed
232
233
234
235
    vector<int> particle1(numTorsions);
    vector<int> particle2(numTorsions);
    vector<int> particle3(numTorsions);
    vector<int> particle4(numTorsions);
236
237
238
239
240
    vector<float> k(numTorsions);
    vector<float> phase(numTorsions);
    vector<int> periodicity(numTorsions);
    for (int i = 0; i < numTorsions; i++) {
        double kValue, phaseValue;
Peter Eastman's avatar
Peter Eastman committed
241
        force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], periodicity[i], phaseValue, kValue);
242
243
        k[i] = (float) kValue;
        phase[i] = (float) (phaseValue*RadiansToDegrees);
244
    }
Peter Eastman's avatar
Peter Eastman committed
245
    gpuSetDihedralParameters(data.gpu, particle1, particle2, particle3, particle4, k, phase, periodicity);
246
247
}

248
void CudaCalcPeriodicTorsionForceKernel::executeForces(ContextImpl& context) {
249
250
}

251
double CudaCalcPeriodicTorsionForceKernel::executeEnergy(ContextImpl& context) {
252
253
254
255
256
257
258
259
260
    return 0.0;
}

CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
}

void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
    data.hasRB = true;
    numTorsions = force.getNumTorsions();
Peter Eastman's avatar
Peter Eastman committed
261
262
263
264
    vector<int> particle1(numTorsions);
    vector<int> particle2(numTorsions);
    vector<int> particle3(numTorsions);
    vector<int> particle4(numTorsions);
265
266
267
268
269
270
271
272
    vector<float> c0(numTorsions);
    vector<float> c1(numTorsions);
    vector<float> c2(numTorsions);
    vector<float> c3(numTorsions);
    vector<float> c4(numTorsions);
    vector<float> c5(numTorsions);
    for (int i = 0; i < numTorsions; i++) {
        double c[6];
Peter Eastman's avatar
Peter Eastman committed
273
        force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], c[0], c[1], c[2], c[3], c[4], c[5]);
274
275
276
277
278
279
        c0[i] = (float) c[0];
        c1[i] = (float) c[1];
        c2[i] = (float) c[2];
        c3[i] = (float) c[3];
        c4[i] = (float) c[4];
        c5[i] = (float) c[5];
280
    }
Peter Eastman's avatar
Peter Eastman committed
281
    gpuSetRbDihedralParameters(data.gpu, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
282
283
}

284
void CudaCalcRBTorsionForceKernel::executeForces(ContextImpl& context) {
285
286
}

287
double CudaCalcRBTorsionForceKernel::executeEnergy(ContextImpl& context) {
288
289
290
291
292
293
    return 0.0;
}

CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
}

294
void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
295
    data.hasNonbonded = true;
Peter Eastman's avatar
Peter Eastman committed
296
    numParticles = force.getNumParticles();
297
    _gpuContext* gpu = data.gpu;
298
299
300
301

    // Identify which exceptions are 1-4 interactions.

    vector<pair<int, int> > exclusions;
302
    vector<int> exceptions;
303
304
305
306
307
308
    for (int i = 0; i < force.getNumExceptions(); i++) {
        int particle1, particle2;
        double chargeProd, sigma, epsilon;
        force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
        exclusions.push_back(pair<int, int>(particle1, particle2));
        if (chargeProd != 0.0 || epsilon != 0.0)
309
            exceptions.push_back(i);
310
311
    }

312
313
    // Initialize nonbonded interactions.
    
314
    {
Peter Eastman's avatar
Peter Eastman committed
315
316
317
318
        vector<int> particle(numParticles);
        vector<float> c6(numParticles);
        vector<float> c12(numParticles);
        vector<float> q(numParticles);
319
        vector<char> symbol;
Peter Eastman's avatar
Peter Eastman committed
320
321
        vector<vector<int> > exclusionList(numParticles);
        for (int i = 0; i < numParticles; i++) {
322
            double charge, radius, depth;
Peter Eastman's avatar
Peter Eastman committed
323
324
            force.getParticleParameters(i, charge, radius, depth);
            particle[i] = i;
325
326
327
            q[i] = (float) charge;
            c6[i] = (float) (4*depth*pow(radius, 6.0));
            c12[i] = (float) (4*depth*pow(radius, 12.0));
328
329
            exclusionList[i].push_back(i);
        }
330
        for (int i = 0; i < (int)exclusions.size(); i++) {
331
332
333
            exclusionList[exclusions[i].first].push_back(exclusions[i].second);
            exclusionList[exclusions[i].second].push_back(exclusions[i].first);
        }
Peter Eastman's avatar
Peter Eastman committed
334
        Vec3 boxVectors[3];
335
        system.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
Peter Eastman's avatar
Peter Eastman committed
336
        gpuSetPeriodicBoxSize(gpu, (float)boxVectors[0][0], (float)boxVectors[1][1], (float)boxVectors[2][2]);
337
338
        CudaNonbondedMethod method = NO_CUTOFF;
        if (force.getNonbondedMethod() != NonbondedForce::NoCutoff) {
339
            gpuSetNonbondedCutoff(gpu, (float)force.getCutoffDistance(), force.getReactionFieldDielectric());
340
341
342
343
344
            method = CUTOFF;
        }
        if (force.getNonbondedMethod() == NonbondedForce::CutoffPeriodic) {
            method = PERIODIC;
        }
345
        if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
346
347
348
349
350
351
            double ewaldErrorTol = force.getEwaldErrorTolerance();
            double alpha = (1.0/force.getCutoffDistance())*std::sqrt(-std::log(ewaldErrorTol));
            double mx = boxVectors[0][0]/force.getCutoffDistance();
            double my = boxVectors[1][1]/force.getCutoffDistance();
            double mz = boxVectors[2][2]/force.getCutoffDistance();
            double pi = 3.1415926535897932385;
Peter Eastman's avatar
Peter Eastman committed
352
353
354
            int kmaxx = (int)std::ceil(-(mx/pi)*std::log(ewaldErrorTol));
            int kmaxy = (int)std::ceil(-(my/pi)*std::log(ewaldErrorTol));
            int kmaxz = (int)std::ceil(-(mz/pi)*std::log(ewaldErrorTol));
355
356
357
358
359
360
361
362
363
364
365
            if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
                if (kmaxx%2 == 0)
                    kmaxx++;
                if (kmaxy%2 == 0)
                    kmaxy++;
                if (kmaxz%2 == 0)
                    kmaxz++;
                gpuSetEwaldParameters(gpu, (float) alpha, kmaxx, kmaxy, kmaxz);
                method = EWALD;
            }
            else {
366
367
368
                int gridSizeX = -0.5*kmaxx*std::log(ewaldErrorTol);
                int gridSizeY = -0.5*kmaxy*std::log(ewaldErrorTol);
                int gridSizeZ = -0.5*kmaxz*std::log(ewaldErrorTol);
Peter Eastman's avatar
Peter Eastman committed
369
                gpuSetPMEParameters(gpu, (float) alpha, gridSizeX, gridSizeY, gridSizeZ);
370
371
                method = PARTICLE_MESH_EWALD;
            }
372
        }
373
374
        data.nonbondedMethod = method;
        gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method);
375
376
377
378

        // Compute the Ewald self energy.

        data.ewaldSelfEnergy = 0.0;
379
380
381
382
383
        if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
            double selfEnergyScale = gpu->sim.epsfac*gpu->sim.alphaEwald/std::sqrt(PI);
                for (int i = 0; i < numParticles; i++)
                    data.ewaldSelfEnergy -= selfEnergyScale*q[i]*q[i];
        }
384
385
386
387
    }

    // Initialize 1-4 nonbonded interactions.
    
388
    {
389
390
391
392
393
394
395
396
        int numExceptions = exceptions.size();
        vector<int> particle1(numExceptions);
        vector<int> particle2(numExceptions);
        vector<float> c6(numExceptions);
        vector<float> c12(numExceptions);
        vector<float> q1(numExceptions);
        vector<float> q2(numExceptions);
        for (int i = 0; i < numExceptions; i++) {
397
            double charge, sig, eps;
398
            force.getExceptionParameters(exceptions[i], particle1[i], particle2[i], charge, sig, eps);
399
400
            c6[i] = (float) (4*eps*pow(sig, 6.0));
            c12[i] = (float) (4*eps*pow(sig, 12.0));
Peter Eastman's avatar
Peter Eastman committed
401
402
            q1[i] = (float) charge;
            q2[i] = 1.0f;
403
        }
Peter Eastman's avatar
Peter Eastman committed
404
        gpuSetLJ14Parameters(gpu, 138.935485f, 1.0f, particle1, particle2, c6, c12, q1, q2);
405
406
407
    }
}

408
void CudaCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
409
410
}

411
double CudaCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
412
    return 0.0;
413
414
}

415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
}

void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
    data.hasCustomNonbonded = true;
    numParticles = force.getNumParticles();
    _gpuContext* gpu = data.gpu;

    // Identify which exceptions are actual interactions.

    vector<pair<int, int> > exclusions;
    vector<int> exceptions;
    {
        vector<double> parameters;
        for (int i = 0; i < force.getNumExceptions(); i++) {
            int particle1, particle2;
            force.getExceptionParameters(i, particle1, particle2, parameters);
            exclusions.push_back(pair<int, int>(particle1, particle2));
            if (parameters.size() > 0)
                exceptions.push_back(i);
        }
    }

    // Initialize nonbonded interactions.

    vector<int> particle(numParticles);
    vector<vector<double> > parameters(numParticles);
    vector<vector<int> > exclusionList(numParticles);
    for (int i = 0; i < numParticles; i++) {
        force.getParticleParameters(i, parameters[i]);
        particle[i] = i;
        exclusionList[i].push_back(i);
    }
    for (int i = 0; i < (int)exclusions.size(); i++) {
        exclusionList[exclusions[i].first].push_back(exclusions[i].second);
        exclusionList[exclusions[i].second].push_back(exclusions[i].first);
    }
452
453
454
    Vec3 boxVectors[3];
    system.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
    gpuSetPeriodicBoxSize(gpu, (float)boxVectors[0][0], (float)boxVectors[1][1], (float)boxVectors[2][2]);
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
    CudaNonbondedMethod method = NO_CUTOFF;
    if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
        method = CUTOFF;
    if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic) {
        method = PERIODIC;
    }
    data.customNonbondedMethod = method;

    // Initialize exceptions.

    int numExceptions = exceptions.size();
    vector<int> exceptionParticle1(numExceptions);
    vector<int> exceptionParticle2(numExceptions);
    vector<vector<double> > exceptionParams(numExceptions);
    for (int i = 0; i < numExceptions; i++)
        force.getExceptionParameters(exceptions[i], exceptionParticle1[i], exceptionParticle2[i], exceptionParams[i]);

472
473
474
475
476
477
478
479
480
481
482
    // Record the tabulated functions.

    for (int i = 0; i < force.getNumFunctions(); i++) {
        string name;
        vector<double> values;
        double min, max;
        bool interpolating;
        force.getFunctionParameters(i, name, values, min, max, interpolating);
        gpuSetTabulatedFunction(gpu, i, name, values, min, max, interpolating);
    }

483
484
485
486
487
488
489
490
    // Record information for the expressions.

    vector<string> paramNames;
    vector<string> combiningRules;
    for (int i = 0; i < force.getNumParameters(); i++) {
        paramNames.push_back(force.getParameterName(i));
        combiningRules.push_back(force.getParameterCombiningRule(i));
    }
491
492
493
494
495
496
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = force.getGlobalParameterDefaultValue(i);
    }
497
    gpuSetCustomNonbondedParameters(gpu, parameters, exclusionList, exceptionParticle1, exceptionParticle2, exceptionParams, method,
498
499
            (float)force.getCutoffDistance(), force.getEnergyFunction(), combiningRules, paramNames, globalParamNames);
    if (globalParamValues.size() > 0)
500
        SetCustomNonbondedGlobalParams(&globalParamValues[0]);
501
502
503
}

void CudaCalcCustomNonbondedForceKernel::executeForces(ContextImpl& context) {
504
    updateGlobalParams(context);
505
506
507
}

double CudaCalcCustomNonbondedForceKernel::executeEnergy(ContextImpl& context) {
508
    updateGlobalParams(context);
509
510
511
    return 0.0;
}

512
513
514
515
516
517
518
519
520
void CudaCalcCustomNonbondedForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
    for (int i = 0; i < globalParamNames.size(); i++) {
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
521
        SetCustomNonbondedGlobalParams(&globalParamValues[0]);
522
523
}

524
CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
525
526
}

527
void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
528

Peter Eastman's avatar
Peter Eastman committed
529
    int numParticles = system.getNumParticles();
530
    _gpuContext* gpu = data.gpu;
Peter Eastman's avatar
Peter Eastman committed
531
532
    vector<float> radius(numParticles);
    vector<float> scale(numParticles);
533
    vector<float> charge(numParticles);
Peter Eastman's avatar
Peter Eastman committed
534
    for (int i = 0; i < numParticles; i++) {
535
536
        double particleCharge, particleRadius, scalingFactor;
        force.getParticleParameters(i, particleCharge, particleRadius, scalingFactor);
Peter Eastman's avatar
Peter Eastman committed
537
        radius[i] = (float) particleRadius;
538
        scale[i] = (float) scalingFactor;
539
        charge[i] = (float) particleCharge;
540
    }
541
    gpuSetObcParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), radius, scale, charge);
542
543
}

544
void CudaCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
545
546
}

547
static void initializeIntegration(const System& system, CudaPlatform::PlatformData& data, const Integrator& integrator) {
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564

    // Initialize any terms that haven't already been handled by a Force.

    _gpuContext* gpu = data.gpu;
    if (!data.hasBonds)
        gpuSetBondParameters(gpu, vector<int>(), vector<int>(), vector<float>(), vector<float>());
    if (!data.hasAngles)
        gpuSetBondAngleParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>());
    if (!data.hasPeriodicTorsions)
        gpuSetDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<int>());
    if (!data.hasRB)
        gpuSetRbDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(),
                vector<float>(), vector<float>(), vector<float>(), vector<float>());
    if (!data.hasNonbonded) {
        gpuSetCoulombParameters(gpu, 138.935485f, vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<char>(), vector<vector<int> >(), NO_CUTOFF);
        gpuSetLJ14Parameters(gpu, 138.935485f, 1.0f, vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<float>());
    }
565
566
567
    
    // Set masses.
    
Peter Eastman's avatar
Peter Eastman committed
568
569
570
571
    int numParticles = system.getNumParticles();
    vector<float> mass(numParticles);
    for (int i = 0; i < numParticles; i++)
        mass[i] = (float) system.getParticleMass(i);
572
573
574
575
    gpuSetMass(gpu, mass);
    
    // Set constraints.
    
576
    int numConstraints = system.getNumConstraints();
Peter Eastman's avatar
Peter Eastman committed
577
578
    vector<int> particle1(numConstraints);
    vector<int> particle2(numConstraints);
579
580
581
582
    vector<float> distance(numConstraints);
    vector<float> invMass1(numConstraints);
    vector<float> invMass2(numConstraints);
    for (int i = 0; i < numConstraints; i++) {
Peter Eastman's avatar
Peter Eastman committed
583
        int particle1Index, particle2Index;
584
        double constraintDistance;
Peter Eastman's avatar
Peter Eastman committed
585
586
587
        system.getConstraintParameters(i, particle1Index, particle2Index, constraintDistance);
        particle1[i] = particle1Index;
        particle2[i] = particle2Index;
588
        distance[i] = (float) constraintDistance;
Peter Eastman's avatar
Peter Eastman committed
589
590
        invMass1[i] = 1.0f/mass[particle1Index];
        invMass2[i] = 1.0f/mass[particle2Index];
591
    }
592
    gpuSetConstraintParameters(gpu, particle1, particle2, distance, invMass1, invMass2, (float)integrator.getConstraintTolerance());
593
594
    
    // Finish initialization.
595

596
597
598
    gpuBuildThreadBlockWorkList(gpu);
    gpuBuildExclusionList(gpu);
    gpuBuildOutputBuffers(gpu);
599
    gpuSetConstants(gpu);
600
601
602
    kClearBornForces(gpu);
    kClearForces(gpu);
    cudaThreadSynchronize();
603
604
}

605
double CudaCalcGBSAOBCForceKernel::executeEnergy(ContextImpl& context) {
606
	return 0.0;
607
608
609
610
611
612
}

CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
}

void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
613
    initializeIntegration(system, data, integrator);
614
615
616
    prevStepSize = -1.0;
}

617
void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
618
619
620
621
622
    _gpuContext* gpu = data.gpu;
    double stepSize = integrator.getStepSize();
    if (stepSize != prevStepSize) {
        // Initialize the GPU parameters.
        
623
        gpuSetVerletIntegrationParameters(gpu, (float) stepSize, 0.0f);
624
625
626
627
628
        gpuSetConstants(gpu);
        prevStepSize = stepSize;
    }
    kVerletUpdatePart1(gpu);
    kApplyFirstShake(gpu);
629
    kApplyFirstSettle(gpu);
630
    kApplyFirstCCMA(gpu);
631
632
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
633
634
            gpu->bCalculateCM = true;
    kVerletUpdatePart2(gpu);
635
    data.time += stepSize;
636
    data.stepCount++;
637
638
639
640
641
642
}

CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
}

void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
643
    initializeIntegration(system, data, integrator);
644
645
646
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
647
648
649
    prevStepSize = -1.0;
}

650
void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
651
    _gpuContext* gpu = data.gpu;
652
653
654
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double stepSize = integrator.getStepSize();
655
656
657
658
    if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
        // Initialize the GPU parameters.
        
        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
659
        gpuSetLangevinIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature, 0.0f);
660
661
662
663
664
665
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevStepSize = stepSize;
    }
666
    kLangevinUpdatePart1(gpu);
667
    kApplyFirstShake(gpu);
668
    kApplyFirstSettle(gpu);
669
    kApplyFirstCCMA(gpu);
670
671
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
672
            gpu->bCalculateCM = true;
673
    kLangevinUpdatePart2(gpu);
674
    kApplySecondShake(gpu);
675
    kApplySecondSettle(gpu);
676
    kApplySecondCCMA(gpu);
677
    data.time += stepSize;
678
    data.stepCount++;
679
}
680
681
682
683
684

CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
}

void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
685
    initializeIntegration(system, data, integrator);
686
687
688
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
689
690
691
    prevStepSize = -1.0;
}

692
void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
693
694
695
696
697
698
699
700
    _gpuContext* gpu = data.gpu;
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double stepSize = integrator.getStepSize();
    if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
        // Initialize the GPU parameters.
        
        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
701
        gpuSetBrownianIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature);
702
703
704
705
706
707
708
709
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevStepSize = stepSize;
    }
    kBrownianUpdatePart1(gpu);
    kApplyFirstShake(gpu);
710
    kApplyFirstSettle(gpu);
711
    kApplyFirstCCMA(gpu);
712
713
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
714
715
            gpu->bCalculateCM = true;
    kBrownianUpdatePart2(gpu);
716
    data.time += stepSize;
717
718
719
720
721
722
723
724
725
726
727
    data.stepCount++;
}

CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() {
}

void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
    initializeIntegration(system, data, integrator);
    prevErrorTol = -1.0;
}

728
void CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
729
730
731
732
733
734
735
736
737
    _gpuContext* gpu = data.gpu;
    double errorTol = integrator.getErrorTolerance();
    if (errorTol != prevErrorTol) {
        // Initialize the GPU parameters.

        gpuSetVerletIntegrationParameters(gpu, 0.0f, (float) errorTol);
        gpuSetConstants(gpu);
        prevErrorTol = errorTol;
    }
738
    float maxStepSize = (float)(maxTime-data.time);
739
    kSelectVerletStepSize(gpu, maxStepSize);
740
741
742
743
744
745
746
747
748
749
    kVerletUpdatePart1(gpu);
    kApplyFirstShake(gpu);
    kApplyFirstSettle(gpu);
    kApplyFirstCCMA(gpu);
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
            gpu->bCalculateCM = true;
    kVerletUpdatePart2(gpu);
    gpu->psStepSize->Download();
    data.time += (*gpu->psStepSize)[0].y;
750
751
    if ((*gpu->psStepSize)[0].y == maxStepSize)
        data.time = maxTime; // Avoid round-off error
752
    data.stepCount++;
753
}
754

755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKernel() {
}

void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
    initializeIntegration(system, data, integrator);
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
    prevErrorTol = -1.0;
}

void CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
    _gpuContext* gpu = data.gpu;
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double errorTol = integrator.getErrorTolerance();
    if (temperature != prevTemp || friction != prevFriction || errorTol != prevErrorTol) {
        // Initialize the GPU parameters.

        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
        gpuSetLangevinIntegrationParameters(gpu, (float) tau, 0.0f, (float) temperature, errorTol);
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevErrorTol = errorTol;
    }
    float maxStepSize = (float)(maxTime-data.time);
    kSelectLangevinStepSize(gpu, maxStepSize);
    kLangevinUpdatePart1(gpu);
    kApplyFirstShake(gpu);
    kApplyFirstSettle(gpu);
    kApplyFirstCCMA(gpu);
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
            gpu->bCalculateCM = true;
    kLangevinUpdatePart2(gpu);
    kApplySecondShake(gpu);
    kApplySecondSettle(gpu);
    kApplySecondCCMA(gpu);
    gpu->psStepSize->Download();
    data.time += (*gpu->psStepSize)[0].y;
    if ((*gpu->psStepSize)[0].y == maxStepSize)
        data.time = maxTime; // Avoid round-off error
    data.stepCount++;
}

802
803
804
805
CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
}

void CudaApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
806
807
808
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) thermostat.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
809
810
811
    prevStepSize = -1.0;
}

812
void CudaApplyAndersenThermostatKernel::execute(ContextImpl& context) {
813
    _gpuContext* gpu = data.gpu;
814
815
    double temperature = context.getParameter(AndersenThermostat::Temperature());
    double frequency = context.getParameter(AndersenThermostat::CollisionFrequency());
816
817
818
819
    double stepSize = context.getIntegrator().getStepSize();
    if (temperature != prevTemp || frequency != prevFrequency || stepSize != prevStepSize) {
        // Initialize the GPU parameters.
        
820
        gpuSetAndersenThermostatParameters(gpu, (float) temperature, frequency);
821
822
823
824
825
826
827
828
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFrequency = frequency;
        prevStepSize = stepSize;
    }
    kCalculateAndersenThermostat(gpu);
}
829

830
void CudaCalcKineticEnergyKernel::initialize(const System& system) {
Peter Eastman's avatar
Peter Eastman committed
831
832
    int numParticles = system.getNumParticles();
    masses.resize(numParticles);
833
    for (int i = 0; i < numParticles; ++i)
Peter Eastman's avatar
Peter Eastman committed
834
        masses[i] = system.getParticleMass(i);
835
836
}

837
double CudaCalcKineticEnergyKernel::execute(ContextImpl& context) {
838
839
840
    // We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy
    // on the CPU.
    
841
842
    _gpuContext* gpu = data.gpu;
    gpu->psVelm4->Download();
843
    double energy = 0.0;
844
845
846
847
    for (int i = 0; i < (int) masses.size(); ++i) {
        float4 v = (*gpu->psVelm4)[i];
        energy += masses[i]*(v.x*v.x+v.y*v.y+v.z*v.z);
    }
848
849
    return 0.5*energy;
}
850

851
void CudaRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
852
    data.removeCM = true;
853
    data.cmMotionFrequency = force.getFrequency();
854
855
}

856
void CudaRemoveCMMotionKernel::execute(ContextImpl& context) {
857
}