CudaKernels.cpp 45.3 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
9
 * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
10
11
12
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
13
14
15
16
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
17
 *                                                                            *
18
19
20
21
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
22
 *                                                                            *
23
24
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
25
26
27
 * -------------------------------------------------------------------------- */

#include "CudaKernels.h"
28
#include "openmm/LangevinIntegrator.h"
29
#include "openmm/Context.h"
30
#include "openmm/OpenMMException.h"
31
#include "openmm/internal/CMAPTorsionForceImpl.h"
32
#include "openmm/internal/ContextImpl.h"
33
#include "openmm/internal/NonbondedForceImpl.h"
34
#include "kernels/gputypes.h"
35
#include "kernels/cudaKernels.h"
36
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
37
38
#include <cmath>

39
extern "C" int OPENMMCUDA_EXPORT gpuSetConstants( gpuContext gpu );
40
41
42
43

using namespace OpenMM;
using namespace std;

44
45
46
void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
}

47
void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
48
    _gpuContext* gpu = data.gpu;
49
    if (data.nonbondedMethod != NO_CUTOFF && data.computeForceCount%100 == 0)
50
        gpuReorderAtoms(gpu);
51
52
    if (includeForces)
        data.computeForceCount++;
53
54
    if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
        kClearBornSumAndForces(gpu);
55
    else if (includeForces)
56
        kClearForces(gpu);
57
58
59
    if (includeEnergy) {
        data.stepCount++;
        kClearEnergy(gpu);
60
    }
61
62
}

63
double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
64
    _gpuContext* gpu = data.gpu;
Mark Friedrichs's avatar
Mark Friedrichs committed
65
    if (gpu->bIncludeGBSA || gpu->bIncludeGBVI) {
66
67
68
        gpu->bRecalculateBornRadii = true;
        kCalculateCDLJObcGbsaForces1(gpu);
        kReduceObcGbsaBornForces(gpu);
Mark Friedrichs's avatar
Mark Friedrichs committed
69
70
71
72
73
        if (gpu->bIncludeGBSA ) {
           kCalculateObcGbsaForces2(gpu);
        } else {
           kCalculateGBVIForces2(gpu);
        }
74
    }
75
76
77
78
79
    else if (data.hasNonbonded)
        kCalculateCDLJForces(gpu);
    if (data.hasCustomNonbonded)
        kCalculateCustomNonbondedForces(gpu, data.hasNonbonded);
    kCalculateLocalForces(gpu);
80
81
82
83
84
85
86
87
    if (includeForces)
        kReduceForces(gpu);
    double energy = 0.0;
    if (includeEnergy) {
        energy = kReduceEnergy(gpu)+data.ewaldSelfEnergy;
        if (data.dispersionCoefficient != 0.0)
            energy += data.dispersionCoefficient/(gpu->sim.periodicBoxSizeX*gpu->sim.periodicBoxSizeY*gpu->sim.periodicBoxSizeZ);
    }
88
    return energy;
89
90
}

91
void CudaUpdateStateDataKernel::initialize(const System& system) {
92
93
}

94
double CudaUpdateStateDataKernel::getTime(const ContextImpl& context) const {
95
96
97
    return data.time;
}

98
void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
99
100
101
    data.time = time;
}

102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector<Vec3>& positions) {
    _gpuContext* gpu = data.gpu;
    gpu->psPosq4->Download();
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    positions.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
        float4 pos = (*gpu->psPosq4)[i];
        int3 offset = gpu->posCellOffsets[i];
        positions[order[i]] = Vec3(pos.x-offset.x*gpu->sim.periodicBoxSizeX, pos.y-offset.y*gpu->sim.periodicBoxSizeY, pos.z-offset.z*gpu->sim.periodicBoxSizeZ);
    }
}

void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector<Vec3>& positions) {
    _gpuContext* gpu = data.gpu;
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    for (int i = 0; i < numParticles; ++i) {
        float4& pos = (*gpu->psPosq4)[i];
        const Vec3& p = positions[order[i]];
122
123
124
        pos.x = (float) p[0];
        pos.y = (float) p[1];
        pos.z = (float) p[2];
125
126
    }
    gpu->psPosq4->Upload();
127
    for (int i = 0; i < (int) gpu->posCellOffsets.size(); i++)
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
        gpu->posCellOffsets[i] = make_int3(0, 0, 0);
}

void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector<Vec3>& velocities) {
    _gpuContext* gpu = data.gpu;
    gpu->psVelm4->Download();
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    velocities.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
        float4 vel = (*gpu->psVelm4)[i];
        velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
    }
}

void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities) {
    _gpuContext* gpu = data.gpu;
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    for (int i = 0; i < numParticles; ++i) {
        float4& vel = (*gpu->psVelm4)[i];
        const Vec3& v = velocities[order[i]];
150
151
152
        vel.x = (float) v[0];
        vel.y = (float) v[1];
        vel.z = (float) v[2];
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
    }
    gpu->psVelm4->Upload();
}

void CudaUpdateStateDataKernel::getForces(ContextImpl& context, std::vector<Vec3>& forces) {
    _gpuContext* gpu = data.gpu;
    int* order = gpu->psAtomIndex->_pSysData;
    gpu->psForce4->Download();
    int numParticles = context.getSystem().getNumParticles();
    forces.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
        float4 force = (*gpu->psForce4)[i];
        forces[order[i]] = Vec3(force.x, force.y, force.z);
    }
}

169
170
171
172
173
174
175
176
177
178
179
180
181
void CudaUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
    _gpuContext* gpu = data.gpu;
    a = Vec3(gpu->sim.periodicBoxSizeX, 0, 0);
    b = Vec3(0, gpu->sim.periodicBoxSizeY, 0);
    c = Vec3(0, 0, gpu->sim.periodicBoxSizeZ);
}

void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const {
    _gpuContext* gpu = data.gpu;
    gpuSetPeriodicBoxSize(gpu, a[0], b[1], c[2]);
    gpuSetConstants(gpu);
}

182
183
184
185
186
187
188
void CudaApplyConstraintsKernel::initialize(const System& system) {
}

void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
    kApplyConstraints(data.gpu);
}

189
190
191
192
193
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
}

void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
    data.hasBonds = true;
194
    numBonds = force.getNumBonds();
Peter Eastman's avatar
Peter Eastman committed
195
196
    vector<int> particle1(numBonds);
    vector<int> particle2(numBonds);
197
198
199
200
    vector<float> length(numBonds);
    vector<float> k(numBonds);
    for (int i = 0; i < numBonds; i++) {
        double lengthValue, kValue;
Peter Eastman's avatar
Peter Eastman committed
201
        force.getBondParameters(i, particle1[i], particle2[i], lengthValue, kValue);
202
203
204
        length[i] = (float) lengthValue;
        k[i] = (float) kValue;
    }
Peter Eastman's avatar
Peter Eastman committed
205
    gpuSetBondParameters(data.gpu, particle1, particle2, length, k);
206
207
}

208
double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
209
210
211
    return 0.0;
}

212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
}

void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
    numBonds = force.getNumBonds();
    vector<int> particle1(numBonds);
    vector<int> particle2(numBonds);
    vector<vector<double> > params(numBonds);
    for (int i = 0; i < numBonds; i++)
        force.getBondParameters(i, particle1[i], particle2[i], params[i]);
    vector<string> paramNames;
    for (int i = 0; i < force.getNumPerBondParameters(); i++)
        paramNames.push_back(force.getPerBondParameterName(i));
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
    gpuSetCustomBondParameters(data.gpu, particle1, particle2, params, force.getEnergyFunction(), paramNames, globalParamNames);
    if (globalParamValues.size() > 0)
Peter Eastman's avatar
Peter Eastman committed
233
        SetCustomBondGlobalParams(globalParamValues);
234
235
}

236
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
237
238
239
240
241
242
243
244
245
246
247
248
249
250
    updateGlobalParams(context);
    kCalculateCustomBondForces(data.gpu);
    return 0.0;
}

void CudaCalcCustomBondForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
    for (int i = 0; i < (int) globalParamNames.size(); i++) {
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
Peter Eastman's avatar
Peter Eastman committed
251
        SetCustomBondGlobalParams(globalParamValues);
252
253
}

254
255
256
257
258
CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
}

void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
    data.hasAngles = true;
259
    numAngles = force.getNumAngles();
260
    const float RadiansToDegrees = (float) (180.0/3.14159265);
Peter Eastman's avatar
Peter Eastman committed
261
262
263
    vector<int> particle1(numAngles);
    vector<int> particle2(numAngles);
    vector<int> particle3(numAngles);
264
265
266
267
    vector<float> angle(numAngles);
    vector<float> k(numAngles);
    for (int i = 0; i < numAngles; i++) {
        double angleValue, kValue;
Peter Eastman's avatar
Peter Eastman committed
268
        force.getAngleParameters(i, particle1[i], particle2[i], particle3[i], angleValue, kValue);
269
270
        angle[i] = (float) (angleValue*RadiansToDegrees);
        k[i] = (float) kValue;
271
    }
Peter Eastman's avatar
Peter Eastman committed
272
    gpuSetBondAngleParameters(data.gpu, particle1, particle2, particle3, angle, k);
273
}
274

275
double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
276
277
278
    return 0.0;
}

279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
}

void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
    numAngles = force.getNumAngles();
    vector<int> particle1(numAngles);
    vector<int> particle2(numAngles);
    vector<int> particle3(numAngles);
    vector<vector<double> > params(numAngles);
    for (int i = 0; i < numAngles; i++)
        force.getAngleParameters(i, particle1[i], particle2[i], particle3[i], params[i]);
    vector<string> paramNames;
    for (int i = 0; i < force.getNumPerAngleParameters(); i++)
        paramNames.push_back(force.getPerAngleParameterName(i));
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
    gpuSetCustomAngleParameters(data.gpu, particle1, particle2, particle3, params, force.getEnergyFunction(), paramNames, globalParamNames);
    if (globalParamValues.size() > 0)
        SetCustomAngleGlobalParams(globalParamValues);
}

304
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
    updateGlobalParams(context);
    kCalculateCustomAngleForces(data.gpu);
    return 0.0;
}

void CudaCalcCustomAngleForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
    for (int i = 0; i < (int) globalParamNames.size(); i++) {
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
        SetCustomAngleGlobalParams(globalParamValues);
}

322
323
324
325
326
327
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
}

void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
    data.hasPeriodicTorsions = true;
    numTorsions = force.getNumTorsions();
328
    const float RadiansToDegrees = (float)(180.0/3.14159265);
Peter Eastman's avatar
Peter Eastman committed
329
330
331
332
    vector<int> particle1(numTorsions);
    vector<int> particle2(numTorsions);
    vector<int> particle3(numTorsions);
    vector<int> particle4(numTorsions);
333
334
335
336
337
    vector<float> k(numTorsions);
    vector<float> phase(numTorsions);
    vector<int> periodicity(numTorsions);
    for (int i = 0; i < numTorsions; i++) {
        double kValue, phaseValue;
Peter Eastman's avatar
Peter Eastman committed
338
        force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], periodicity[i], phaseValue, kValue);
339
340
        k[i] = (float) kValue;
        phase[i] = (float) (phaseValue*RadiansToDegrees);
341
    }
Peter Eastman's avatar
Peter Eastman committed
342
    gpuSetDihedralParameters(data.gpu, particle1, particle2, particle3, particle4, k, phase, periodicity);
343
344
}

345
double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
346
347
348
349
350
351
352
353
354
    return 0.0;
}

CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
}

void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
    data.hasRB = true;
    numTorsions = force.getNumTorsions();
Peter Eastman's avatar
Peter Eastman committed
355
356
357
358
    vector<int> particle1(numTorsions);
    vector<int> particle2(numTorsions);
    vector<int> particle3(numTorsions);
    vector<int> particle4(numTorsions);
359
360
361
362
363
364
365
366
    vector<float> c0(numTorsions);
    vector<float> c1(numTorsions);
    vector<float> c2(numTorsions);
    vector<float> c3(numTorsions);
    vector<float> c4(numTorsions);
    vector<float> c5(numTorsions);
    for (int i = 0; i < numTorsions; i++) {
        double c[6];
Peter Eastman's avatar
Peter Eastman committed
367
        force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], c[0], c[1], c[2], c[3], c[4], c[5]);
368
369
370
371
372
373
        c0[i] = (float) c[0];
        c1[i] = (float) c[1];
        c2[i] = (float) c[2];
        c3[i] = (float) c[3];
        c4[i] = (float) c[4];
        c5[i] = (float) c[5];
374
    }
Peter Eastman's avatar
Peter Eastman committed
375
    gpuSetRbDihedralParameters(data.gpu, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
376
377
}

378
double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
379
380
381
    return 0.0;
}

382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
    if (coefficients != NULL)
        delete coefficients;
    if (mapPositions != NULL)
        delete mapPositions;
    if (torsionMaps != NULL)
        delete torsionMaps;
    if (torsionIndices != NULL)
        delete torsionIndices;
}

void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
    numTorsions = force.getNumTorsions();
    if (numTorsions == 0)
        return;
    int numMaps = force.getNumMaps();
    vector<float4> coeffVec;
    vector<int2> mapPositionsVec(numMaps);
    vector<double> energy;
    vector<vector<double> > c;
    int currentPosition = 0;
    mapPositions = new CUDAStream<int2>(numMaps, 1, "cmapTorsionMapPositions");
    for (int i = 0; i < numMaps; i++) {
        int size;
        force.getMapParameters(i, size, energy);
        CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
        (*mapPositions)[i] = make_int2(currentPosition, size);
        currentPosition += 4*size*size;
        for (int j = 0; j < size*size; j++) {
            coeffVec.push_back(make_float4(c[j][0], c[j][1], c[j][2], c[j][3]));
            coeffVec.push_back(make_float4(c[j][4], c[j][5], c[j][6], c[j][7]));
            coeffVec.push_back(make_float4(c[j][8], c[j][9], c[j][10], c[j][11]));
            coeffVec.push_back(make_float4(c[j][12], c[j][13], c[j][14], c[j][15]));
        }
    }
    coefficients = new CUDAStream<float4>((int) coeffVec.size(), 1, "cmapTorsionCoefficients");;
    for (int i = 0; i < (int) coeffVec.size(); i++)
        (*coefficients)[i] = coeffVec[i];
    torsionMaps = new CUDAStream<int>(numTorsions, 1, "cmapTorsionMaps");
    torsionIndices = new CUDAStream<int4>(4*numTorsions, 1, "cmapTorsionIndices");
    vector<int> forceBufferCounter(system.getNumParticles(), 0);
    for (int i = 0; i < numTorsions; i++) {
        int map, a1, a2, a3, a4, b1, b2, b3, b4;
        force.getTorsionParameters(i, map, a1, a2, a3, a4, b1, b2, b3, b4);
        (*torsionMaps)[i] = map;
        (*torsionIndices)[i*4] = make_int4(a1, a2, a3, a4);
        (*torsionIndices)[i*4+1] = make_int4(b1, b2, b3, b4);
        (*torsionIndices)[i*4+2] = make_int4(forceBufferCounter[a1]++, forceBufferCounter[a2]++, forceBufferCounter[a3]++, forceBufferCounter[a4]++);
        (*torsionIndices)[i*4+3] = make_int4(forceBufferCounter[b1]++, forceBufferCounter[b2]++, forceBufferCounter[b3]++, forceBufferCounter[b4]++);
    }
    coefficients->Upload();
    mapPositions->Upload();
    torsionMaps->Upload();
    torsionIndices->Upload();
    int maxBuffers = 1;
    for (int i = 0; i < (int) forceBufferCounter.size(); i++)
        maxBuffers = max(maxBuffers, forceBufferCounter[i]);
    if (maxBuffers > data.gpu->sim.outputBuffers)
        data.gpu->sim.outputBuffers = maxBuffers;
}

443
double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
444
445
446
447
    kCalculateCMAPTorsionForces(data.gpu, *coefficients, *mapPositions, *torsionIndices, *torsionMaps);
    return 0.0;
}

448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
}

void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
    numTorsions = force.getNumTorsions();
    vector<int> particle1(numTorsions);
    vector<int> particle2(numTorsions);
    vector<int> particle3(numTorsions);
    vector<int> particle4(numTorsions);
    vector<vector<double> > params(numTorsions);
    for (int i = 0; i < numTorsions; i++)
        force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], params[i]);
    vector<string> paramNames;
    for (int i = 0; i < force.getNumPerTorsionParameters(); i++)
        paramNames.push_back(force.getPerTorsionParameterName(i));
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
    gpuSetCustomTorsionParameters(data.gpu, particle1, particle2, particle3, particle4, params, force.getEnergyFunction(), paramNames, globalParamNames);
    if (globalParamValues.size() > 0)
        SetCustomTorsionGlobalParams(globalParamValues);
}

474
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
    updateGlobalParams(context);
    kCalculateCustomTorsionForces(data.gpu);
    return 0.0;
}

void CudaCalcCustomTorsionForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
    for (int i = 0; i < (int) globalParamNames.size(); i++) {
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
        SetCustomTorsionGlobalParams(globalParamValues);
}

492
493
494
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
}

495
void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
496
    data.hasNonbonded = true;
Peter Eastman's avatar
Peter Eastman committed
497
    numParticles = force.getNumParticles();
498
    _gpuContext* gpu = data.gpu;
499
500
501
502

    // Identify which exceptions are 1-4 interactions.

    vector<pair<int, int> > exclusions;
503
    vector<int> exceptions;
504
505
506
507
508
509
    for (int i = 0; i < force.getNumExceptions(); i++) {
        int particle1, particle2;
        double chargeProd, sigma, epsilon;
        force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
        exclusions.push_back(pair<int, int>(particle1, particle2));
        if (chargeProd != 0.0 || epsilon != 0.0)
510
            exceptions.push_back(i);
511
512
    }

513
514
    // Initialize nonbonded interactions.
    
515
    {
Peter Eastman's avatar
Peter Eastman committed
516
517
518
519
        vector<int> particle(numParticles);
        vector<float> c6(numParticles);
        vector<float> c12(numParticles);
        vector<float> q(numParticles);
520
        vector<char> symbol;
Peter Eastman's avatar
Peter Eastman committed
521
522
        vector<vector<int> > exclusionList(numParticles);
        for (int i = 0; i < numParticles; i++) {
523
            double charge, radius, depth;
Peter Eastman's avatar
Peter Eastman committed
524
525
            force.getParticleParameters(i, charge, radius, depth);
            particle[i] = i;
526
527
528
            q[i] = (float) charge;
            c6[i] = (float) (4*depth*pow(radius, 6.0));
            c12[i] = (float) (4*depth*pow(radius, 12.0));
529
530
            exclusionList[i].push_back(i);
        }
531
        for (int i = 0; i < (int)exclusions.size(); i++) {
532
533
534
            exclusionList[exclusions[i].first].push_back(exclusions[i].second);
            exclusionList[exclusions[i].second].push_back(exclusions[i].first);
        }
535
536
        CudaNonbondedMethod method = NO_CUTOFF;
        if (force.getNonbondedMethod() != NonbondedForce::NoCutoff) {
537
            gpuSetNonbondedCutoff(gpu, (float) force.getCutoffDistance(), (float) force.getReactionFieldDielectric());
538
539
540
541
542
            method = CUTOFF;
        }
        if (force.getNonbondedMethod() == NonbondedForce::CutoffPeriodic) {
            method = PERIODIC;
        }
543
544
        if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
            if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
545
546
547
                double alpha;
                int kmaxx, kmaxy, kmaxz;
                NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
548
549
550
551
                gpuSetEwaldParameters(gpu, (float) alpha, kmaxx, kmaxy, kmaxz);
                method = EWALD;
            }
            else {
552
553
554
                double alpha;
                int gridSizeX, gridSizeY, gridSizeZ;
                NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
Peter Eastman's avatar
Peter Eastman committed
555
                gpuSetPMEParameters(gpu, (float) alpha, gridSizeX, gridSizeY, gridSizeZ);
556
557
                method = PARTICLE_MESH_EWALD;
            }
558
        }
559
        data.nonbondedMethod = method;
560
        gpuSetCoulombParameters(gpu, (float) ONE_4PI_EPS0, particle, c6, c12, q, symbol, exclusionList, method);
561
562
563
564

        // Compute the Ewald self energy.

        data.ewaldSelfEnergy = 0.0;
565
566
567
568
569
        if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
            double selfEnergyScale = gpu->sim.epsfac*gpu->sim.alphaEwald/std::sqrt(PI);
                for (int i = 0; i < numParticles; i++)
                    data.ewaldSelfEnergy -= selfEnergyScale*q[i]*q[i];
        }
570
571
572
573
574
575
576

        // Compute the long range dispersion correction.

        if (force.getUseDispersionCorrection())
            data.dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
        else
            data.dispersionCoefficient = 0.0;
577
578
579
580
    }

    // Initialize 1-4 nonbonded interactions.
    
581
    {
582
583
584
585
586
587
588
589
        int numExceptions = exceptions.size();
        vector<int> particle1(numExceptions);
        vector<int> particle2(numExceptions);
        vector<float> c6(numExceptions);
        vector<float> c12(numExceptions);
        vector<float> q1(numExceptions);
        vector<float> q2(numExceptions);
        for (int i = 0; i < numExceptions; i++) {
590
            double charge, sig, eps;
591
            force.getExceptionParameters(exceptions[i], particle1[i], particle2[i], charge, sig, eps);
592
593
            c6[i] = (float) (4*eps*pow(sig, 6.0));
            c12[i] = (float) (4*eps*pow(sig, 12.0));
Peter Eastman's avatar
Peter Eastman committed
594
595
            q1[i] = (float) charge;
            q2[i] = 1.0f;
596
        }
597
        gpuSetLJ14Parameters(gpu, (float) ONE_4PI_EPS0, 1.0f, particle1, particle2, c6, c12, q1, q2);
598
599
600
    }
}

601
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
602
    return 0.0;
603
604
}

605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
}

void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
    data.hasCustomNonbonded = true;
    numParticles = force.getNumParticles();
    _gpuContext* gpu = data.gpu;

    // Initialize nonbonded interactions.

    vector<int> particle(numParticles);
    vector<vector<double> > parameters(numParticles);
    vector<vector<int> > exclusionList(numParticles);
    for (int i = 0; i < numParticles; i++) {
        force.getParticleParameters(i, parameters[i]);
        particle[i] = i;
        exclusionList[i].push_back(i);
    }
623
624
625
626
627
    for (int i = 0; i < force.getNumExclusions(); i++) {
        int particle1, particle2;
        force.getExclusionParticles(i, particle1, particle2);
        exclusionList[particle1].push_back(particle2);
        exclusionList[particle2].push_back(particle1);
628
629
630
631
632
633
634
635
636
    }
    CudaNonbondedMethod method = NO_CUTOFF;
    if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
        method = CUTOFF;
    if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic) {
        method = PERIODIC;
    }
    data.customNonbondedMethod = method;

637
638
639
640
641
642
643
644
645
646
647
    // Record the tabulated functions.

    for (int i = 0; i < force.getNumFunctions(); i++) {
        string name;
        vector<double> values;
        double min, max;
        bool interpolating;
        force.getFunctionParameters(i, name, values, min, max, interpolating);
        gpuSetTabulatedFunction(gpu, i, name, values, min, max, interpolating);
    }

648
649
650
    // Record information for the expressions.

    vector<string> paramNames;
651
652
    for (int i = 0; i < force.getNumPerParticleParameters(); i++)
        paramNames.push_back(force.getPerParticleParameterName(i));
653
654
655
656
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
657
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
658
    }
659
    gpuSetCustomNonbondedParameters(gpu, parameters, exclusionList, method, (float) force.getCutoffDistance(), force.getEnergyFunction(), paramNames, globalParamNames);
660
    if (globalParamValues.size() > 0)
Peter Eastman's avatar
Peter Eastman committed
661
        SetCustomNonbondedGlobalParams(globalParamValues);
662
663
}

664
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
665
    updateGlobalParams(context);
666
667
668
    return 0.0;
}

669
670
void CudaCalcCustomNonbondedForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
671
    for (int i = 0; i < (int) globalParamNames.size(); i++) {
672
673
674
675
676
677
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
Peter Eastman's avatar
Peter Eastman committed
678
        SetCustomNonbondedGlobalParams(globalParamValues);
679
680
}

681
CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
682
683
}

684
void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
685

Peter Eastman's avatar
Peter Eastman committed
686
    int numParticles = system.getNumParticles();
687
    _gpuContext* gpu = data.gpu;
Peter Eastman's avatar
Peter Eastman committed
688
689
    vector<float> radius(numParticles);
    vector<float> scale(numParticles);
690
    vector<float> charge(numParticles);
Peter Eastman's avatar
Peter Eastman committed
691
    for (int i = 0; i < numParticles; i++) {
692
693
        double particleCharge, particleRadius, scalingFactor;
        force.getParticleParameters(i, particleCharge, particleRadius, scalingFactor);
Peter Eastman's avatar
Peter Eastman committed
694
        radius[i] = (float) particleRadius;
695
        scale[i] = (float) scalingFactor;
696
        charge[i] = (float) particleCharge;
697
    }
698
    gpuSetObcParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), radius, scale, charge);
699
700
}

701
702
double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
	return 0.0;
703
704
}

Mark Friedrichs's avatar
Mark Friedrichs committed
705
706
707
708
709
710
711
712
713
714
715
716
717
718
CudaCalcGBVIForceKernel::~CudaCalcGBVIForceKernel() {
}

void CudaCalcGBVIForceKernel::initialize(const System& system, const GBVIForce& force, const std::vector<double> & inputScaledRadii) {

    int numParticles = system.getNumParticles();
    _gpuContext* gpu = data.gpu;

    vector<int> particle(numParticles);
    vector<float> radius(numParticles);
    vector<float> scaledRadii(numParticles);
    vector<float> gammas(numParticles);

    for (int i = 0; i < numParticles; i++) {
719
        double charge, particleRadius, gamma;
Mark Friedrichs's avatar
Mark Friedrichs committed
720
721
722
723
724
725
726
727
728
729
        force.getParticleParameters(i, charge, particleRadius, gamma );
        particle[i]                  = i;
        radius[i]                    = (float) particleRadius;
        gammas[i]                    = (float) gamma;
        scaledRadii[i]               = (float) inputScaledRadii[i];
    }
    gpuSetGBVIParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), particle,
                         radius, gammas, scaledRadii );
}

730
double CudaCalcGBVIForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
Mark Friedrichs's avatar
Mark Friedrichs committed
731
732
733
    return 0.0;
}

734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
}

void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
    numParticles = force.getNumParticles();
    vector<int> particle(numParticles);
    vector<vector<double> > params(numParticles);
    for (int i = 0; i < numParticles; i++)
        force.getParticleParameters(i, particle[i], params[i]);
    vector<string> paramNames;
    for (int i = 0; i < force.getNumPerParticleParameters(); i++)
        paramNames.push_back(force.getPerParticleParameterName(i));
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
    gpuSetCustomExternalParameters(data.gpu, particle, params, force.getEnergyFunction(), paramNames, globalParamNames);
    if (globalParamValues.size() > 0)
Peter Eastman's avatar
Peter Eastman committed
754
        SetCustomExternalGlobalParams(globalParamValues);
755
756
}

757
double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
758
759
760
761
762
763
764
765
766
767
768
769
770
771
    updateGlobalParams(context);
    kCalculateCustomExternalForces(data.gpu);
    return 0.0;
}

void CudaCalcCustomExternalForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
    for (int i = 0; i < (int) globalParamNames.size(); i++) {
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
Peter Eastman's avatar
Peter Eastman committed
772
        SetCustomExternalGlobalParams(globalParamValues);
773
774
}

775
void OPENMMCUDA_EXPORT OpenMM::cudaOpenMMInitializeIntegration(const System& system, CudaPlatform::PlatformData& data, const Integrator& integrator) {
776
777
778
779
780
781
782
783
784
785
786
787
788
789

    // Initialize any terms that haven't already been handled by a Force.

    _gpuContext* gpu = data.gpu;
    if (!data.hasBonds)
        gpuSetBondParameters(gpu, vector<int>(), vector<int>(), vector<float>(), vector<float>());
    if (!data.hasAngles)
        gpuSetBondAngleParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>());
    if (!data.hasPeriodicTorsions)
        gpuSetDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<int>());
    if (!data.hasRB)
        gpuSetRbDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(),
                vector<float>(), vector<float>(), vector<float>(), vector<float>());
    if (!data.hasNonbonded) {
790
791
        gpuSetCoulombParameters(gpu, (float) ONE_4PI_EPS0, vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<char>(), vector<vector<int> >(), NO_CUTOFF);
        gpuSetLJ14Parameters(gpu, (float) ONE_4PI_EPS0, 1.0f, vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<float>());
792
793
        if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
            throw OpenMMException("CudaPlatform requires GBSAOBCForce and GBVIForce to be used with a NonbondedForce");
794
    }
795
796
797
    
    // Set masses.
    
Peter Eastman's avatar
Peter Eastman committed
798
799
800
801
    int numParticles = system.getNumParticles();
    vector<float> mass(numParticles);
    for (int i = 0; i < numParticles; i++)
        mass[i] = (float) system.getParticleMass(i);
802
803
804
805
    gpuSetMass(gpu, mass);
    
    // Set constraints.
    
806
    int numConstraints = system.getNumConstraints();
Peter Eastman's avatar
Peter Eastman committed
807
808
    vector<int> particle1(numConstraints);
    vector<int> particle2(numConstraints);
809
810
811
812
    vector<float> distance(numConstraints);
    vector<float> invMass1(numConstraints);
    vector<float> invMass2(numConstraints);
    for (int i = 0; i < numConstraints; i++) {
Peter Eastman's avatar
Peter Eastman committed
813
        int particle1Index, particle2Index;
814
        double constraintDistance;
Peter Eastman's avatar
Peter Eastman committed
815
816
817
        system.getConstraintParameters(i, particle1Index, particle2Index, constraintDistance);
        particle1[i] = particle1Index;
        particle2[i] = particle2Index;
818
        distance[i] = (float) constraintDistance;
Peter Eastman's avatar
Peter Eastman committed
819
820
        invMass1[i] = 1.0f/mass[particle1Index];
        invMass2[i] = 1.0f/mass[particle2Index];
821
    }
822
    gpuSetConstraintParameters(gpu, particle1, particle2, distance, invMass1, invMass2, (float)integrator.getConstraintTolerance());
823
824
    
    // Finish initialization.
825

826
827
828
    gpuBuildThreadBlockWorkList(gpu);
    gpuBuildExclusionList(gpu);
    gpuBuildOutputBuffers(gpu);
829
    gpuSetConstants(gpu);
830
831
832
833
    if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
        kClearBornSumAndForces(gpu);
    else
        kClearForces(gpu);
834
    cudaThreadSynchronize();
835
836
837
838
839
840
}

CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
}

void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
841
    cudaOpenMMInitializeIntegration(system, data, integrator);
842
843
844
    prevStepSize = -1.0;
}

845
void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
846
847
848
849
850
    _gpuContext* gpu = data.gpu;
    double stepSize = integrator.getStepSize();
    if (stepSize != prevStepSize) {
        // Initialize the GPU parameters.
        
851
        gpuSetVerletIntegrationParameters(gpu, (float) stepSize, 0.0f);
852
853
854
855
856
        gpuSetConstants(gpu);
        prevStepSize = stepSize;
    }
    kVerletUpdatePart1(gpu);
    kApplyFirstShake(gpu);
857
    kApplyFirstSettle(gpu);
858
    kApplyFirstCCMA(gpu);
859
860
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
861
862
            gpu->bCalculateCM = true;
    kVerletUpdatePart2(gpu);
863
    data.time += stepSize;
864
    data.stepCount++;
865
866
867
868
869
870
}

CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
}

void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
871
    cudaOpenMMInitializeIntegration(system, data, integrator);
872
873
874
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
875
876
    prevTemp = -1.0;
    prevFriction = -1.0;
877
878
879
    prevStepSize = -1.0;
}

880
void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
881
    _gpuContext* gpu = data.gpu;
882
883
884
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double stepSize = integrator.getStepSize();
885
886
    if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
        // Initialize the GPU parameters.
887
        
888
        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
889
        gpuSetLangevinIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature, 0.0f);
890
891
892
893
894
895
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevStepSize = stepSize;
    }
896
    kLangevinUpdatePart1(gpu);
897
898
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
899
            gpu->bCalculateCM = true;
900
    kLangevinUpdatePart2(gpu);
901
    kApplySecondShake(gpu);
902
    kApplySecondSettle(gpu);
903
    kApplySecondCCMA(gpu);
904
    kSetVelocitiesFromPositions(gpu);
905
    data.time += stepSize;
906
    data.stepCount++;
907
}
908
909
910
911
912

CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
}

void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
913
    cudaOpenMMInitializeIntegration(system, data, integrator);
914
915
916
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
917
918
    prevTemp = -1.0;
    prevFriction = -1.0;
919
920
921
    prevStepSize = -1.0;
}

922
void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
923
924
925
926
927
928
929
930
    _gpuContext* gpu = data.gpu;
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double stepSize = integrator.getStepSize();
    if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
        // Initialize the GPU parameters.
        
        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
931
        gpuSetBrownianIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature);
932
933
934
935
936
937
938
939
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevStepSize = stepSize;
    }
    kBrownianUpdatePart1(gpu);
    kApplyFirstShake(gpu);
940
    kApplyFirstSettle(gpu);
941
    kApplyFirstCCMA(gpu);
942
943
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
944
945
            gpu->bCalculateCM = true;
    kBrownianUpdatePart2(gpu);
946
    data.time += stepSize;
947
948
949
950
951
952
953
    data.stepCount++;
}

CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() {
}

void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
954
    cudaOpenMMInitializeIntegration(system, data, integrator);
955
956
957
    prevErrorTol = -1.0;
}

958
void CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
959
960
961
962
963
964
965
966
967
    _gpuContext* gpu = data.gpu;
    double errorTol = integrator.getErrorTolerance();
    if (errorTol != prevErrorTol) {
        // Initialize the GPU parameters.

        gpuSetVerletIntegrationParameters(gpu, 0.0f, (float) errorTol);
        gpuSetConstants(gpu);
        prevErrorTol = errorTol;
    }
968
    float maxStepSize = (float)(maxTime-data.time);
969
    kSelectVerletStepSize(gpu, maxStepSize);
970
971
972
973
974
975
976
977
978
979
    kVerletUpdatePart1(gpu);
    kApplyFirstShake(gpu);
    kApplyFirstSettle(gpu);
    kApplyFirstCCMA(gpu);
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
            gpu->bCalculateCM = true;
    kVerletUpdatePart2(gpu);
    gpu->psStepSize->Download();
    data.time += (*gpu->psStepSize)[0].y;
980
981
    if ((*gpu->psStepSize)[0].y == maxStepSize)
        data.time = maxTime; // Avoid round-off error
982
    data.stepCount++;
983
}
984

985
986
987
988
CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKernel() {
}

void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
989
    cudaOpenMMInitializeIntegration(system, data, integrator);
990
991
992
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
993
994
    prevTemp = -1.0;
    prevFriction = -1.0;
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
    prevErrorTol = -1.0;
}

void CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
    _gpuContext* gpu = data.gpu;
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double errorTol = integrator.getErrorTolerance();
    if (temperature != prevTemp || friction != prevFriction || errorTol != prevErrorTol) {
        // Initialize the GPU parameters.

        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
1007
        gpuSetLangevinIntegrationParameters(gpu, (float) tau, 0.0f, (float) temperature, (float) errorTol);
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevErrorTol = errorTol;
    }
    float maxStepSize = (float)(maxTime-data.time);
    kSelectLangevinStepSize(gpu, maxStepSize);
    kLangevinUpdatePart1(gpu);
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
            gpu->bCalculateCM = true;
    kLangevinUpdatePart2(gpu);
    kApplySecondShake(gpu);
    kApplySecondSettle(gpu);
    kApplySecondCCMA(gpu);
1024
    kSetVelocitiesFromPositions(gpu);
1025
1026
1027
1028
1029
1030
1031
    gpu->psStepSize->Download();
    data.time += (*gpu->psStepSize)[0].y;
    if ((*gpu->psStepSize)[0].y == maxStepSize)
        data.time = maxTime; // Avoid round-off error
    data.stepCount++;
}

1032
1033
1034
1035
CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
}

void CudaApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
1036
1037
1038
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) thermostat.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
1039
1040
    prevTemp = -1.0;
    prevFrequency = -1.0;
1041
1042
1043
    prevStepSize = -1.0;
}

1044
void CudaApplyAndersenThermostatKernel::execute(ContextImpl& context) {
1045
    _gpuContext* gpu = data.gpu;
1046
1047
    double temperature = context.getParameter(AndersenThermostat::Temperature());
    double frequency = context.getParameter(AndersenThermostat::CollisionFrequency());
1048
1049
1050
1051
    double stepSize = context.getIntegrator().getStepSize();
    if (temperature != prevTemp || frequency != prevFrequency || stepSize != prevStepSize) {
        // Initialize the GPU parameters.
        
1052
        gpuSetAndersenThermostatParameters(gpu, (float) temperature, (float) frequency);
1053
1054
1055
1056
1057
1058
1059
1060
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFrequency = frequency;
        prevStepSize = stepSize;
    }
    kCalculateAndersenThermostat(gpu);
}
1061

1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
CudaApplyMonteCarloBarostatKernel::~CudaApplyMonteCarloBarostatKernel() {
    if (moleculeAtoms != NULL)
        delete moleculeAtoms;
    if (moleculeStartIndex != NULL)
        delete moleculeStartIndex;
}

void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const MonteCarloBarostat& thermostat) {
}

void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, double scale) {
    if (!hasInitializedMolecules) {
        hasInitializedMolecules = true;

        // Create the arrays with the molecule definitions.

        vector<vector<int> > molecules = context.getMolecules();
        numMolecules = molecules.size();
        moleculeAtoms = new CUDAStream<int>(context.getSystem().getNumParticles(), 1, "moleculeAtoms");
        moleculeStartIndex = new CUDAStream<int>(numMolecules+1, 1, "moleculeStartIndex");
        int index = 0;
        for (int i = 0; i < numMolecules; i++) {
            (*moleculeStartIndex)[i] = index;
            for (int j = 0; j < (int) molecules[i].size(); j++)
                (*moleculeAtoms)[index++] = molecules[i][j];
        }
        (*moleculeStartIndex)[numMolecules] = index;
        moleculeAtoms->Upload();
        moleculeStartIndex->Upload();
    }
    _gpuContext* gpu = data.gpu;
    gpu->psPosqP4->CopyFrom(*gpu->psPosq4);
    kScaleAtomCoordinates(gpu, scale, *moleculeAtoms, *moleculeStartIndex);
1095
1096
    for (int i = 0; i < (int) gpu->posCellOffsets.size(); i++)
        gpu->posCellOffsets[i] = make_int3(0, 0, 0);
1097
1098
1099
1100
1101
1102
1103
}

void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
    _gpuContext* gpu = data.gpu;
    gpu->psPosq4->CopyFrom(*gpu->psPosqP4);
}

1104
void CudaCalcKineticEnergyKernel::initialize(const System& system) {
Peter Eastman's avatar
Peter Eastman committed
1105
1106
    int numParticles = system.getNumParticles();
    masses.resize(numParticles);
1107
    for (int i = 0; i < numParticles; ++i)
Peter Eastman's avatar
Peter Eastman committed
1108
        masses[i] = system.getParticleMass(i);
1109
1110
}

1111
double CudaCalcKineticEnergyKernel::execute(ContextImpl& context) {
1112
1113
1114
    // We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy
    // on the CPU.
    
1115
1116
    _gpuContext* gpu = data.gpu;
    gpu->psVelm4->Download();
1117
    double energy = 0.0;
1118
1119
1120
1121
    for (int i = 0; i < (int) masses.size(); ++i) {
        float4 v = (*gpu->psVelm4)[i];
        energy += masses[i]*(v.x*v.x+v.y*v.y+v.z*v.z);
    }
1122
1123
    return 0.5*energy;
}
1124

1125
void CudaRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
1126
    data.removeCM = true;
1127
    data.cmMotionFrequency = force.getFrequency();
1128
1129
}

1130
void CudaRemoveCMMotionKernel::execute(ContextImpl& context) {
1131
}