CudaKernels.cpp 45.2 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
9
 * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
10
11
12
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
13
14
15
16
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
17
 *                                                                            *
18
19
20
21
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
22
 *                                                                            *
23
24
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
25
26
27
 * -------------------------------------------------------------------------- */

#include "CudaKernels.h"
28
#include "openmm/LangevinIntegrator.h"
29
#include "openmm/Context.h"
30
#include "openmm/OpenMMException.h"
31
#include "openmm/internal/CMAPTorsionForceImpl.h"
32
#include "openmm/internal/ContextImpl.h"
33
#include "openmm/internal/NonbondedForceImpl.h"
34
#include "kernels/gputypes.h"
35
#include "kernels/cudaKernels.h"
36
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
37
38
#include <cmath>

39
extern "C" int OPENMMCUDA_EXPORT gpuSetConstants( gpuContext gpu );
40
41
42
43

using namespace OpenMM;
using namespace std;

44
45
46
void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
}

47
void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
48
    _gpuContext* gpu = data.gpu;
49
    if (data.nonbondedMethod != NO_CUTOFF && data.computeForceCount%100 == 0)
50
        gpuReorderAtoms(gpu);
Peter Eastman's avatar
Peter Eastman committed
51
    data.computeForceCount++;
52
53
    if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
        kClearBornSumAndForces(gpu);
54
    else if (includeForces)
55
        kClearForces(gpu);
Peter Eastman's avatar
Peter Eastman committed
56
    if (includeEnergy)
57
        kClearEnergy(gpu);
58
59
}

60
double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
61
    _gpuContext* gpu = data.gpu;
Mark Friedrichs's avatar
Mark Friedrichs committed
62
    if (gpu->bIncludeGBSA || gpu->bIncludeGBVI) {
63
64
65
        gpu->bRecalculateBornRadii = true;
        kCalculateCDLJObcGbsaForces1(gpu);
        kReduceObcGbsaBornForces(gpu);
Mark Friedrichs's avatar
Mark Friedrichs committed
66
67
68
69
70
        if (gpu->bIncludeGBSA ) {
           kCalculateObcGbsaForces2(gpu);
        } else {
           kCalculateGBVIForces2(gpu);
        }
71
    }
72
73
74
75
76
    else if (data.hasNonbonded)
        kCalculateCDLJForces(gpu);
    if (data.hasCustomNonbonded)
        kCalculateCustomNonbondedForces(gpu, data.hasNonbonded);
    kCalculateLocalForces(gpu);
77
78
79
80
81
82
83
84
    if (includeForces)
        kReduceForces(gpu);
    double energy = 0.0;
    if (includeEnergy) {
        energy = kReduceEnergy(gpu)+data.ewaldSelfEnergy;
        if (data.dispersionCoefficient != 0.0)
            energy += data.dispersionCoefficient/(gpu->sim.periodicBoxSizeX*gpu->sim.periodicBoxSizeY*gpu->sim.periodicBoxSizeZ);
    }
85
    return energy;
86
87
}

88
void CudaUpdateStateDataKernel::initialize(const System& system) {
89
90
}

91
double CudaUpdateStateDataKernel::getTime(const ContextImpl& context) const {
92
93
94
    return data.time;
}

95
void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
96
97
98
    data.time = time;
}

99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector<Vec3>& positions) {
    _gpuContext* gpu = data.gpu;
    gpu->psPosq4->Download();
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    positions.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
        float4 pos = (*gpu->psPosq4)[i];
        int3 offset = gpu->posCellOffsets[i];
        positions[order[i]] = Vec3(pos.x-offset.x*gpu->sim.periodicBoxSizeX, pos.y-offset.y*gpu->sim.periodicBoxSizeY, pos.z-offset.z*gpu->sim.periodicBoxSizeZ);
    }
}

void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector<Vec3>& positions) {
    _gpuContext* gpu = data.gpu;
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    for (int i = 0; i < numParticles; ++i) {
        float4& pos = (*gpu->psPosq4)[i];
        const Vec3& p = positions[order[i]];
119
120
121
        pos.x = (float) p[0];
        pos.y = (float) p[1];
        pos.z = (float) p[2];
122
123
    }
    gpu->psPosq4->Upload();
124
    for (int i = 0; i < (int) gpu->posCellOffsets.size(); i++)
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
        gpu->posCellOffsets[i] = make_int3(0, 0, 0);
}

void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector<Vec3>& velocities) {
    _gpuContext* gpu = data.gpu;
    gpu->psVelm4->Download();
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    velocities.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
        float4 vel = (*gpu->psVelm4)[i];
        velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
    }
}

void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities) {
    _gpuContext* gpu = data.gpu;
    int* order = gpu->psAtomIndex->_pSysData;
    int numParticles = context.getSystem().getNumParticles();
    for (int i = 0; i < numParticles; ++i) {
        float4& vel = (*gpu->psVelm4)[i];
        const Vec3& v = velocities[order[i]];
147
148
149
        vel.x = (float) v[0];
        vel.y = (float) v[1];
        vel.z = (float) v[2];
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
    }
    gpu->psVelm4->Upload();
}

void CudaUpdateStateDataKernel::getForces(ContextImpl& context, std::vector<Vec3>& forces) {
    _gpuContext* gpu = data.gpu;
    int* order = gpu->psAtomIndex->_pSysData;
    gpu->psForce4->Download();
    int numParticles = context.getSystem().getNumParticles();
    forces.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
        float4 force = (*gpu->psForce4)[i];
        forces[order[i]] = Vec3(force.x, force.y, force.z);
    }
}

166
167
168
169
170
171
172
173
174
175
176
177
178
void CudaUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
    _gpuContext* gpu = data.gpu;
    a = Vec3(gpu->sim.periodicBoxSizeX, 0, 0);
    b = Vec3(0, gpu->sim.periodicBoxSizeY, 0);
    c = Vec3(0, 0, gpu->sim.periodicBoxSizeZ);
}

void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const {
    _gpuContext* gpu = data.gpu;
    gpuSetPeriodicBoxSize(gpu, a[0], b[1], c[2]);
    gpuSetConstants(gpu);
}

179
180
181
182
183
184
185
void CudaApplyConstraintsKernel::initialize(const System& system) {
}

void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
    kApplyConstraints(data.gpu);
}

186
187
188
189
190
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
}

void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
    data.hasBonds = true;
191
    numBonds = force.getNumBonds();
Peter Eastman's avatar
Peter Eastman committed
192
193
    vector<int> particle1(numBonds);
    vector<int> particle2(numBonds);
194
195
196
197
    vector<float> length(numBonds);
    vector<float> k(numBonds);
    for (int i = 0; i < numBonds; i++) {
        double lengthValue, kValue;
Peter Eastman's avatar
Peter Eastman committed
198
        force.getBondParameters(i, particle1[i], particle2[i], lengthValue, kValue);
199
200
201
        length[i] = (float) lengthValue;
        k[i] = (float) kValue;
    }
Peter Eastman's avatar
Peter Eastman committed
202
    gpuSetBondParameters(data.gpu, particle1, particle2, length, k);
203
204
}

205
double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
206
207
208
    return 0.0;
}

209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
}

void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
    numBonds = force.getNumBonds();
    vector<int> particle1(numBonds);
    vector<int> particle2(numBonds);
    vector<vector<double> > params(numBonds);
    for (int i = 0; i < numBonds; i++)
        force.getBondParameters(i, particle1[i], particle2[i], params[i]);
    vector<string> paramNames;
    for (int i = 0; i < force.getNumPerBondParameters(); i++)
        paramNames.push_back(force.getPerBondParameterName(i));
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
    gpuSetCustomBondParameters(data.gpu, particle1, particle2, params, force.getEnergyFunction(), paramNames, globalParamNames);
    if (globalParamValues.size() > 0)
Peter Eastman's avatar
Peter Eastman committed
230
        SetCustomBondGlobalParams(globalParamValues);
231
232
}

233
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
234
235
236
237
238
239
240
241
242
243
244
245
246
247
    updateGlobalParams(context);
    kCalculateCustomBondForces(data.gpu);
    return 0.0;
}

void CudaCalcCustomBondForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
    for (int i = 0; i < (int) globalParamNames.size(); i++) {
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
Peter Eastman's avatar
Peter Eastman committed
248
        SetCustomBondGlobalParams(globalParamValues);
249
250
}

251
252
253
254
255
CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
}

void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
    data.hasAngles = true;
256
    numAngles = force.getNumAngles();
257
    const float RadiansToDegrees = (float) (180.0/3.14159265);
Peter Eastman's avatar
Peter Eastman committed
258
259
260
    vector<int> particle1(numAngles);
    vector<int> particle2(numAngles);
    vector<int> particle3(numAngles);
261
262
263
264
    vector<float> angle(numAngles);
    vector<float> k(numAngles);
    for (int i = 0; i < numAngles; i++) {
        double angleValue, kValue;
Peter Eastman's avatar
Peter Eastman committed
265
        force.getAngleParameters(i, particle1[i], particle2[i], particle3[i], angleValue, kValue);
266
267
        angle[i] = (float) (angleValue*RadiansToDegrees);
        k[i] = (float) kValue;
268
    }
Peter Eastman's avatar
Peter Eastman committed
269
    gpuSetBondAngleParameters(data.gpu, particle1, particle2, particle3, angle, k);
270
}
271

272
double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
273
274
275
    return 0.0;
}

276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
}

void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
    numAngles = force.getNumAngles();
    vector<int> particle1(numAngles);
    vector<int> particle2(numAngles);
    vector<int> particle3(numAngles);
    vector<vector<double> > params(numAngles);
    for (int i = 0; i < numAngles; i++)
        force.getAngleParameters(i, particle1[i], particle2[i], particle3[i], params[i]);
    vector<string> paramNames;
    for (int i = 0; i < force.getNumPerAngleParameters(); i++)
        paramNames.push_back(force.getPerAngleParameterName(i));
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
    gpuSetCustomAngleParameters(data.gpu, particle1, particle2, particle3, params, force.getEnergyFunction(), paramNames, globalParamNames);
    if (globalParamValues.size() > 0)
        SetCustomAngleGlobalParams(globalParamValues);
}

301
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
    updateGlobalParams(context);
    kCalculateCustomAngleForces(data.gpu);
    return 0.0;
}

void CudaCalcCustomAngleForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
    for (int i = 0; i < (int) globalParamNames.size(); i++) {
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
        SetCustomAngleGlobalParams(globalParamValues);
}

319
320
321
322
323
324
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
}

void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
    data.hasPeriodicTorsions = true;
    numTorsions = force.getNumTorsions();
325
    const float RadiansToDegrees = (float)(180.0/3.14159265);
Peter Eastman's avatar
Peter Eastman committed
326
327
328
329
    vector<int> particle1(numTorsions);
    vector<int> particle2(numTorsions);
    vector<int> particle3(numTorsions);
    vector<int> particle4(numTorsions);
330
331
332
333
334
    vector<float> k(numTorsions);
    vector<float> phase(numTorsions);
    vector<int> periodicity(numTorsions);
    for (int i = 0; i < numTorsions; i++) {
        double kValue, phaseValue;
Peter Eastman's avatar
Peter Eastman committed
335
        force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], periodicity[i], phaseValue, kValue);
336
337
        k[i] = (float) kValue;
        phase[i] = (float) (phaseValue*RadiansToDegrees);
338
    }
Peter Eastman's avatar
Peter Eastman committed
339
    gpuSetDihedralParameters(data.gpu, particle1, particle2, particle3, particle4, k, phase, periodicity);
340
341
}

342
double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
343
344
345
346
347
348
349
350
351
    return 0.0;
}

CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
}

void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
    data.hasRB = true;
    numTorsions = force.getNumTorsions();
Peter Eastman's avatar
Peter Eastman committed
352
353
354
355
    vector<int> particle1(numTorsions);
    vector<int> particle2(numTorsions);
    vector<int> particle3(numTorsions);
    vector<int> particle4(numTorsions);
356
357
358
359
360
361
362
363
    vector<float> c0(numTorsions);
    vector<float> c1(numTorsions);
    vector<float> c2(numTorsions);
    vector<float> c3(numTorsions);
    vector<float> c4(numTorsions);
    vector<float> c5(numTorsions);
    for (int i = 0; i < numTorsions; i++) {
        double c[6];
Peter Eastman's avatar
Peter Eastman committed
364
        force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], c[0], c[1], c[2], c[3], c[4], c[5]);
365
366
367
368
369
370
        c0[i] = (float) c[0];
        c1[i] = (float) c[1];
        c2[i] = (float) c[2];
        c3[i] = (float) c[3];
        c4[i] = (float) c[4];
        c5[i] = (float) c[5];
371
    }
Peter Eastman's avatar
Peter Eastman committed
372
    gpuSetRbDihedralParameters(data.gpu, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
373
374
}

375
double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
376
377
378
    return 0.0;
}

379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
    if (coefficients != NULL)
        delete coefficients;
    if (mapPositions != NULL)
        delete mapPositions;
    if (torsionMaps != NULL)
        delete torsionMaps;
    if (torsionIndices != NULL)
        delete torsionIndices;
}

void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
    numTorsions = force.getNumTorsions();
    if (numTorsions == 0)
        return;
    int numMaps = force.getNumMaps();
    vector<float4> coeffVec;
    vector<int2> mapPositionsVec(numMaps);
    vector<double> energy;
    vector<vector<double> > c;
    int currentPosition = 0;
    mapPositions = new CUDAStream<int2>(numMaps, 1, "cmapTorsionMapPositions");
    for (int i = 0; i < numMaps; i++) {
        int size;
        force.getMapParameters(i, size, energy);
        CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
        (*mapPositions)[i] = make_int2(currentPosition, size);
        currentPosition += 4*size*size;
        for (int j = 0; j < size*size; j++) {
            coeffVec.push_back(make_float4(c[j][0], c[j][1], c[j][2], c[j][3]));
            coeffVec.push_back(make_float4(c[j][4], c[j][5], c[j][6], c[j][7]));
            coeffVec.push_back(make_float4(c[j][8], c[j][9], c[j][10], c[j][11]));
            coeffVec.push_back(make_float4(c[j][12], c[j][13], c[j][14], c[j][15]));
        }
    }
    coefficients = new CUDAStream<float4>((int) coeffVec.size(), 1, "cmapTorsionCoefficients");;
    for (int i = 0; i < (int) coeffVec.size(); i++)
        (*coefficients)[i] = coeffVec[i];
    torsionMaps = new CUDAStream<int>(numTorsions, 1, "cmapTorsionMaps");
    torsionIndices = new CUDAStream<int4>(4*numTorsions, 1, "cmapTorsionIndices");
    vector<int> forceBufferCounter(system.getNumParticles(), 0);
    for (int i = 0; i < numTorsions; i++) {
        int map, a1, a2, a3, a4, b1, b2, b3, b4;
        force.getTorsionParameters(i, map, a1, a2, a3, a4, b1, b2, b3, b4);
        (*torsionMaps)[i] = map;
        (*torsionIndices)[i*4] = make_int4(a1, a2, a3, a4);
        (*torsionIndices)[i*4+1] = make_int4(b1, b2, b3, b4);
        (*torsionIndices)[i*4+2] = make_int4(forceBufferCounter[a1]++, forceBufferCounter[a2]++, forceBufferCounter[a3]++, forceBufferCounter[a4]++);
        (*torsionIndices)[i*4+3] = make_int4(forceBufferCounter[b1]++, forceBufferCounter[b2]++, forceBufferCounter[b3]++, forceBufferCounter[b4]++);
    }
    coefficients->Upload();
    mapPositions->Upload();
    torsionMaps->Upload();
    torsionIndices->Upload();
    int maxBuffers = 1;
    for (int i = 0; i < (int) forceBufferCounter.size(); i++)
        maxBuffers = max(maxBuffers, forceBufferCounter[i]);
    if (maxBuffers > data.gpu->sim.outputBuffers)
        data.gpu->sim.outputBuffers = maxBuffers;
}

440
double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
441
442
443
444
    kCalculateCMAPTorsionForces(data.gpu, *coefficients, *mapPositions, *torsionIndices, *torsionMaps);
    return 0.0;
}

445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
}

void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
    numTorsions = force.getNumTorsions();
    vector<int> particle1(numTorsions);
    vector<int> particle2(numTorsions);
    vector<int> particle3(numTorsions);
    vector<int> particle4(numTorsions);
    vector<vector<double> > params(numTorsions);
    for (int i = 0; i < numTorsions; i++)
        force.getTorsionParameters(i, particle1[i], particle2[i], particle3[i], particle4[i], params[i]);
    vector<string> paramNames;
    for (int i = 0; i < force.getNumPerTorsionParameters(); i++)
        paramNames.push_back(force.getPerTorsionParameterName(i));
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
    gpuSetCustomTorsionParameters(data.gpu, particle1, particle2, particle3, particle4, params, force.getEnergyFunction(), paramNames, globalParamNames);
    if (globalParamValues.size() > 0)
        SetCustomTorsionGlobalParams(globalParamValues);
}

471
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
    updateGlobalParams(context);
    kCalculateCustomTorsionForces(data.gpu);
    return 0.0;
}

void CudaCalcCustomTorsionForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
    for (int i = 0; i < (int) globalParamNames.size(); i++) {
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
        SetCustomTorsionGlobalParams(globalParamValues);
}

489
490
491
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
}

492
void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
493
    data.hasNonbonded = true;
Peter Eastman's avatar
Peter Eastman committed
494
    numParticles = force.getNumParticles();
495
    _gpuContext* gpu = data.gpu;
496
497
498
499

    // Identify which exceptions are 1-4 interactions.

    vector<pair<int, int> > exclusions;
500
    vector<int> exceptions;
501
502
503
504
505
506
    for (int i = 0; i < force.getNumExceptions(); i++) {
        int particle1, particle2;
        double chargeProd, sigma, epsilon;
        force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
        exclusions.push_back(pair<int, int>(particle1, particle2));
        if (chargeProd != 0.0 || epsilon != 0.0)
507
            exceptions.push_back(i);
508
509
    }

510
511
    // Initialize nonbonded interactions.
    
512
    {
Peter Eastman's avatar
Peter Eastman committed
513
514
515
516
        vector<int> particle(numParticles);
        vector<float> c6(numParticles);
        vector<float> c12(numParticles);
        vector<float> q(numParticles);
517
        vector<char> symbol;
Peter Eastman's avatar
Peter Eastman committed
518
519
        vector<vector<int> > exclusionList(numParticles);
        for (int i = 0; i < numParticles; i++) {
520
            double charge, radius, depth;
Peter Eastman's avatar
Peter Eastman committed
521
522
            force.getParticleParameters(i, charge, radius, depth);
            particle[i] = i;
523
524
525
            q[i] = (float) charge;
            c6[i] = (float) (4*depth*pow(radius, 6.0));
            c12[i] = (float) (4*depth*pow(radius, 12.0));
526
527
            exclusionList[i].push_back(i);
        }
528
        for (int i = 0; i < (int)exclusions.size(); i++) {
529
530
531
            exclusionList[exclusions[i].first].push_back(exclusions[i].second);
            exclusionList[exclusions[i].second].push_back(exclusions[i].first);
        }
532
533
        CudaNonbondedMethod method = NO_CUTOFF;
        if (force.getNonbondedMethod() != NonbondedForce::NoCutoff) {
534
            gpuSetNonbondedCutoff(gpu, (float) force.getCutoffDistance(), (float) force.getReactionFieldDielectric());
535
536
537
538
539
            method = CUTOFF;
        }
        if (force.getNonbondedMethod() == NonbondedForce::CutoffPeriodic) {
            method = PERIODIC;
        }
540
541
        if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
            if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
542
543
544
                double alpha;
                int kmaxx, kmaxy, kmaxz;
                NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
545
546
547
548
                gpuSetEwaldParameters(gpu, (float) alpha, kmaxx, kmaxy, kmaxz);
                method = EWALD;
            }
            else {
549
550
551
                double alpha;
                int gridSizeX, gridSizeY, gridSizeZ;
                NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
Peter Eastman's avatar
Peter Eastman committed
552
                gpuSetPMEParameters(gpu, (float) alpha, gridSizeX, gridSizeY, gridSizeZ);
553
554
                method = PARTICLE_MESH_EWALD;
            }
555
        }
556
        data.nonbondedMethod = method;
557
        gpuSetCoulombParameters(gpu, (float) ONE_4PI_EPS0, particle, c6, c12, q, symbol, exclusionList, method);
558
559
560
561

        // Compute the Ewald self energy.

        data.ewaldSelfEnergy = 0.0;
562
563
564
565
566
        if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
            double selfEnergyScale = gpu->sim.epsfac*gpu->sim.alphaEwald/std::sqrt(PI);
                for (int i = 0; i < numParticles; i++)
                    data.ewaldSelfEnergy -= selfEnergyScale*q[i]*q[i];
        }
567
568
569
570
571
572
573

        // Compute the long range dispersion correction.

        if (force.getUseDispersionCorrection())
            data.dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
        else
            data.dispersionCoefficient = 0.0;
574
575
576
577
    }

    // Initialize 1-4 nonbonded interactions.
    
578
    {
579
580
581
582
583
584
585
586
        int numExceptions = exceptions.size();
        vector<int> particle1(numExceptions);
        vector<int> particle2(numExceptions);
        vector<float> c6(numExceptions);
        vector<float> c12(numExceptions);
        vector<float> q1(numExceptions);
        vector<float> q2(numExceptions);
        for (int i = 0; i < numExceptions; i++) {
587
            double charge, sig, eps;
588
            force.getExceptionParameters(exceptions[i], particle1[i], particle2[i], charge, sig, eps);
589
590
            c6[i] = (float) (4*eps*pow(sig, 6.0));
            c12[i] = (float) (4*eps*pow(sig, 12.0));
Peter Eastman's avatar
Peter Eastman committed
591
592
            q1[i] = (float) charge;
            q2[i] = 1.0f;
593
        }
594
        gpuSetLJ14Parameters(gpu, (float) ONE_4PI_EPS0, 1.0f, particle1, particle2, c6, c12, q1, q2);
595
596
597
    }
}

598
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
599
    return 0.0;
600
601
}

602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
}

void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
    data.hasCustomNonbonded = true;
    numParticles = force.getNumParticles();
    _gpuContext* gpu = data.gpu;

    // Initialize nonbonded interactions.

    vector<int> particle(numParticles);
    vector<vector<double> > parameters(numParticles);
    vector<vector<int> > exclusionList(numParticles);
    for (int i = 0; i < numParticles; i++) {
        force.getParticleParameters(i, parameters[i]);
        particle[i] = i;
        exclusionList[i].push_back(i);
    }
620
621
622
623
624
    for (int i = 0; i < force.getNumExclusions(); i++) {
        int particle1, particle2;
        force.getExclusionParticles(i, particle1, particle2);
        exclusionList[particle1].push_back(particle2);
        exclusionList[particle2].push_back(particle1);
625
626
627
628
629
630
631
632
633
    }
    CudaNonbondedMethod method = NO_CUTOFF;
    if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
        method = CUTOFF;
    if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic) {
        method = PERIODIC;
    }
    data.customNonbondedMethod = method;

634
635
636
637
638
639
640
641
642
643
644
    // Record the tabulated functions.

    for (int i = 0; i < force.getNumFunctions(); i++) {
        string name;
        vector<double> values;
        double min, max;
        bool interpolating;
        force.getFunctionParameters(i, name, values, min, max, interpolating);
        gpuSetTabulatedFunction(gpu, i, name, values, min, max, interpolating);
    }

645
646
647
    // Record information for the expressions.

    vector<string> paramNames;
648
649
    for (int i = 0; i < force.getNumPerParticleParameters(); i++)
        paramNames.push_back(force.getPerParticleParameterName(i));
650
651
652
653
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
654
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
655
    }
656
    gpuSetCustomNonbondedParameters(gpu, parameters, exclusionList, method, (float) force.getCutoffDistance(), force.getEnergyFunction(), paramNames, globalParamNames);
657
    if (globalParamValues.size() > 0)
Peter Eastman's avatar
Peter Eastman committed
658
        SetCustomNonbondedGlobalParams(globalParamValues);
659
660
}

661
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
662
    updateGlobalParams(context);
663
664
665
    return 0.0;
}

666
667
void CudaCalcCustomNonbondedForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
668
    for (int i = 0; i < (int) globalParamNames.size(); i++) {
669
670
671
672
673
674
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
Peter Eastman's avatar
Peter Eastman committed
675
        SetCustomNonbondedGlobalParams(globalParamValues);
676
677
}

678
CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
679
680
}

681
void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
682

Peter Eastman's avatar
Peter Eastman committed
683
    int numParticles = system.getNumParticles();
684
    _gpuContext* gpu = data.gpu;
Peter Eastman's avatar
Peter Eastman committed
685
686
    vector<float> radius(numParticles);
    vector<float> scale(numParticles);
687
    vector<float> charge(numParticles);
Peter Eastman's avatar
Peter Eastman committed
688
    for (int i = 0; i < numParticles; i++) {
689
690
        double particleCharge, particleRadius, scalingFactor;
        force.getParticleParameters(i, particleCharge, particleRadius, scalingFactor);
Peter Eastman's avatar
Peter Eastman committed
691
        radius[i] = (float) particleRadius;
692
        scale[i] = (float) scalingFactor;
693
        charge[i] = (float) particleCharge;
694
    }
695
    gpuSetObcParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), radius, scale, charge);
696
697
}

698
699
double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
	return 0.0;
700
701
}

Mark Friedrichs's avatar
Mark Friedrichs committed
702
703
704
705
706
707
708
709
710
711
712
713
714
715
CudaCalcGBVIForceKernel::~CudaCalcGBVIForceKernel() {
}

void CudaCalcGBVIForceKernel::initialize(const System& system, const GBVIForce& force, const std::vector<double> & inputScaledRadii) {

    int numParticles = system.getNumParticles();
    _gpuContext* gpu = data.gpu;

    vector<int> particle(numParticles);
    vector<float> radius(numParticles);
    vector<float> scaledRadii(numParticles);
    vector<float> gammas(numParticles);

    for (int i = 0; i < numParticles; i++) {
716
        double charge, particleRadius, gamma;
Mark Friedrichs's avatar
Mark Friedrichs committed
717
718
719
720
721
722
723
724
725
726
        force.getParticleParameters(i, charge, particleRadius, gamma );
        particle[i]                  = i;
        radius[i]                    = (float) particleRadius;
        gammas[i]                    = (float) gamma;
        scaledRadii[i]               = (float) inputScaledRadii[i];
    }
    gpuSetGBVIParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), particle,
                         radius, gammas, scaledRadii );
}

727
double CudaCalcGBVIForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
Mark Friedrichs's avatar
Mark Friedrichs committed
728
729
730
    return 0.0;
}

731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
}

void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
    numParticles = force.getNumParticles();
    vector<int> particle(numParticles);
    vector<vector<double> > params(numParticles);
    for (int i = 0; i < numParticles; i++)
        force.getParticleParameters(i, particle[i], params[i]);
    vector<string> paramNames;
    for (int i = 0; i < force.getNumPerParticleParameters(); i++)
        paramNames.push_back(force.getPerParticleParameterName(i));
    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
    gpuSetCustomExternalParameters(data.gpu, particle, params, force.getEnergyFunction(), paramNames, globalParamNames);
    if (globalParamValues.size() > 0)
Peter Eastman's avatar
Peter Eastman committed
751
        SetCustomExternalGlobalParams(globalParamValues);
752
753
}

754
double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
755
756
757
758
759
760
761
762
763
764
765
766
767
768
    updateGlobalParams(context);
    kCalculateCustomExternalForces(data.gpu);
    return 0.0;
}

void CudaCalcCustomExternalForceKernel::updateGlobalParams(ContextImpl& context) {
    bool changed = false;
    for (int i = 0; i < (int) globalParamNames.size(); i++) {
        float value = (float) context.getParameter(globalParamNames[i]);
        if (value != globalParamValues[i])
            changed = true;
        globalParamValues[i] = value;
    }
    if (changed)
Peter Eastman's avatar
Peter Eastman committed
769
        SetCustomExternalGlobalParams(globalParamValues);
770
771
}

772
void OPENMMCUDA_EXPORT OpenMM::cudaOpenMMInitializeIntegration(const System& system, CudaPlatform::PlatformData& data, const Integrator& integrator) {
773
774
775
776
777
778
779
780
781
782
783
784
785
786

    // Initialize any terms that haven't already been handled by a Force.

    _gpuContext* gpu = data.gpu;
    if (!data.hasBonds)
        gpuSetBondParameters(gpu, vector<int>(), vector<int>(), vector<float>(), vector<float>());
    if (!data.hasAngles)
        gpuSetBondAngleParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>());
    if (!data.hasPeriodicTorsions)
        gpuSetDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<int>());
    if (!data.hasRB)
        gpuSetRbDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(),
                vector<float>(), vector<float>(), vector<float>(), vector<float>());
    if (!data.hasNonbonded) {
787
788
        gpuSetCoulombParameters(gpu, (float) ONE_4PI_EPS0, vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<char>(), vector<vector<int> >(), NO_CUTOFF);
        gpuSetLJ14Parameters(gpu, (float) ONE_4PI_EPS0, 1.0f, vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<float>());
789
790
        if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
            throw OpenMMException("CudaPlatform requires GBSAOBCForce and GBVIForce to be used with a NonbondedForce");
791
    }
792
793
794
    
    // Set masses.
    
Peter Eastman's avatar
Peter Eastman committed
795
796
797
798
    int numParticles = system.getNumParticles();
    vector<float> mass(numParticles);
    for (int i = 0; i < numParticles; i++)
        mass[i] = (float) system.getParticleMass(i);
799
800
801
802
    gpuSetMass(gpu, mass);
    
    // Set constraints.
    
803
    int numConstraints = system.getNumConstraints();
Peter Eastman's avatar
Peter Eastman committed
804
805
    vector<int> particle1(numConstraints);
    vector<int> particle2(numConstraints);
806
807
808
809
    vector<float> distance(numConstraints);
    vector<float> invMass1(numConstraints);
    vector<float> invMass2(numConstraints);
    for (int i = 0; i < numConstraints; i++) {
Peter Eastman's avatar
Peter Eastman committed
810
        int particle1Index, particle2Index;
811
        double constraintDistance;
Peter Eastman's avatar
Peter Eastman committed
812
813
814
        system.getConstraintParameters(i, particle1Index, particle2Index, constraintDistance);
        particle1[i] = particle1Index;
        particle2[i] = particle2Index;
815
        distance[i] = (float) constraintDistance;
Peter Eastman's avatar
Peter Eastman committed
816
817
        invMass1[i] = 1.0f/mass[particle1Index];
        invMass2[i] = 1.0f/mass[particle2Index];
818
    }
819
    gpuSetConstraintParameters(gpu, particle1, particle2, distance, invMass1, invMass2, (float)integrator.getConstraintTolerance());
820
821
    
    // Finish initialization.
822

823
824
825
    gpuBuildThreadBlockWorkList(gpu);
    gpuBuildExclusionList(gpu);
    gpuBuildOutputBuffers(gpu);
826
    gpuSetConstants(gpu);
827
828
829
830
    if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
        kClearBornSumAndForces(gpu);
    else
        kClearForces(gpu);
831
    cudaThreadSynchronize();
832
833
834
835
836
837
}

CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
}

void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
838
    cudaOpenMMInitializeIntegration(system, data, integrator);
839
840
841
    prevStepSize = -1.0;
}

842
void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
843
844
845
846
847
    _gpuContext* gpu = data.gpu;
    double stepSize = integrator.getStepSize();
    if (stepSize != prevStepSize) {
        // Initialize the GPU parameters.
        
848
        gpuSetVerletIntegrationParameters(gpu, (float) stepSize, 0.0f);
849
850
851
852
853
        gpuSetConstants(gpu);
        prevStepSize = stepSize;
    }
    kVerletUpdatePart1(gpu);
    kApplyFirstShake(gpu);
854
    kApplyFirstSettle(gpu);
855
    kApplyFirstCCMA(gpu);
856
857
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
858
859
            gpu->bCalculateCM = true;
    kVerletUpdatePart2(gpu);
860
    data.time += stepSize;
861
    data.stepCount++;
862
863
864
865
866
867
}

CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
}

void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
868
    cudaOpenMMInitializeIntegration(system, data, integrator);
869
870
871
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
872
873
    prevTemp = -1.0;
    prevFriction = -1.0;
874
875
876
    prevStepSize = -1.0;
}

877
void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
878
    _gpuContext* gpu = data.gpu;
879
880
881
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double stepSize = integrator.getStepSize();
882
883
    if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
        // Initialize the GPU parameters.
884
        
885
        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
886
        gpuSetLangevinIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature, 0.0f);
887
888
889
890
891
892
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevStepSize = stepSize;
    }
893
    kLangevinUpdatePart1(gpu);
894
895
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
896
            gpu->bCalculateCM = true;
897
    kLangevinUpdatePart2(gpu);
898
    kApplySecondShake(gpu);
899
    kApplySecondSettle(gpu);
900
    kApplySecondCCMA(gpu);
901
    kSetVelocitiesFromPositions(gpu);
902
    data.time += stepSize;
903
    data.stepCount++;
904
}
905
906
907
908
909

CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
}

void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
910
    cudaOpenMMInitializeIntegration(system, data, integrator);
911
912
913
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
914
915
    prevTemp = -1.0;
    prevFriction = -1.0;
916
917
918
    prevStepSize = -1.0;
}

919
void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
920
921
922
923
924
925
926
927
    _gpuContext* gpu = data.gpu;
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double stepSize = integrator.getStepSize();
    if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
        // Initialize the GPU parameters.
        
        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
928
        gpuSetBrownianIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature);
929
930
931
932
933
934
935
936
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevStepSize = stepSize;
    }
    kBrownianUpdatePart1(gpu);
    kApplyFirstShake(gpu);
937
    kApplyFirstSettle(gpu);
938
    kApplyFirstCCMA(gpu);
939
940
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
941
942
            gpu->bCalculateCM = true;
    kBrownianUpdatePart2(gpu);
943
    data.time += stepSize;
944
945
946
947
948
949
950
    data.stepCount++;
}

CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() {
}

void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
951
    cudaOpenMMInitializeIntegration(system, data, integrator);
952
953
954
    prevErrorTol = -1.0;
}

955
void CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
956
957
958
959
960
961
962
963
964
    _gpuContext* gpu = data.gpu;
    double errorTol = integrator.getErrorTolerance();
    if (errorTol != prevErrorTol) {
        // Initialize the GPU parameters.

        gpuSetVerletIntegrationParameters(gpu, 0.0f, (float) errorTol);
        gpuSetConstants(gpu);
        prevErrorTol = errorTol;
    }
965
    float maxStepSize = (float)(maxTime-data.time);
966
    kSelectVerletStepSize(gpu, maxStepSize);
967
968
969
970
971
972
973
974
975
976
    kVerletUpdatePart1(gpu);
    kApplyFirstShake(gpu);
    kApplyFirstSettle(gpu);
    kApplyFirstCCMA(gpu);
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
            gpu->bCalculateCM = true;
    kVerletUpdatePart2(gpu);
    gpu->psStepSize->Download();
    data.time += (*gpu->psStepSize)[0].y;
977
978
    if ((*gpu->psStepSize)[0].y == maxStepSize)
        data.time = maxTime; // Avoid round-off error
979
    data.stepCount++;
980
}
981

982
983
984
985
CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKernel() {
}

void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
986
    cudaOpenMMInitializeIntegration(system, data, integrator);
987
988
989
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
990
991
    prevTemp = -1.0;
    prevFriction = -1.0;
992
993
994
995
996
997
998
999
1000
1001
1002
1003
    prevErrorTol = -1.0;
}

void CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
    _gpuContext* gpu = data.gpu;
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double errorTol = integrator.getErrorTolerance();
    if (temperature != prevTemp || friction != prevFriction || errorTol != prevErrorTol) {
        // Initialize the GPU parameters.

        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
1004
        gpuSetLangevinIntegrationParameters(gpu, (float) tau, 0.0f, (float) temperature, (float) errorTol);
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFriction = friction;
        prevErrorTol = errorTol;
    }
    float maxStepSize = (float)(maxTime-data.time);
    kSelectLangevinStepSize(gpu, maxStepSize);
    kLangevinUpdatePart1(gpu);
    if (data.removeCM)
        if (data.stepCount%data.cmMotionFrequency == 0)
            gpu->bCalculateCM = true;
    kLangevinUpdatePart2(gpu);
    kApplySecondShake(gpu);
    kApplySecondSettle(gpu);
    kApplySecondCCMA(gpu);
1021
    kSetVelocitiesFromPositions(gpu);
1022
1023
1024
1025
1026
1027
1028
    gpu->psStepSize->Download();
    data.time += (*gpu->psStepSize)[0].y;
    if ((*gpu->psStepSize)[0].y == maxStepSize)
        data.time = maxTime; // Avoid round-off error
    data.stepCount++;
}

1029
1030
1031
1032
CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
}

void CudaApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
1033
1034
1035
    _gpuContext* gpu = data.gpu;
    gpu->seed = (unsigned long) thermostat.getRandomNumberSeed();
    gpuInitializeRandoms(gpu);
1036
1037
    prevTemp = -1.0;
    prevFrequency = -1.0;
1038
1039
1040
    prevStepSize = -1.0;
}

1041
void CudaApplyAndersenThermostatKernel::execute(ContextImpl& context) {
1042
    _gpuContext* gpu = data.gpu;
1043
1044
    double temperature = context.getParameter(AndersenThermostat::Temperature());
    double frequency = context.getParameter(AndersenThermostat::CollisionFrequency());
1045
1046
1047
1048
    double stepSize = context.getIntegrator().getStepSize();
    if (temperature != prevTemp || frequency != prevFrequency || stepSize != prevStepSize) {
        // Initialize the GPU parameters.
        
1049
        gpuSetAndersenThermostatParameters(gpu, (float) temperature, (float) frequency);
1050
1051
1052
1053
1054
1055
1056
1057
        gpuSetConstants(gpu);
        kGenerateRandoms(gpu);
        prevTemp = temperature;
        prevFrequency = frequency;
        prevStepSize = stepSize;
    }
    kCalculateAndersenThermostat(gpu);
}
1058

1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
CudaApplyMonteCarloBarostatKernel::~CudaApplyMonteCarloBarostatKernel() {
    if (moleculeAtoms != NULL)
        delete moleculeAtoms;
    if (moleculeStartIndex != NULL)
        delete moleculeStartIndex;
}

void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const MonteCarloBarostat& thermostat) {
}

void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, double scale) {
    if (!hasInitializedMolecules) {
        hasInitializedMolecules = true;

        // Create the arrays with the molecule definitions.

        vector<vector<int> > molecules = context.getMolecules();
        numMolecules = molecules.size();
        moleculeAtoms = new CUDAStream<int>(context.getSystem().getNumParticles(), 1, "moleculeAtoms");
        moleculeStartIndex = new CUDAStream<int>(numMolecules+1, 1, "moleculeStartIndex");
        int index = 0;
        for (int i = 0; i < numMolecules; i++) {
            (*moleculeStartIndex)[i] = index;
            for (int j = 0; j < (int) molecules[i].size(); j++)
                (*moleculeAtoms)[index++] = molecules[i][j];
        }
        (*moleculeStartIndex)[numMolecules] = index;
        moleculeAtoms->Upload();
        moleculeStartIndex->Upload();
    }
    _gpuContext* gpu = data.gpu;
    gpu->psPosqP4->CopyFrom(*gpu->psPosq4);
    kScaleAtomCoordinates(gpu, scale, *moleculeAtoms, *moleculeStartIndex);
1092
1093
    for (int i = 0; i < (int) gpu->posCellOffsets.size(); i++)
        gpu->posCellOffsets[i] = make_int3(0, 0, 0);
1094
1095
1096
1097
1098
1099
1100
}

void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
    _gpuContext* gpu = data.gpu;
    gpu->psPosq4->CopyFrom(*gpu->psPosqP4);
}

1101
void CudaCalcKineticEnergyKernel::initialize(const System& system) {
Peter Eastman's avatar
Peter Eastman committed
1102
1103
    int numParticles = system.getNumParticles();
    masses.resize(numParticles);
1104
    for (int i = 0; i < numParticles; ++i)
Peter Eastman's avatar
Peter Eastman committed
1105
        masses[i] = system.getParticleMass(i);
1106
1107
}

1108
double CudaCalcKineticEnergyKernel::execute(ContextImpl& context) {
1109
1110
1111
    // We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy
    // on the CPU.
    
1112
1113
    _gpuContext* gpu = data.gpu;
    gpu->psVelm4->Download();
1114
    double energy = 0.0;
1115
1116
1117
1118
    for (int i = 0; i < (int) masses.size(); ++i) {
        float4 v = (*gpu->psVelm4)[i];
        energy += masses[i]*(v.x*v.x+v.y*v.y+v.z*v.z);
    }
1119
1120
    return 0.5*energy;
}
1121

1122
void CudaRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
1123
    data.removeCM = true;
1124
    data.cmMotionFrequency = force.getFrequency();
1125
1126
}

1127
void CudaRemoveCMMotionKernel::execute(ContextImpl& context) {
1128
}