CudaContext.h 26.2 KB
Newer Older
1
2
3
4
5
6
#ifndef OPENMM_CUDACONTEXT_H_
#define OPENMM_CUDACONTEXT_H_

/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
Evan Pretti's avatar
Evan Pretti committed
7
8
 * This is part of the OpenMM molecular simulation toolkit.                   *
 * See https://openmm.org/development.                                        *
9
 *                                                                            *
10
 * Portions copyright (c) 2009-2026 Stanford University and the Authors.      *
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include <map>
#include <string>
30
#include <utility>
31
32
33
34
35
36
37
38
#define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER
    // Prevent Windows from defining macros that interfere with other code.
    #define NOMINMAX
#endif
#include <cuda.h>
#include <builtin_types.h>
#include <vector_functions.h>
39
#include "openmm/common/windowsExportCommon.h"
Peter Eastman's avatar
Peter Eastman committed
40
#include "CudaArray.h"
41
42
43
44
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaIntegrationUtilities.h"
#include "CudaNonbondedUtilities.h"
45
#include "CudaPlatform.h"
46
#include "CudaQueue.h"
47
48
#include "openmm/OpenMMException.h"
#include "openmm/common/ComputeContext.h"
49
#include "openmm/Kernel.h"
50

51
52
typedef unsigned int tileflags;

53
54
55
56
57
58
59
60
61
62
63
64
65
namespace OpenMM {

/**
 * This class contains the information associated with a Context by the CUDA Platform.  Each CudaContext is
 * specific to a particular device, and manages data structures and kernels for that device.  When running a simulation
 * in parallel on multiple devices, there is a separate CudaContext for each one.  The list of all contexts is
 * stored in the CudaPlatform::PlatformData.
 * <p>
 * In addition, a worker thread is created for each CudaContext.  This is used for parallel computations, so that
 * blocking calls to one device will not block other devices.  When only a single device is being used, the worker
 * thread is not used and calculations are performed on the main application thread.
 */

66
class OPENMM_EXPORT_COMMON CudaContext : public ComputeContext {
67
68
69
70
public:
    class WorkTask;
    class WorkThread;
    class ReorderListener;
71
72
    class ForcePreComputation;
    class ForcePostComputation;
73
74
75
    static const int ThreadBlockSize;
    static const int TileSize;
    CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision,
76
            const std::string& tempDir, CudaPlatform::PlatformData& platformData, CudaContext* originalContext);
77
    ~CudaContext();
78
79
80
81
82
    /**
     * This is called to initialize internal data structures after all Forces in the system
     * have been initialized.
     */
    void initialize();
83
84
85
86
87
88
    /**
     * Get the CUcontext associated with this object.
     */
    CUcontext getContext() {
        return context;
    }
89
90
91
92
93
94
95
96
97
98
99
    /**
     * Get whether the CUcontext associated with this object is currently a valid contex.
     */
    bool getContextIsValid() const {
        return contextIsValid;
    }
    /**
     * Set the CUcontext associated with this object to be the current context.  If the context is not
     * valid, this returns without doing anything.
     */
    void setAsCurrent();
100
101
102
103
104
105
106
107
108
109
    /**
     * Push the CUcontext associated with this object to be the current context.  If the context is not
     * valid, this returns without doing anything.
     */
    void pushAsCurrent();
    /**
     * Pop the CUcontext associated with this object off the stack of contexts.  If the context is not
     * valid, this returns without doing anything.
     */
    void popAsCurrent();
110
111
112
113
114
115
    /**
     * Get the CUdevice associated with this object.
     */
    CUdevice getDevice() {
        return device;
    }
116
117
118
119
120
121
    /**
     * Get the compute capability of the device associated with this object.
     */
    double getComputeCapability() const {
        return computeCapability;
    }
122
123
124
    /**
     * Get the index of the CUdevice associated with this object.
     */
125
    int getDeviceIndex() const {
126
127
128
129
130
131
132
133
        return deviceIndex;
    }
    /**
     * Get the PlatformData object this context is part of.
     */
    CudaPlatform::PlatformData& getPlatformData() {
        return platformData;
    }
134
135
136
137
138
139
140
141
    /**
     * Get the number of contexts being used for the current simulation.
     * This is relevant when a simulation is parallelized across multiple devices.  In that case,
     * one CudaContext is created for each device.
     */
    int getNumContexts() const {
        return platformData.contexts.size();
    }
142
143
144
145
146
147
    /**
     * Get the index of this context in the list stored in the PlatformData.
     */
    int getContextIndex() const {
        return contextIndex;
    }
148
149
150
151
152
153
    /**
     * Get a list of all contexts being used for the current simulation.
     * This is relevant when a simulation is parallelized across multiple devices.  In that case,
     * one ComputeContext is created for each device.
     */
    std::vector<ComputeContext*> getAllContexts();
154
155
156
    /**
     * Get the ContextImpl is ComputeContext is associated with.
     */
157
158
    ContextImpl* getContextImpl() {
        return platformData.context;
159
    }
Peter Eastman's avatar
Peter Eastman committed
160
161
162
163
164
    /**
     * Get a workspace used for accumulating energy when a simulation is parallelized across
     * multiple devices.
     */
    double& getEnergyWorkspace();
165
    /**
166
     * Create a new ComputeQueue for use with this context.
167
     */
168
    ComputeQueue createQueue();
169
    /**
170
     * Get the stream currently being used for execution.
171
     */
172
    CUstream getCurrentStream();
173
174
175
176
177
178
179
180
181
    /**
     * Construct an uninitialized array of the appropriate class for this platform.  The returned
     * value should be created on the heap with the "new" operator.
     */
    CudaArray* createArray();
    /**
     * Construct a ComputeEvent object of the appropriate class for this platform.
     */
    ComputeEvent createEvent();
182
183
184
185
186
187
188
189
190
191
192
193
194
    /**
     * Construct a ComputeSort object of the appropriate class for this platform.
     * 
     * @param trait      a SortTrait defining the type of data to sort.  It should have been allocated
     *                   on the heap with the "new" operator.  This object takes over ownership of it,
     *                   and deletes it when the ComputeSort is deleted.
     * @param length     the length of the arrays this object will be used to sort
     * @param uniform    whether the input data is expected to follow a uniform or nonuniform
     *                   distribution.  This argument is used only as a hint.  It allows parts
     *                   of the algorithm to be tuned for faster performance on the expected
     *                   distribution.
     */
    ComputeSort createSort(ComputeSortImpl::SortTrait* trait, unsigned int length, bool uniform=true);
195
196
197
198
199
200
201
202
203
204
205
206
207
    /**
     * Compile source code to create a ComputeProgram.
     *
     * @param source             the source code of the program
     * @param defines            a set of preprocessor definitions (name, value) to define when compiling the program
     */
    ComputeProgram compileProgram(const std::string source, const std::map<std::string, std::string>& defines=std::map<std::string, std::string>());
    /**
     * Convert an array to an CudaArray.  If the argument is already an CudaArray, this simply casts it.
     * If the argument is a ComputeArray that wraps a CudaArray, this returns the wrapped array.  For any
     * other argument, this throws an exception.
     */
    CudaArray& unwrap(ArrayInterface& array) const;
208
209
210
211
    /**
     * Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
     */
    CudaArray& getPosq() {
Peter Eastman's avatar
Peter Eastman committed
212
        return posq;
213
    }
214
215
216
217
    /**
     * Get the array which contains a correction to the position of each atom.  This only exists if getUseMixedPrecision() returns true.
     */
    CudaArray& getPosqCorrection() {
Peter Eastman's avatar
Peter Eastman committed
218
        return posqCorrection;
219
    }
220
221
222
223
    /**
     * Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
     */
    CudaArray& getVelm() {
Peter Eastman's avatar
Peter Eastman committed
224
        return velm;
225
    }
226
    /**
227
     * Get the array which contains the force on each atom (represented as three long longs in 64 bit fixed point).
228
229
     */
    CudaArray& getForce() {
Peter Eastman's avatar
Peter Eastman committed
230
        return force;
231
    }
232
233
234
235
236
237
    /**
     * The CUDA platform does not use floating point force buffers, so this throws an exception.
     */
    ArrayInterface& getFloatForceBuffer() {
        throw OpenMMException("CUDA platform does not use floating point force buffers");
    }
238
239
240
241
242
243
244
245
246
247
248
249
250
    /**
     * Get the array which contains a contribution to each force represented as 64 bit fixed point.
     * This is a synonym for getForce().  It exists to satisfy the ComputeContext interface.
     */
    CudaArray& getLongForceBuffer() {
        return force;
    }
    /**
     * All CUDA devices support 64 bit atomics, so this throws an exception.
     */
    ArrayInterface& getForceBuffers() {
        throw OpenMMException("CUDA platform does not use floating point force buffers");
    }
251
252
253
254
    /**
     * Get the array which contains the buffer in which energy is computed.
     */
    CudaArray& getEnergyBuffer() {
Peter Eastman's avatar
Peter Eastman committed
255
        return energyBuffer;
256
    }
257
258
259
260
    /**
     * Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
     */
    CudaArray& getEnergyParamDerivBuffer() {
Peter Eastman's avatar
Peter Eastman committed
261
        return energyParamDerivBuffer;
262
    }
263
264
265
266
267
268
269
270
    /**
     * Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
     * This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
     */
    void* getPinnedBuffer() {
        return pinnedBuffer;
    }
    /**
271
272
273
274
275
     * Get a shared ThreadPool that code can use to parallelize operations.
     * 
     * Because this object is freely available to all code, care is needed to avoid conflicts.  Only use it
     * from the main thread, and make sure all operations are complete before you invoke any other code that
     * might make use of it
276
     */
277
278
    ThreadPool& getThreadPool() {
        return getPlatformData().threads;
279
280
281
282
283
    }
    /**
     * Get the array which contains the index of each atom.
     */
    CudaArray& getAtomIndexArray() {
Peter Eastman's avatar
Peter Eastman committed
284
        return atomIndexDevice;
285
    }
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
    /**
     * Create a CUDA module from source code.
     *
     * @param source             the source code of the module
     * @param optimizationFlags  the optimization flags to pass to the CUDA compiler.  If this is
     *                           omitted, a default set of options will be used
     */
    CUmodule createModule(const std::string source, const char* optimizationFlags = NULL);
    /**
     * Create a CUDA module from source code.
     *
     * @param source             the source code of the module
     * @param defines            a set of preprocessor definitions (name, value) to define when compiling the program
     * @param optimizationFlags  the optimization flags to pass to the CUDA compiler.  If this is
     *                           omitted, a default set of options will be used
     */
    CUmodule createModule(const std::string source, const std::map<std::string, std::string>& defines, const char* optimizationFlags = NULL);
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
    /**
     * Get a kernel from a CUDA module.
     *
     * @param module    the module to get the kernel from
     * @param name      the name of the kernel to get
     */
    CUfunction getKernel(CUmodule& module, const std::string& name);
    /**
     * Execute a kernel.
     *
     * @param kernel       the kernel to execute
     * @param arguments    an array of pointers to the kernel arguments
     * @param threads      the maximum number of threads that should be used
     * @param blockSize    the size of each thread block to use
     * @param sharedSize   the amount of dynamic shared memory to allocated for the kernel, in bytes
     */
    void executeKernel(CUfunction kernel, void** arguments, int workUnits, int blockSize = -1, unsigned int sharedSize = 0);
320
321
322
323
324
325
    /**
     * Compute the largest thread block size that can be used for a kernel that requires a particular amount of
     * shared memory per thread.
     * 
     * @param memory        the number of bytes of shared memory per thread
     */
326
    int computeThreadBlockSize(double memory) const;
327
328
329
    /**
     * Set all elements of an array to 0.
     */
330
    void clearBuffer(ArrayInterface& array);
331
332
333
334
    /**
     * Set all elements of an array to 0.
     *
     * @param memory     the memory to clear
335
     * @param size       the size of the buffer in bytes
336
337
     */
    void clearBuffer(CUdeviceptr memory, int size);
338
339
340
    /**
     * Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
     */
341
    void addAutoclearBuffer(ArrayInterface& array);
342
343
344
345
    /**
     * Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
     *
     * @param memory     the memory to clear
346
     * @param size       the size of the buffer in bytes
347
348
     */
    void addAutoclearBuffer(CUdeviceptr memory, int size);
349
350
351
352
    /**
     * Clear all buffers that have been registered with addAutoclearBuffer().
     */
    void clearAutoclearBuffers();
Peter Eastman's avatar
Peter Eastman committed
353
354
355
356
    /**
     * Sum the buffer containing energy.
     */
    double reduceEnergy();
357
    /**
358
     * Get the number of blocks of TileSize atoms.
359
     */
360
361
    int getNumAtomBlocks() const {
        return numAtomBlocks;
362
    }
363
    /**
364
     * Get the standard number of thread blocks to use when executing kernels.
365
     */
366
367
    int getNumThreadBlocks() const {
        return numThreadBlocks;
368
369
    }
    /**
370
     * Get the maximum number of threads in a thread block supported by this device.
371
     */
372
373
    int getMaxThreadBlockSize() const {
        return 1024;
374
    }
375
    /**
376
377
     * Get whether the device being used is a CPU.  In some cases, different algorithms
     * may be more efficient on CPUs and GPUs.
378
     */
379
380
    bool getIsCPU() const {
        return false;
381
382
    }
    /**
383
     * Get the SIMD width of the device being used.
384
     */
385
386
    int getSIMDWidth() const {
        return 32;
387
388
    }
    /**
389
     * Get whether the device being used supports 64 bit atomic operations on global memory.
390
     */
391
392
    bool getSupports64BitGlobalAtomics() const {
        return true;
393
394
    }
    /**
395
     * Get whether the device being used supports double precision math.
396
     */
397
398
    bool getSupportsDoublePrecision() const {
        return true;
399
400
401
402
    }
    /**
     * Get whether double precision is being used.
     */
403
    bool getUseDoublePrecision() const {
404
405
406
        return useDoublePrecision;
    }
    /**
407
     * Get whether mixed precision is being used.
408
     */
409
    bool getUseMixedPrecision() const {
410
        return useMixedPrecision;
411
    }
412
413
414
    /**
     * Get whether the periodic box is triclinic.
     */
415
    bool getBoxIsTriclinic() const {
416
417
        return boxIsTriclinic;
    }
418
419
420
    /**
     * Convert a CUDA result code to the corresponding string description.
     */
421
422
    static std::string getErrorString(CUresult result);
    /**
423
     * Get the vectors defining the periodic box.
424
     */
425
426
427
428
    void getPeriodicBoxVectors(Vec3& a, Vec3& b, Vec3& c) const {
        a = Vec3(periodicBoxVecX.x, periodicBoxVecX.y, periodicBoxVecX.z);
        b = Vec3(periodicBoxVecY.x, periodicBoxVecY.y, periodicBoxVecY.z);
        c = Vec3(periodicBoxVecZ.x, periodicBoxVecZ.y, periodicBoxVecZ.z);
429
430
    }
    /**
431
     * Set the vectors defining the periodic box.
432
     */
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
    void setPeriodicBoxVectors(const Vec3& a, const Vec3& b, const Vec3& c) {
        periodicBoxVecX = make_double4(a[0], a[1], a[2], 0.0);
        periodicBoxVecY = make_double4(b[0], b[1], b[2], 0.0);
        periodicBoxVecZ = make_double4(c[0], c[1], c[2], 0.0);
        periodicBoxVecXFloat = make_float4((float) a[0], (float) a[1], (float) a[2], 0.0f);
        periodicBoxVecYFloat = make_float4((float) b[0], (float) b[1], (float) b[2], 0.0f);
        periodicBoxVecZFloat = make_float4((float) c[0], (float) c[1], (float) c[2], 0.0f);
        periodicBoxSize = make_double4(a[0], b[1], c[2], 0.0);
        invPeriodicBoxSize = make_double4(1.0/a[0], 1.0/b[1], 1.0/c[2], 0.0);
        periodicBoxSizeFloat = make_float4((float) a[0], (float) b[1], (float) c[2], 0.0f);
        invPeriodicBoxSizeFloat = make_float4(1.0f/(float) a[0], 1.0f/(float) b[1], 1.0f/(float) c[2], 0.0f);
    }
    /**
     * Get the size of the periodic box.
     */
    double4 getPeriodicBoxSize() const {
        return periodicBoxSize;
450
451
452
453
454
455
456
    }
    /**
     * Get the inverse of the size of the periodic box.
     */
    double4 getInvPeriodicBoxSize() const {
        return invPeriodicBoxSize;
    }
457
458
459
460
461
462
463
464
465
466
467
468
469
470
    /**
     * Get a pointer to the size of the periodic box, represented as either a float4 or double4 depending on
     * this context's precision.  This value is suitable for passing to kernels as an argument.
     */
    void* getPeriodicBoxSizePointer() {
        return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxSize) : reinterpret_cast<void*>(&periodicBoxSizeFloat));
    }
    /**
     * Get a pointer to the inverse of the size of the periodic box, represented as either a float4 or double4 depending on
     * this context's precision.  This value is suitable for passing to kernels as an argument.
     */
    void* getInvPeriodicBoxSizePointer() {
        return (useDoublePrecision ? reinterpret_cast<void*>(&invPeriodicBoxSize) : reinterpret_cast<void*>(&invPeriodicBoxSizeFloat));
    }
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
    /**
     * Get a pointer to the first periodic box vector, represented as either a float4 or double4 depending on
     * this context's precision.  This value is suitable for passing to kernels as an argument.
     */
    void* getPeriodicBoxVecXPointer() {
        return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxVecX) : reinterpret_cast<void*>(&periodicBoxVecXFloat));
    }
    /**
     * Get a pointer to the second periodic box vector, represented as either a float4 or double4 depending on
     * this context's precision.  This value is suitable for passing to kernels as an argument.
     */
    void* getPeriodicBoxVecYPointer() {
        return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxVecY) : reinterpret_cast<void*>(&periodicBoxVecYFloat));
    }
    /**
     * Get a pointer to the third periodic box vector, represented as either a float4 or double4 depending on
     * this context's precision.  This value is suitable for passing to kernels as an argument.
     */
    void* getPeriodicBoxVecZPointer() {
        return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxVecZ) : reinterpret_cast<void*>(&periodicBoxVecZFloat));
    }
492
493
494
495
496
497
498
499
500
501
502
503
    /**
     * Get the CudaIntegrationUtilities for this context.
     */
    CudaIntegrationUtilities& getIntegrationUtilities() {
        return *integration;
    }
    /**
     * Get the CudaExpressionUtilities for this context.
     */
    CudaExpressionUtilities& getExpressionUtilities() {
        return *expression;
    }
504
505
506
507
508
509
    /**
     * Get the CudaBondedUtilities for this context.
     */
    CudaBondedUtilities& getBondedUtilities() {
        return *bonded;
    }
510
511
512
513
514
515
    /**
     * Get the CudaNonbondedUtilities for this context.
     */
    CudaNonbondedUtilities& getNonbondedUtilities() {
        return *nonbonded;
    }
516
517
518
519
520
521
522
523
524
    /**
     * Create a new NonbondedUtilities for use with this context.  This should be called
     * only in unusual situations, when a Force needs its own NonbondedUtilities object
     * separate from the standard one.  The caller is responsible for deleting the object
     * when it is no longer needed.
     */
    CudaNonbondedUtilities* createNonbondedUtilities() {
        return new CudaNonbondedUtilities(*this);
    }
525
526
527
528
529
530
531
532
533
    /**
     * Create an object for performing 3D FFTs.  The caller is responsible for deleting
     * the object when it is no longer needed.
     *
     * @param xsize   the first dimension of the data sets on which FFTs will be performed
     * @param ysize   the second dimension of the data sets on which FFTs will be performed
     * @param zsize   the third dimension of the data sets on which FFTs will be performed
     * @param realToComplex  if true, a real-to-complex transform will be done.  Otherwise, it is complex-to-complex.
     */
534
    FFT3D createFFT(int xsize, int ysize, int zsize, bool realToComplex=false);
535
536
537
538
539
    /**
     * This should be called by the Integrator from its own initialize() method.
     * It ensures all contexts are fully initialized.
     */
    void initializeContexts();
540
541
542
543
    /**
     * Set the particle charges.  These are packed into the fourth element of the posq array.
     */
    void setCharges(const std::vector<double>& charges);
544
545
546
547
548
    /**
     * Request to use the fourth element of the posq array for storing charges.  Since only one force can
     * do that, this returns true the first time it is called, and false on all subsequent calls.
     */
    bool requestPosqCharges();
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
    /**
     * Get the names of all parameters with respect to which energy derivatives are computed.
     */
    const std::vector<std::string>& getEnergyParamDerivNames() const {
        return energyParamDerivNames;
    }
    /**
     * Get a workspace data structure used for accumulating the values of derivatives of the energy
     * with respect to parameters.
     */
    std::map<std::string, double>& getEnergyParamDerivWorkspace() {
        return energyParamDerivWorkspace;
    }
    /**
     * Register that the derivative of potential energy with respect to a context parameter
     * will need to be calculated.  If this is called multiple times for a single parameter,
     * it is only added to the list once.
     * 
     * @param param    the name of the parameter to add
     */
    void addEnergyParameterDerivative(const std::string& param);
570
    /**
571
572
573
574
     * Wait until all work that has been queued (kernel executions, asynchronous data transfers, etc.)
     * has been submitted to the device.  This does not mean it has necessarily been completed.
     * Calling this periodically may improve the responsiveness of the computer's GUI, but at the
     * expense of reduced simulation performance.
575
     */
576
    void flushQueue();
577
578
579
580
    /**
     * Get the flags that should be used when creating CUevent objects.
     */
    unsigned int getEventFlags();
581
582
583
584
585
586
    /**
     * Ensure that CUDA has been initialized.  This usually does not need to be called directly, because
     * it is called automatically when a CudaContext is created.  You can call it if you want to be sure
     * CUDA has been initialized without creating a CudaContext.
     */
    static void ensureCudaInitialized();
587
private:
588
589
590
591
    /**
     * Compute a sorted list of device indices in decreasing order of desirability
     */
    std::vector<int> getDevicePrecedence();
592
    static bool hasInitializedCuda;
593
    double computeCapability;
594
595
596
597
598
    CudaPlatform::PlatformData& platformData;
    int deviceIndex;
    int contextIndex;
    int numAtomBlocks;
    int numThreadBlocks;
599
    int gpuArchitecture;
600
    bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasAssignedPosqCharges;
601
    bool isLinkedContext;
602
    std::string tempDir, cacheDir;
603
604
    float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
    double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
605
606
607
608
609
610
611
612
613
614
    std::string defaultOptimizationOptions;
    std::map<std::string, std::string> compilationDefines;
    CUcontext context;
    CUdevice device;
    CUfunction clearBufferKernel;
    CUfunction clearTwoBuffersKernel;
    CUfunction clearThreeBuffersKernel;
    CUfunction clearFourBuffersKernel;
    CUfunction clearFiveBuffersKernel;
    CUfunction clearSixBuffersKernel;
Peter Eastman's avatar
Peter Eastman committed
615
    CUfunction reduceEnergyKernel;
616
    CUfunction setChargesKernel;
617
    void* pinnedBuffer;
Peter Eastman's avatar
Peter Eastman committed
618
619
620
621
622
623
624
625
626
    CudaArray posq;
    CudaArray posqCorrection;
    CudaArray velm;
    CudaArray force;
    CudaArray energyBuffer;
    CudaArray energySum;
    CudaArray energyParamDerivBuffer;
    CudaArray atomIndexDevice;
    CudaArray chargeBuffer;
627
628
    std::vector<std::string> energyParamDerivNames;
    std::map<std::string, double> energyParamDerivWorkspace;
629
630
    std::vector<CUdeviceptr> autoclearBuffers;
    std::vector<int> autoclearBufferSizes;
631
632
    CudaIntegrationUtilities* integration;
    CudaExpressionUtilities* expression;
633
    CudaBondedUtilities* bonded;
634
    CudaNonbondedUtilities* nonbonded;
635
    Kernel compilerKernel;
636
637
638
};

/**
639
 * This class exists only for backward compatibility.  Use ComputeContext::WorkTask instead.
640
 */
641
class OPENMM_EXPORT_COMMON CudaContext::WorkTask : public ComputeContext::WorkTask {
642
643
644
};

/**
645
 * This class exists only for backward compatibility.  Use ComputeContext::ReorderListener instead.
646
 */
647
class OPENMM_EXPORT_COMMON CudaContext::ReorderListener : public ComputeContext::ReorderListener {
648
649
};

650
/**
651
 * This class exists only for backward compatibility.  Use ComputeContext::ForcePreComputation instead.
652
 */
653
class OPENMM_EXPORT_COMMON CudaContext::ForcePreComputation : public ComputeContext::ForcePreComputation {
654
655
656
};

/**
657
 * This class exists only for backward compatibility.  Use ComputeContext::ForcePostComputation instead.
658
 */
659
class OPENMM_EXPORT_COMMON CudaContext::ForcePostComputation : public ComputeContext::ForcePostComputation {
660
661
};

662
663
664
} // namespace OpenMM

#endif /*OPENMM_CUDACONTEXT_H_*/