HipContext.h 26.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#ifndef OPENMM_HIPCONTEXT_H_
#define OPENMM_HIPCONTEXT_H_

/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
 * Portions copyright (c) 2020 Advanced Micro Devices, Inc.                   *
 * Authors: Peter Eastman, Nicholas Curtis                                    *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */


/*
 * Porting notes:
  - Hip only marginally supports the CUDA context API, and will remove
    support eventually.  To my knowledge, contexts don't really buy you anything
    that streams / hipSetDevice don't.  Hence, for this implementation, we are doing
    away entirely with the context usage.
 */


#include <map>
42
#include <stack>
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include <string>
#include <utility>
#define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER
    // Prevent Windows from defining macros that interfere with other code.
    #define NOMINMAX
#endif
#include <pthread.h>
#include <hip/hip_runtime.h>
#include "openmm/common/windowsExportCommon.h"
#include "HipArray.h"
#include "HipBondedUtilities.h"
#include "HipExpressionUtilities.h"
#include "HipIntegrationUtilities.h"
#include "HipNonbondedUtilities.h"
#include "HipPlatform.h"
59
#include "HipFFT3D.h"
60
61
62
63
#include "openmm/OpenMMException.h"
#include "openmm/common/ComputeContext.h"
#include "openmm/Kernel.h"

Anton Gorenko's avatar
Anton Gorenko committed
64
typedef unsigned int tileflags;
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88

namespace OpenMM {

/**
 * This class contains the information associated with a Context by the HIP Platform.  Each HipContext is
 * specific to a particular device, and manages data structures and kernels for that device.  When running a simulation
 * in parallel on multiple devices, there is a separate HipContext for each one.  The list of all contexts is
 * stored in the HipPlatform::PlatformData.
 * <p>
 * In addition, a worker thread is created for each HipContext.  This is used for parallel computations, so that
 * blocking calls to one device will not block other devices.  When only a single device is being used, the worker
 * thread is not used and calculations are performed on the main application thread.
 */

class OPENMM_EXPORT_COMMON HipContext : public ComputeContext {
public:
    class WorkTask;
    class WorkThread;
    class ReorderListener;
    class ForcePreComputation;
    class ForcePostComputation;
    static const int ThreadBlockSize;
    static const int TileSize;
    HipContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision,
89
            const std::string& tempDir, HipPlatform::PlatformData& platformData, HipContext* originalContext);
90
91
92
93
94
95
96
97
98
99
100
101
102
    ~HipContext();
    /**
     * This is called to initialize internal data structures after all Forces in the system
     * have been initialized.
     */
    void initialize();
    /**
     * Get whether the context associated with this object is valid.
     */
    bool getContextIsValid() const {
        return contextIsValid;
    }
    /**
103
     * Set the device associated with this object to be the current device.  If the context is not
104
105
106
     * valid, this returns without doing anything.
     */
    void setAsCurrent();
107
108
109
110
111
112
113
114
115
116
    /**
     * Push the device associated with this object to be the current device.  If the context is not
     * valid, this returns without doing anything.
     */
    void pushAsCurrent();
    /**
     * Pop the device associated with this object off the stack of contexts.  If the context is not
     * valid, this returns without doing anything.
     */
    void popAsCurrent();
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
    /**
     * Get the hipDevice_t associated with this object.
     */
    hipDevice_t getDevice() {
        return device;
    }
    /**
     * Get the compute capability of the device associated with this object.
     */
    double getComputeCapability() const {
        return computeCapability;
    }
    /**
     * Get the index of the hipDevice_t associated with this object.
     */
    int getDeviceIndex() const {
        return deviceIndex;
    }
    /**
     * Get the PlatformData object this context is part of.
     */
    HipPlatform::PlatformData& getPlatformData() {
        return platformData;
    }
    /**
     * Get the number of contexts being used for the current simulation.
     * This is relevant when a simulation is parallelized across multiple devices.  In that case,
     * one HipContext is created for each device.
     */
    int getNumContexts() const {
        return platformData.contexts.size();
    }
    /**
     * Get the index of this context in the list stored in the PlatformData.
     */
    int getContextIndex() const {
        return contextIndex;
    }
    /**
     * Get the stream currently being used for execution.
     */
    hipStream_t getCurrentStream();
    /**
     * Set the stream to use for execution.
     */
    void setCurrentStream(hipStream_t stream);
    /**
     * Reset the context to using the default stream for execution.
     */
    void restoreDefaultStream();
    /**
     * Construct an uninitialized array of the appropriate class for this platform.  The returned
     * value should be created on the heap with the "new" operator.
     */
    HipArray* createArray();
    /**
     * Construct a ComputeEvent object of the appropriate class for this platform.
     */
    ComputeEvent createEvent();
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
    /**
     * Create a new HipFFT3D.
     *
     * @param xsize   the first dimension of the data sets on which FFTs will be performed
     * @param ysize   the second dimension of the data sets on which FFTs will be performed
     * @param zsize   the third dimension of the data sets on which FFTs will be performed
     * @param realToComplex  if true, a real-to-complex transform will be done.  Otherwise, it is complex-to-complex.
     * @param stream  HIP stream
     * @param in      the data to transform, ordered such that in[x*ysize*zsize + y*zsize + z] contains element (x, y, z)
     * @param out     on exit, this contains the transformed data
     */
    HipFFT3D* createFFT(int xsize, int ysize, int zsize, bool realToComplex, hipStream_t stream, HipArray& in, HipArray& out);
    /**
     * Get the smallest legal size for a dimension of the grid supported by the FFT.
     */
    virtual int findLegalFFTDimension(int minimum);
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
    /**
     * Compile source code to create a ComputeProgram.
     *
     * @param source             the source code of the program
     * @param defines            a set of preprocessor definitions (name, value) to define when compiling the program
     */
    ComputeProgram compileProgram(const std::string source, const std::map<std::string, std::string>& defines=std::map<std::string, std::string>());
    /**
     * Convert an array to an HipArray.  If the argument is already an HipArray, this simply casts it.
     * If the argument is a ComputeArray that wraps a HipArray, this returns the wrapped array.  For any
     * other argument, this throws an exception.
     */
    HipArray& unwrap(ArrayInterface& array) const;
    /**
     * Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
     */
    HipArray& getPosq() {
        return posq;
    }
    /**
     * Get the array which contains a correction to the position of each atom.  This only exists if getUseMixedPrecision() returns true.
     */
    HipArray& getPosqCorrection() {
        return posqCorrection;
    }
    /**
     * Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
     */
    HipArray& getVelm() {
        return velm;
    }
    /**
     * Get the array which contains the force on each atom (represented as three long longs in 64 bit fixed point).
     */
    HipArray& getForce() {
        return force;
    }
    /**
     * The HIP platform does not use floating point force buffers, so this throws an exception.
     */
    ArrayInterface& getFloatForceBuffer() {
        throw OpenMMException("HIP platform does not use floating point force buffers");
    }
    /**
     * Get the array which contains a contribution to each force represented as 64 bit fixed point.
     * This is a synonym for getForce().  It exists to satisfy the ComputeContext interface.
     */
    HipArray& getLongForceBuffer() {
        return force;
    }
    /**
     * Not all HIP devices support 64 bit atomics, so this throws an exception.
     * @return
     */
    ArrayInterface& getForceBuffers() {
        throw OpenMMException("HIP platform does not use floating point force buffers");
    }
    /**
     * Get the array which contains the buffer in which energy is computed.
     */
    HipArray& getEnergyBuffer() {
        return energyBuffer;
    }
    /**
     * Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
     */
    HipArray& getEnergyParamDerivBuffer() {
        return energyParamDerivBuffer;
    }
    /**
     * Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
     * This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
     */
    void* getPinnedBuffer() {
        return pinnedBuffer;
    }
    /**
     * Get a shared ThreadPool that code can use to parallelize operations.
     *
     * Because this object is freely available to all code, care is needed to avoid conflicts.  Only use it
     * from the main thread, and make sure all operations are complete before you invoke any other code that
     * might make use of it
     */
    ThreadPool& getThreadPool() {
        return getPlatformData().threads;
    }
    /**
     * Get the array which contains the index of each atom.
     */
    HipArray& getAtomIndexArray() {
        return atomIndexDevice;
    }
284
285
286
287
288
289
290
291
292
293
294
295
    /**
     * Get a file name in tempDir unique for the current process and context.
     */
    std::string getTempFileName() const;
    /**
     * Get src hash.
     */
    std::string getHash(const std::string& src) const;
    /**
     * Get a filename in cacheDir based on src hash.
     */
    std::string getCacheFileName(const std::string& src) const;
296
297
298
299
300
    /**
     * Create a HIP module from source code.
     *
     * @param source             the source code of the module
     */
Anton Gorenko's avatar
Anton Gorenko committed
301
    hipModule_t createModule(const std::string source);
302
303
304
305
306
307
    /**
     * Create a HIP module from source code.
     *
     * @param source             the source code of the module
     * @param defines            a set of preprocessor definitions (name, value) to define when compiling the program
     */
Anton Gorenko's avatar
Anton Gorenko committed
308
    hipModule_t createModule(const std::string source, const std::map<std::string, std::string>& defines);
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
    /**
     * Get a kernel from a HIP module.
     *
     * @param module    the module to get the kernel from
     * @param name      the name of the kernel to get
     */
    hipFunction_t getKernel(hipModule_t& module, const std::string& name);
    /**
     * Execute a kernel.
     *
     * @param kernel       the kernel to execute
     * @param arguments    an array of pointers to the kernel arguments
     * @param threads      the maximum number of threads that should be used
     * @param blockSize    the size of each thread block to use
     * @param sharedSize   the amount of dynamic shared memory to allocated for the kernel, in bytes
     */
Anton Gorenko's avatar
Anton Gorenko committed
325
326
327
328
329
330
331
332
333
334
335
    void executeKernel(hipFunction_t kernel, void** arguments, int threads, int blockSize = -1, unsigned int sharedSize = 0);
    /**
     * Execute a kernel with full grid.
     *
     * @param kernel       the kernel to execute
     * @param arguments    an array of pointers to the kernel arguments
     * @param threads      the total number of threads that should be used
     * @param blockSize    the size of each thread block to use
     * @param sharedSize   the amount of dynamic shared memory to allocated for the kernel, in bytes
     */
    void executeKernelFlat(hipFunction_t kernel, void** arguments, int threads, int blockSize = -1, unsigned int sharedSize = 0);
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
    /**
     * Compute the largest thread block size that can be used for a kernel that requires a particular amount of
     * shared memory per thread.
     *
     * @param memory        the number of bytes of shared memory per thread
     */
    int computeThreadBlockSize(double memory) const;
    /**
     * Set all elements of an array to 0.
     */
    void clearBuffer(ArrayInterface& array);
    /**
     * Set all elements of an array to 0.
     *
     * @param memory     the memory to clear
     * @param size       the size of the buffer in bytes
     */
    void clearBuffer(hipDeviceptr_t memory, int size);
    /**
     * Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
     */
    void addAutoclearBuffer(ArrayInterface& array);
    /**
     * Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
     *
     * @param memory     the memory to clear
     * @param size       the size of the buffer in bytes
     */
    void addAutoclearBuffer(hipDeviceptr_t memory, int size);
    /**
     * Clear all buffers that have been registered with addAutoclearBuffer().
     */
    void clearAutoclearBuffers();
    /**
     * Sum the buffer containing energy.
     */
    double reduceEnergy();
    /**
     * Get the number of blocks of TileSize atoms.
     */
    int getNumAtomBlocks() const {
        return numAtomBlocks;
    }
    /**
     * Get the standard number of thread blocks to use when executing kernels.
     */
    int getNumThreadBlocks() const {
        return numThreadBlocks;
    }
    /**
     * Get the maximum number of threads in a thread block supported by this device.
     */
    int getMaxThreadBlockSize() const {
389
        return 256;
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
    }
    /**
     * Get whether the device being used is a CPU.  In some cases, different algorithms
     * may be more efficient on CPUs and GPUs.
     */
    bool getIsCPU() const {
        return false;
    }
    /**
     * Get the SIMD width of the device being used.
     */
    int getSIMDWidth() const {
        return simdWidth;
    }
    /**
Anton Gorenko's avatar
Anton Gorenko committed
405
     * Get the number of multiprocessors (compute units) of the device being used.
406
     */
Anton Gorenko's avatar
Anton Gorenko committed
407
408
    int getMultiprocessors() const {
        return multiprocessors;
409
410
411
412
413
    }
    /**
     * Get whether the device being used supports 64 bit atomic operations on global memory.
     */
    bool getSupports64BitGlobalAtomics() const {
Anton Gorenko's avatar
Anton Gorenko committed
414
        return true;
415
    }
Anton Gorenko's avatar
Anton Gorenko committed
416
417
418
419
420
421
422
    /**
     * Get whether the device being used supports 32 bit floating point atomic operations
     * on global memory (fast hardware instructions, not a compare-and-swap loop implementation).
     */
    bool getSupportsHardwareFloatGlobalAtomicAdd() const {
        return supportsHardwareFloatGlobalAtomicAdd;
    }
423
424
425
426
    /**
     * Get whether the device being used supports double precision math.
     */
    bool getSupportsDoublePrecision() const {
Anton Gorenko's avatar
Anton Gorenko committed
427
        return true;
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
    }
    /**
     * Get whether double precision is being used.
     */
    bool getUseDoublePrecision() const {
        return useDoublePrecision;
    }
    /**
     * Get whether mixed precision is being used.
     */
    bool getUseMixedPrecision() const {
        return useMixedPrecision;
    }
    /**
     * Get whether the periodic box is triclinic.
     */
    bool getBoxIsTriclinic() const {
        return boxIsTriclinic;
    }
    /**
     * Convert a HIP result code to the corresponding string description.
     */
    static std::string getErrorString(hipError_t result);
    /**
     * Get the vectors defining the periodic box.
     */
    void getPeriodicBoxVectors(Vec3& a, Vec3& b, Vec3& c) const {
        a = Vec3(periodicBoxVecX.x, periodicBoxVecX.y, periodicBoxVecX.z);
        b = Vec3(periodicBoxVecY.x, periodicBoxVecY.y, periodicBoxVecY.z);
        c = Vec3(periodicBoxVecZ.x, periodicBoxVecZ.y, periodicBoxVecZ.z);
    }
    /**
     * Set the vectors defining the periodic box.
     */
    void setPeriodicBoxVectors(const Vec3& a, const Vec3& b, const Vec3& c) {
        periodicBoxVecX = make_double4(a[0], a[1], a[2], 0.0);
        periodicBoxVecY = make_double4(b[0], b[1], b[2], 0.0);
        periodicBoxVecZ = make_double4(c[0], c[1], c[2], 0.0);
        periodicBoxVecXFloat = make_float4((float) a[0], (float) a[1], (float) a[2], 0.0f);
        periodicBoxVecYFloat = make_float4((float) b[0], (float) b[1], (float) b[2], 0.0f);
        periodicBoxVecZFloat = make_float4((float) c[0], (float) c[1], (float) c[2], 0.0f);
        periodicBoxSize = make_double4(a[0], b[1], c[2], 0.0);
        invPeriodicBoxSize = make_double4(1.0/a[0], 1.0/b[1], 1.0/c[2], 0.0);
        periodicBoxSizeFloat = make_float4((float) a[0], (float) b[1], (float) c[2], 0.0f);
        invPeriodicBoxSizeFloat = make_float4(1.0f/(float) a[0], 1.0f/(float) b[1], 1.0f/(float) c[2], 0.0f);
    }
    /**
     * Get the size of the periodic box.
     */
    double4 getPeriodicBoxSize() const {
        return periodicBoxSize;
    }
    /**
     * Get the inverse of the size of the periodic box.
     */
    double4 getInvPeriodicBoxSize() const {
        return invPeriodicBoxSize;
    }
    /**
     * Get a pointer to the size of the periodic box, represented as either a float4 or double4 depending on
     * this context's precision.  This value is suitable for passing to kernels as an argument.
     */
    void* getPeriodicBoxSizePointer() {
        return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxSize) : reinterpret_cast<void*>(&periodicBoxSizeFloat));
    }
    /**
     * Get a pointer to the inverse of the size of the periodic box, represented as either a float4 or double4 depending on
     * this context's precision.  This value is suitable for passing to kernels as an argument.
     */
    void* getInvPeriodicBoxSizePointer() {
        return (useDoublePrecision ? reinterpret_cast<void*>(&invPeriodicBoxSize) : reinterpret_cast<void*>(&invPeriodicBoxSizeFloat));
    }
    /**
     * Get a pointer to the first periodic box vector, represented as either a float4 or double4 depending on
     * this context's precision.  This value is suitable for passing to kernels as an argument.
     */
    void* getPeriodicBoxVecXPointer() {
        return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxVecX) : reinterpret_cast<void*>(&periodicBoxVecXFloat));
    }
    /**
     * Get a pointer to the second periodic box vector, represented as either a float4 or double4 depending on
     * this context's precision.  This value is suitable for passing to kernels as an argument.
     */
    void* getPeriodicBoxVecYPointer() {
        return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxVecY) : reinterpret_cast<void*>(&periodicBoxVecYFloat));
    }
    /**
     * Get a pointer to the third periodic box vector, represented as either a float4 or double4 depending on
     * this context's precision.  This value is suitable for passing to kernels as an argument.
     */
    void* getPeriodicBoxVecZPointer() {
        return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxVecZ) : reinterpret_cast<void*>(&periodicBoxVecZFloat));
    }
    /**
     * Get the HipIntegrationUtilities for this context.
     */
    HipIntegrationUtilities& getIntegrationUtilities() {
        return *integration;
    }
    /**
     * Get the HipExpressionUtilities for this context.
     */
    HipExpressionUtilities& getExpressionUtilities() {
        return *expression;
    }
    /**
     * Get the HipBondedUtilities for this context.
     */
    HipBondedUtilities& getBondedUtilities() {
        return *bonded;
    }
    /**
     * Get the HipNonbondedUtilities for this context.
     */
    HipNonbondedUtilities& getNonbondedUtilities() {
        return *nonbonded;
    }
    /**
     * Create a new NonbondedUtilities for use with this context.  This should be called
     * only in unusual situations, when a Force needs its own NonbondedUtilities object
     * separate from the standard one.  The caller is responsible for deleting the object
     * when it is no longer needed.
     */
    HipNonbondedUtilities* createNonbondedUtilities() {
        return new HipNonbondedUtilities(*this);
    }
    /**
     * This should be called by the Integrator from its own initialize() method.
     * It ensures all contexts are fully initialized.
     */
    void initializeContexts();
    /**
     * Set the particle charges.  These are packed into the fourth element of the posq array.
     */
    void setCharges(const std::vector<double>& charges);
    /**
     * Request to use the fourth element of the posq array for storing charges.  Since only one force can
     * do that, this returns true the first time it is called, and false on all subsequent calls.
     */
    bool requestPosqCharges();
    /**
     * Get the names of all parameters with respect to which energy derivatives are computed.
     */
    const std::vector<std::string>& getEnergyParamDerivNames() const {
        return energyParamDerivNames;
    }
    /**
     * Get a workspace data structure used for accumulating the values of derivatives of the energy
     * with respect to parameters.
     */
    std::map<std::string, double>& getEnergyParamDerivWorkspace() {
        return energyParamDerivWorkspace;
    }
    /**
     * Register that the derivative of potential energy with respect to a context parameter
     * will need to be calculated.  If this is called multiple times for a single parameter,
     * it is only added to the list once.
     *
     * @param param    the name of the parameter to add
     */
    void addEnergyParameterDerivative(const std::string& param);
    /**
     * Wait until all work that has been queued (kernel executions, asynchronous data transfers, etc.)
     * has been submitted to the device.  This does not mean it has necessarily been completed.
     * Calling this periodically may improve the responsiveness of the computer's GUI, but at the
     * expense of reduced simulation performance.
     */
    void flushQueue();
596
597
598
599
    /**
     * Get the flags that should be used when creating hipEvent_t objects.
     */
    unsigned int getEventFlags();
600
601
602
603
    /**
     * Get the flags that should be used when allocating pinned host memory.
     */
    unsigned int getHostMallocFlags();
604
605
606
607
608
609
610
611
612
613
614
615
616
private:
    /**
     * Compute a sorted list of device indices in decreasing order of desirability
     */
    std::vector<int> getDevicePrecedence();
    static bool hasInitializedHip;
    double computeCapability;
    HipPlatform::PlatformData& platformData;
    int deviceIndex;
    int contextIndex;
    int numAtomBlocks;
    int numThreadBlocks;
    int simdWidth;
Anton Gorenko's avatar
Anton Gorenko committed
617
    int multiprocessors;
618
    int sharedMemPerBlock;
Anton Gorenko's avatar
Anton Gorenko committed
619
    bool supportsHardwareFloatGlobalAtomicAdd;
620
    bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasAssignedPosqCharges;
621
    bool isLinkedContext;
622
    std::string tempDir, cacheDir, gpuArchitecture;
623
624
625
    float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
    double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
    std::map<std::string, std::string> compilationDefines;
626
    std::vector<hipModule_t> loadedModules;
627
    hipDevice_t device;
628
    std::stack<hipDevice_t> outerScopeDevices;
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
    hipStream_t currentStream;
    hipFunction_t clearBufferKernel;
    hipFunction_t clearTwoBuffersKernel;
    hipFunction_t clearThreeBuffersKernel;
    hipFunction_t clearFourBuffersKernel;
    hipFunction_t clearFiveBuffersKernel;
    hipFunction_t clearSixBuffersKernel;
    hipFunction_t reduceEnergyKernel;
    hipFunction_t setChargesKernel;
    void* pinnedBuffer;
    HipArray posq;
    HipArray posqCorrection;
    HipArray velm;
    HipArray force;
    HipArray energyBuffer;
    HipArray energySum;
    HipArray energyParamDerivBuffer;
    HipArray atomIndexDevice;
    HipArray chargeBuffer;
    std::vector<std::string> energyParamDerivNames;
    std::map<std::string, double> energyParamDerivWorkspace;
    std::vector<hipDeviceptr_t> autoclearBuffers;
    std::vector<int> autoclearBufferSizes;
    HipIntegrationUtilities* integration;
    HipExpressionUtilities* expression;
    HipBondedUtilities* bonded;
    HipNonbondedUtilities* nonbonded;
};

/**
 * This class exists only for backward compatibility.  Use ComputeContext::WorkTask instead.
 */
class OPENMM_EXPORT_COMMON HipContext::WorkTask : public ComputeContext::WorkTask {
};

/**
 * This class exists only for backward compatibility.  Use ComputeContext::ReorderListener instead.
 */
class OPENMM_EXPORT_COMMON HipContext::ReorderListener : public ComputeContext::ReorderListener {
};

/**
 * This class exists only for backward compatibility.  Use ComputeContext::ForcePreComputation instead.
 */
class OPENMM_EXPORT_COMMON HipContext::ForcePreComputation : public ComputeContext::ForcePreComputation {
};

/**
 * This class exists only for backward compatibility.  Use ComputeContext::ForcePostComputation instead.
 */
class OPENMM_EXPORT_COMMON HipContext::ForcePostComputation : public ComputeContext::ForcePostComputation {
};

} // namespace OpenMM

#endif /*OPENMM_HIPCONTEXT_H_*/