/**
 *  Copyright (c) 2020-2022 by Contributors
 * @file tensoradapter.h
 * @brief Header file for functions exposed by the adapter library.
 *
 * Functions in this library must be exported with extern "C" so that DGL can
 * locate them with dlsym(3) (or GetProcAddress on Windows).
 */

#ifndef TENSORADAPTER_H_
#define TENSORADAPTER_H_
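/*
 * Illustrative sketch (not part of this header): how a host application such
 * as DGL might resolve these exported symbols at runtime.  The library name
 * below is an assumption used only for illustration.
 *
 *   #include <dlfcn.h>
 *
 *   void* handle = dlopen("libtensoradapter_pytorch.so", RTLD_LAZY | RTLD_LOCAL);
 *   auto cpu_alloc = handle
 *       ? reinterpret_cast<void* (*)(size_t)>(dlsym(handle, "CPURawAlloc"))
 *       : nullptr;
 *   // On Windows the same lookup would go through LoadLibrary/GetProcAddress.
 */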

#ifdef DGL_USE_CUDA
#include <cuda_runtime.h>
#endif  // DGL_USE_CUDA

namespace tensoradapter {

extern "C" {

/**
 * @brief Allocate a piece of CPU memory via PyTorch's CPUAllocator.
 *
 * @param nbytes The size to be allocated.
 * @return Pointer to the allocated memory.
 */
void* CPURawAlloc(size_t nbytes);

/**
 * @brief Free the CPU memory.
 *
 * @param ptr Pointer to the memory to be freed.
 */
void CPURawDelete(void* ptr);
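/*
 * Usage sketch (assumes the adapter library has been loaded and the symbols
 * resolved; direct calls are shown for brevity):
 *
 *   void* buf = CPURawAlloc(256 * sizeof(float));  // backed by PyTorch's CPUAllocator
 *   // ... fill the buffer ...
 *   CPURawDelete(buf);                             // return it to the same allocator
 */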

#ifdef DGL_USE_CUDA
/**
 * @brief Allocate a piece of GPU memory via PyTorch's THCCachingAllocator.
 *
 * @param nbytes The size to be allocated.
 * @param stream The stream to be allocated on.
 * @return Pointer to the allocated memory.
 */
void* CUDARawAlloc(size_t nbytes, cudaStream_t stream);

/**
 * @brief Free the GPU memory.
 *
 * @param ptr Pointer to the memory to be freed.
 */
void CUDARawDelete(void* ptr);

/**
 * @brief Get the current CUDA stream.
 */
cudaStream_t CUDACurrentStream();

/**
 * @brief Let the caching allocator know which streams are using this tensor.
 *
 * @param ptr Pointer of the tensor to be recorded.
 * @param stream The stream that is using this tensor.
 * @param device_id Device of the tensor.
 */
void RecordStream(void* ptr, cudaStream_t stream, int device_id);
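/*
 * Usage sketch (assumes a valid CUDA context; `side_stream` and device 0 are
 * illustrative names, not part of the API):
 *
 *   cudaStream_t cur = CUDACurrentStream();   // PyTorch's current stream
 *   void* dbuf = CUDARawAlloc(1 << 20, cur);  // 1 MiB from the caching allocator
 *   // ... launch work on another stream `side_stream` that touches dbuf ...
 *   RecordStream(dbuf, side_stream, 0);       // so the allocator does not reuse
 *                                             // the block while side_stream runs
 *   CUDARawDelete(dbuf);
 */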

/**
 * @brief Allocate a piece of pinned CPU memory via
 *     PyTorch's CachingHostAllocator.
 *
 * @param nbytes The size to be allocated.
 * @param ctx Pointer to the PyTorch storage ctx ptr returned from the
 *     allocator.
 * @param raw_deleter Pointer to the delete function ptr returned from the
 *     allocator.
 * @return Raw pointer to the allocated memory.
 */
void* CUDARawHostAlloc(size_t nbytes, void** ctx, void** raw_deleter);

/**
 * @brief 'Free' the pinned CPU memory by inserting the memory block back
 *     into the free list.
 *
 * @param raw_deleter Pointer to the delete function ptr returned from the
 *     allocator.
 */
void CUDARawHostDelete(void** raw_deleter);

/**
 * @brief 'Record' a CUDA stream (usually from a copy kernel) for the pinned
 *     memory via PyTorch's CachingHostAllocator.
 *
 * @param data Pointer of the tensor to be recorded.
 * @param ctx PyTorch storage ctx ptr returned from the allocator.
 * @param stream The stream that currently consumes this tensor.
 * @param device_id Device of the tensor.
 */
void CUDARecordHostAlloc(
    void* data, void* ctx, cudaStream_t stream, int device_id);

/**
 * @brief Release cached pinned memory allocations via cudaFreeHost.
 */
void CUDAHostAllocatorEmptyCache();
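/*
 * Usage sketch for the pinned-memory path (the async copy and device 0 are
 * illustrative; ctx/raw_deleter handling follows the parameter descriptions
 * above):
 *
 *   void* ctx = nullptr;
 *   void* raw_deleter = nullptr;
 *   void* host = CUDARawHostAlloc(1 << 20, &ctx, &raw_deleter);  // pinned 1 MiB
 *   cudaStream_t copy_stream = CUDACurrentStream();
 *   // cudaMemcpyAsync(device_dst, host, 1 << 20,
 *   //                 cudaMemcpyHostToDevice, copy_stream);
 *   CUDARecordHostAlloc(host, ctx, copy_stream, 0);  // copy_stream consumes `host`
 *   CUDARawHostDelete(&raw_deleter);   // put the block back on the free list
 *   CUDAHostAllocatorEmptyCache();     // optionally release cached pinned blocks
 */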

#endif  // DGL_USE_CUDA
}  // extern "C"

}  // namespace tensoradapter

#endif  // TENSORADAPTER_H_