/**
 *  Copyright (c) 2020-2022 by Contributors
 * @file torch/torch.cpp
 * @brief Implementation of PyTorch adapter library.
 */

#include <c10/core/CPUAllocator.h>
#include <tensoradapter_exports.h>
#ifdef DGL_USE_CUDA
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDACachingAllocator.h>
#include <c10/cuda/CUDAStream.h>
#include <cuda_runtime.h>
#endif  // DGL_USE_CUDA

namespace tensoradapter {

extern "C" {

/**
 * @brief Allocate raw host memory from the allocator returned by
 *        c10::GetCPUAllocator().
 * @param nbytes Number of bytes requested.
 * @return Whatever pointer the allocator's raw_allocate() yields.
 */
TA_EXPORTS void* CPURawAlloc(size_t nbytes) {
  return c10::GetCPUAllocator()->raw_allocate(nbytes);
}

/**
 * @brief Hand a pointer back to the allocator returned by
 *        c10::GetCPUAllocator().
 * @param ptr Pointer to release. Presumably one previously obtained from
 *        CPURawAlloc(); this is not checked here.
 */
TA_EXPORTS void CPURawDelete(void* ptr) {
  c10::GetCPUAllocator()->raw_deallocate(ptr);
}

#ifdef DGL_USE_CUDA
/**
 * @brief Allocate raw device memory from PyTorch's CUDA caching allocator.
 * @param nbytes Number of bytes requested.
 * @param stream CUDA stream forwarded to raw_alloc_with_stream().
 * @return Whatever pointer raw_alloc_with_stream() yields.
 */
TA_EXPORTS void* CUDARawAlloc(size_t nbytes, cudaStream_t stream) {
  // Trigger PyTorch's lazy CUDA initialization before touching the allocator.
  at::globalContext().lazyInitCUDA();
  return c10::cuda::CUDACachingAllocator::raw_alloc_with_stream(nbytes, stream);
}

/**
 * @brief Release device memory through PyTorch's CUDA caching allocator.
 * @param ptr Pointer to release. Presumably one previously obtained from
 *        CUDARawAlloc(); this is not checked here.
 */
TA_EXPORTS void CUDARawDelete(void* ptr) {
  c10::cuda::CUDACachingAllocator::raw_delete(ptr);
}

/**
 * @brief Fetch the stream reported by at::cuda::getCurrentCUDAStream().
 * @return That stream converted to a raw cudaStream_t handle.
 */
TA_EXPORTS cudaStream_t CUDACurrentStream() {
  return at::cuda::getCurrentCUDAStream();
}

/**
 * @brief Forward a (pointer, stream) pair to
 *        c10::cuda::CUDACachingAllocator::recordStream().
 * @param ptr Device pointer to record the stream for.
 * @param stream Raw CUDA stream handle.
 * @param device_id Index of the CUDA device used to build the c10::Device for
 *        both the pointer wrapper and the stream wrapper.
 */
TA_EXPORTS void RecordStream(void* ptr, cudaStream_t stream, int device_id) {
  const c10::Device device(c10::DeviceType::CUDA, device_id);

  // recordStream() takes a c10::DataPtr, so wrap the raw pointer temporarily.
  c10::DataPtr data_ptr{
      ptr, ptr, c10::cuda::CUDACachingAllocator::get()->raw_deleter(), device};

  // getStreamFromExternal doesn't exist before PyTorch 1.10, just copy it
  // here
  const c10::cuda::CUDAStream wrapped_stream(
      c10::cuda::CUDAStream::UNCHECKED,
      c10::Stream(
          c10::Stream::UNSAFE, device, reinterpret_cast<int64_t>(stream)));

  c10::cuda::CUDACachingAllocator::recordStream(data_ptr, wrapped_stream);

  // Detach the context before the temporary wrapper is destroyed; the intent
  // is that its destructor must not run the deleter on the caller's pointer.
  data_ptr.release_context();
}
#endif  // DGL_USE_CUDA

}  // extern "C"

}  // namespace tensoradapter