// !!! This is a file automatically generated by hipify!!!
/**
 *  Copyright (c) 2020-2022 by Contributors
 * @file torch/torch.cpp
 * @brief Implementation of PyTorch adapter library.
 */

#include <c10/core/CPUAllocator.h>
#include <tensoradapter_exports.h>
#ifdef DGL_USE_CUDA
#include <ATen/hip/HIPContext.h>
#include <ATen/hip/CachingHostAllocator.h>
#include <ATen/hip/impl/HIPCachingAllocatorMasqueradingAsCUDA.h>
#include <ATen/hip/impl/HIPStreamMasqueradingAsCUDA.h>
#include <hip/hip_runtime.h>
#endif  // DGL_USE_CUDA

namespace tensoradapter {

extern "C" {

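// Route CPU allocations and frees through PyTorch's c10 CPU allocator, so
// buffers handed out here can be released the same way PyTorch releases its own.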
TA_EXPORTS void* CPURawAlloc(size_t nbytes) {
  return c10::GetCPUAllocator()->raw_allocate(nbytes);
}

TA_EXPORTS void CPURawDelete(void* ptr) {
  c10::GetCPUAllocator()->raw_deallocate(ptr);
}

#ifdef DGL_USE_CUDA
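// Allocate device memory on `stream` from PyTorch's caching HIP allocator;
// lazyInitCUDA() ensures the device context is initialized first.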
TA_EXPORTS void* CUDARawAlloc(size_t nbytes, hipStream_t stream) {
  at::globalContext().lazyInitCUDA();
  return c10::hip::HIPCachingAllocator::raw_alloc_with_stream(nbytes, stream);
}

TA_EXPORTS void CUDARawDelete(void* ptr) {
  c10::hip::HIPCachingAllocator::raw_delete(ptr);
}

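// Return the HIP stream PyTorch currently considers current on this device.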
TA_EXPORTS hipStream_t CUDACurrentStream() {
  return at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
}

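// Notify PyTorch's caching allocator that `ptr` is in use on `stream`, so the
// block is not handed out again until work queued on that stream has finished.
// The temporary DataPtr is only a handle; release_context() below keeps
// ownership of the memory with the caller.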
TA_EXPORTS void RecordStream(void* ptr, hipStream_t stream, int device_id) {
  c10::DataPtr data_ptr{
      ptr, ptr, c10::hip::HIPCachingAllocatorMasqueradingAsCUDA::get()->raw_deleter(),
      c10::Device(c10::DeviceType::CUDA, device_id)};
  c10::hip::HIPCachingAllocatorMasqueradingAsCUDA::recordStreamMasqueradingAsCUDA(
      data_ptr,
      // getStreamFromExternalMasqueradingAsCUDA doesn't exist before PyTorch
      // 1.10, so its construction logic is inlined here.
      c10::hip::HIPStreamMasqueradingAsCUDA(
          c10::hip::HIPStreamMasqueradingAsCUDA::UNCHECKED,
          c10::Stream(
              c10::Stream::UNSAFE,
              c10::Device(c10::DeviceType::CUDA, device_id),
              reinterpret_cast<int64_t>(stream))));
  data_ptr.release_context();
}

// Owns the context handed back by the caching host allocator; destroying this
// object returns the pinned buffer to that allocator.
class CUDAHostDeleter {
 public:
  explicit CUDAHostDeleter(std::unique_ptr<void, c10::DeleterFnPtr> ptr)
      : ptr_(std::move(ptr)) {}

 private:
  std::unique_ptr<void, c10::DeleterFnPtr> ptr_;
};

// Allocate pinned (page-locked) host memory from PyTorch's caching host
// allocator. Ownership of the allocator context is transferred to *raw_deleter.
TA_EXPORTS void* CUDARawHostAlloc(
    size_t nbytes, void** ctx, void** raw_deleter) {
  auto data_ptr = at::cuda::getCachingHostAllocator()->allocate(nbytes);
  auto raw = data_ptr.get();
  // Return the raw ctx ptr for recording event.
  *ctx = data_ptr.get_context();

  // Transfer ownership to raw_deleter.
  auto* data_deleter = new CUDAHostDeleter(data_ptr.move_context());
  *raw_deleter = static_cast<void*>(data_deleter);
  return raw;
}

// Designated CUDAHostDeleter for CUDARawHostAlloc.
TA_EXPORTS void CUDARawHostDelete(void** raw_deleter) {
  delete static_cast<CUDAHostDeleter*>(*raw_deleter);
  *raw_deleter = nullptr;
}

// Record an event on `stream` for a pinned host allocation, so the caching
// host allocator only reuses the block after that work completes.
TA_EXPORTS void CUDARecordHostAlloc(
    void* ptr, void* ctx, hipStream_t stream, int device_id) {
  at::cuda::CachingHostAllocator_recordEvent(
      ptr, ctx,
      c10::hip::HIPStreamMasqueradingAsCUDA(
          c10::hip::HIPStreamMasqueradingAsCUDA::UNCHECKED,
          c10::Stream(
              c10::Stream::UNSAFE,
              c10::Device(c10::DeviceType::CUDA, device_id),
              reinterpret_cast<int64_t>(stream))));
}

// Release all cached pinned host blocks held by the caching host allocator.
TA_EXPORTS void CUDAHostAllocatorEmptyCache() {
  at::cuda::CachingHostAllocator_emptyCache();
}
#endif  // DGL_USE_CUDA
};  // extern "C"

};  // namespace tensoradapter
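
// A minimal, hypothetical consumer sketch (not part of the adapter): the
// extern "C" symbols above are intended to be resolved at runtime via
// dlopen()/dlsym() (<dlfcn.h>). The library name below is an assumption and
// depends on how the adapter is built and packaged.
//
//   void* handle = dlopen("libtensoradapter_pytorch.so", RTLD_LAZY);
//   auto raw_alloc =
//       reinterpret_cast<void* (*)(size_t)>(dlsym(handle, "CPURawAlloc"));
//   auto raw_free =
//       reinterpret_cast<void (*)(void*)>(dlsym(handle, "CPURawDelete"));
//   void* buf = raw_alloc(1024);  // 1 KiB served by PyTorch's CPU allocator
//   raw_free(buf);
//   dlclose(handle);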