common.cpp 4.9 KB
Newer Older
Przemek Tredak's avatar
Przemek Tredak committed
1
/*************************************************************************
2
 * Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
Przemek Tredak's avatar
Przemek Tredak committed
3
4
5
6
7
 *
 * See LICENSE for license information.
 ************************************************************************/

#include "common.h"

#include <functional>
#include <numeric>

#include "transformer_engine/transformer_engine.h"

// Maps an FP8 recipe string to the FP8 element type to use.
//
// Returns DType::kFloat8E4M3 when fp8_recipe is exactly "E4M3", or when it is
// exactly "HYBRID" and e4m3_if_hybrid is true. Every other input (including
// recipe strings that match neither literal) yields DType::kFloat8E5M2.
transformer_engine::DType getTransformerEngineFP8Type(bool e4m3_if_hybrid,
                                                      const std::string& fp8_recipe) {
  const bool pure_e4m3 = (fp8_recipe == "E4M3");
  const bool hybrid_e4m3 = e4m3_if_hybrid && (fp8_recipe == "HYBRID");
  return (pure_e4m3 || hybrid_e4m3) ? transformer_engine::DType::kFloat8E4M3
                                    : transformer_engine::DType::kFloat8E5M2;
}

// Creates a TensorWrapper from a raw data pointer, an NVTEShape and an element
// type. All three arguments are forwarded unchanged to the TensorWrapper
// constructor; no validation is performed here.
transformer_engine::TensorWrapper makeTransformerEngineTensor(
    void* data_ptr, const NVTEShape& shape, const transformer_engine::DType type) {
  using transformer_engine::TensorWrapper;
  return TensorWrapper(data_ptr, shape, type);
}

// Creates a TensorWrapper from a raw data pointer, a shape given as a vector of
// dimension sizes, and an element type. All three arguments are forwarded
// unchanged to the TensorWrapper constructor; no validation is performed here.
transformer_engine::TensorWrapper makeTransformerEngineTensor(
    void* data_ptr, const std::vector<size_t>& shape, const transformer_engine::DType type) {
  using transformer_engine::TensorWrapper;
  return TensorWrapper(data_ptr, shape, type);
}

// Creates a TensorWrapper describing an ATen tensor.
//
// The element type is obtained from GetTransformerEngineDType applied to the
// tensor's scalar type, the shape is a copy of tensor.sizes(), and the data
// pointer is tensor.data_ptr(). The result is produced by the
// (data pointer, shape vector, dtype) overload of this function.
transformer_engine::TensorWrapper makeTransformerEngineTensor(at::Tensor tensor) {
  const transformer_engine::DType dtype = GetTransformerEngineDType(tensor.scalar_type());

  // Copy the dimensions with the range constructor: one allocation instead of
  // growing the vector element by element.
  const auto sizes = tensor.sizes();
  const std::vector<size_t> shape(sizes.begin(), sizes.end());

  return makeTransformerEngineTensor(tensor.data_ptr(), shape, dtype);
}

40
41
42
43
44
45
46
47
// Creates a TensorWrapper from a raw data pointer, shape and element type,
// together with three untyped pointers for amax, scale and scale-inverse.
// The three extra pointers are converted to float* and everything is handed to
// the TensorWrapper constructor in the order: data, shape, type, amax, scale,
// scale_inv.
transformer_engine::TensorWrapper makeTransformerEngineTensor(void* data_ptr,
                                                              const std::vector<size_t>& shape,
                                                              const transformer_engine::DType type,
                                                              void* amax_ptr, void* scale_ptr,
                                                              void* scale_inv_ptr) {
  float* const amax = static_cast<float*>(amax_ptr);
  float* const scale = static_cast<float*>(scale_ptr);
  float* const scale_inv = static_cast<float*>(scale_inv_ptr);
  return transformer_engine::TensorWrapper(data_ptr, shape, type, amax, scale, scale_inv);
}

50
// Creates a TensorWrapper describing an ATen tensor plus its amax, scale and
// scale-inverse tensors.
//
// The element type is obtained from GetTransformerEngineDType applied to
// `tensor`'s scalar type and the shape is a copy of tensor.sizes(). The
// `amax`, `scale` and `scale_inv` tensors must each have scalar type
// at::kFloat; this is enforced with NVTE_CHECK. The data pointers of all four
// tensors are then passed to the raw-pointer overload of this function.
transformer_engine::TensorWrapper makeTransformerEngineTensor(at::Tensor tensor, at::Tensor amax,
                                                              const at::Tensor scale,
                                                              at::Tensor scale_inv) {
  const transformer_engine::DType dtype = GetTransformerEngineDType(tensor.scalar_type());

  // Copy the dimensions with the range constructor: one allocation instead of
  // growing the vector element by element.
  const auto sizes = tensor.sizes();
  const std::vector<size_t> shape(sizes.begin(), sizes.end());

  // The raw-pointer overload reinterprets these buffers as float*, so any
  // other scalar type would be misread. Name the offending argument on failure.
  NVTE_CHECK(amax.scalar_type() == at::kFloat, "amax must be a float32 tensor");
  NVTE_CHECK(scale.scalar_type() == at::kFloat, "scale must be a float32 tensor");
  NVTE_CHECK(scale_inv.scalar_type() == at::kFloat, "scale_inv must be a float32 tensor");

  return makeTransformerEngineTensor(tensor.data_ptr(), shape, dtype, amax.data_ptr(),
                                     scale.data_ptr(), scale_inv.data_ptr());
}

67
68
69
70
71
72
// Returns the product of all entries of `shape`.
//
// An empty vector yields 1 (the multiplicative identity). The multiplication
// is carried out in size_t, so an overflowing product wraps modulo 2^N rather
// than being reported.
size_t product(const std::vector<size_t>& shape) {
  return std::accumulate(shape.begin(), shape.end(), size_t{1}, std::multiplies<size_t>());
}

75
// Allocates a tensor with the given shape via at::CUDA(...) options.
//
// The ATen scalar type is obtained from GetATenDType(type). When
// init_to_zeros is true the tensor is created with at::zeros, otherwise with
// at::empty.
at::Tensor allocateSpace(const std::vector<size_t>& shape, const transformer_engine::DType type,
                         bool init_to_zeros) {
  // ATen takes dimensions as int64_t, so convert the size_t shape first.
  const std::vector<int64_t> dims(shape.begin(), shape.end());
  const c10::IntArrayRef dims_ref(dims);
  const auto aten_dtype = GetATenDType(type);

  if (!init_to_zeros) {
    return at::empty(dims_ref, at::CUDA(aten_dtype));
  }
  return at::zeros(dims_ref, at::CUDA(aten_dtype));
}

86
// Allocates a tensor whose shape is given as an NVTEShape, via at::CUDA(...)
// options.
//
// Only shapes with ndim equal to 1 or 2 are handled. For those, the tensor is
// created with at::zeros when init_to_zeros is true and with at::empty
// otherwise. Any other ndim falls through to NVTE_CHECK(false, ...), which is
// expected to report an error rather than return.
at::Tensor allocateSpace(const NVTEShape& shape, const transformer_engine::DType type,
                         bool init_to_zeros) {
  const auto rank = shape.ndim;

  if (rank == 1) {
    const auto len = static_cast<int64_t>(shape.data[0]);
    if (init_to_zeros) {
      return at::zeros({len}, at::CUDA(GetATenDType(type)));
    }
    return at::empty({len}, at::CUDA(GetATenDType(type)));
  }

  if (rank == 2) {
    const auto rows = static_cast<int64_t>(shape.data[0]);
    const auto cols = static_cast<int64_t>(shape.data[1]);
    if (init_to_zeros) {
      return at::zeros({rows, cols}, at::CUDA(GetATenDType(type)));
    }
    return at::empty({rows, cols}, at::CUDA(GetATenDType(type)));
  }

  NVTE_CHECK(false, "Should never reach here! func: allocateSpace");
}

103
104
105
// Creates an uninitialized (at::empty) M x N tensor using at::CUDA(...) options
// with the ATen scalar type returned by GetATenDType(dtype).
at::Tensor allocateTorchTensor(int M, int N, transformer_engine::DType dtype) {
  const auto rows = static_cast<int64_t>(M);
  const auto cols = static_cast<int64_t>(N);
  return at::empty({rows, cols}, at::CUDA(GetATenDType(dtype)));
}

108
109
// Creates an uninitialized (at::empty) tensor with a single dimension of
// length M, using at::CUDA(...) options with the ATen scalar type returned by
// GetATenDType(dtype).
at::Tensor allocateTorchTensor(int M, transformer_engine::DType dtype) {
  const auto length = static_cast<int64_t>(M);
  return at::empty({length}, at::CUDA(GetATenDType(dtype)));
}
111

112
// Returns the tensor's data pointer advanced by `offset` elements.
//
// Returns nullptr when the tensor has no elements (numel() is not positive)
// or when its data pointer is itself null. `offset` is counted in elements and
// is scaled by tensor.element_size() to obtain the byte displacement; an
// offset of 0 returns the data pointer unchanged.
void* getDataPtr(at::Tensor tensor, int offset) {
  if (tensor.numel() <= 0) {
    return nullptr;
  }

  void* base = tensor.data_ptr();
  if (base == nullptr || offset == 0) {
    return base;
  }

  // Step in bytes through a char* view of the buffer.
  char* bytes = reinterpret_cast<char*>(base);
  bytes += offset * tensor.element_size();
  return reinterpret_cast<void*>(bytes);
}