common.cu 5.09 KB
Newer Older
Przemek Tredak's avatar
Przemek Tredak committed
1
/*************************************************************************
 * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * See LICENSE for license information.
 ************************************************************************/

#include "common.h"
#include "transformer_engine/transformer_engine.h"


// Select the FP8 element type for a given recipe name.
//
// e4m3_if_hybrid: only consulted when fp8_recipe is "HYBRID"; when true the
//                 E4M3 type is chosen for that recipe.
// fp8_recipe:     recipe name, compared case-sensitively against "E4M3" and
//                 "HYBRID".
//
// Returns kFloat8E4M3 for the "E4M3" recipe or for "HYBRID" with
// e4m3_if_hybrid set; every other combination (including unrecognised recipe
// names) yields kFloat8E5M2.
transformer_engine::DType getTransformerEngineFP8Type(bool e4m3_if_hybrid,
                                                      const std::string &fp8_recipe) {
    const bool pure_e4m3 = (fp8_recipe == "E4M3");
    const bool hybrid_e4m3 = e4m3_if_hybrid && (fp8_recipe == "HYBRID");

    return (pure_e4m3 || hybrid_e4m3) ? transformer_engine::DType::kFloat8E4M3
                                      : transformer_engine::DType::kFloat8E5M2;
}

// Build a TensorWrapper from a raw data pointer, an NVTEShape and an element
// type. The three arguments are forwarded unchanged to the TensorWrapper
// constructor; no validation is performed here.
transformer_engine::TensorWrapper makeTransformerEngineTensor(
    void* data_ptr,
    const NVTEShape& shape,
    const transformer_engine::DType type) {
  using transformer_engine::TensorWrapper;
  return TensorWrapper(data_ptr, shape, type);
}


// Build a TensorWrapper from a raw data pointer, a list of dimension sizes
// and an element type. The three arguments are forwarded unchanged to the
// TensorWrapper constructor; no validation is performed here.
transformer_engine::TensorWrapper makeTransformerEngineTensor(
    void* data_ptr,
    const std::vector<size_t>& shape,
    const transformer_engine::DType type) {
  using transformer_engine::TensorWrapper;
  return TensorWrapper(data_ptr, shape, type);
}


// Build a TensorWrapper describing an existing ATen tensor.
//
// The element type is obtained by passing the tensor's scalar type through
// GetTransformerEngineDType, the dimensions are copied from tensor.sizes(),
// and the data pointer is tensor.data_ptr().
transformer_engine::TensorWrapper makeTransformerEngineTensor(at::Tensor tensor) {
    const transformer_engine::DType te_dtype =
        GetTransformerEngineDType(tensor.scalar_type());

    // Copy the (signed) ATen sizes into the unsigned shape vector in one step.
    const auto dims = tensor.sizes();
    const std::vector<size_t> te_shape(dims.begin(), dims.end());

    return makeTransformerEngineTensor(tensor.data_ptr(), te_shape, te_dtype);
}


47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// Build a TensorWrapper from a raw data pointer, shape and element type,
// additionally attaching amax / scale / scale_inv buffers.
//
// The three auxiliary pointers arrive untyped and are passed to the
// TensorWrapper constructor as float*. This function does not check what they
// point to.
transformer_engine::TensorWrapper makeTransformerEngineTensor(
    void* data_ptr,
    const std::vector<size_t>& shape,
    const transformer_engine::DType type,
    void* amax_ptr,
    void* scale_ptr,
    void* scale_inv_ptr) {
  float* const amax = static_cast<float*>(amax_ptr);
  float* const scale = static_cast<float*>(scale_ptr);
  float* const scale_inv = static_cast<float*>(scale_inv_ptr);

  using transformer_engine::TensorWrapper;
  return TensorWrapper(data_ptr, shape, type, amax, scale, scale_inv);
}


// Build a TensorWrapper describing an existing ATen tensor together with its
// amax, scale and scale_inv tensors.
//
// The data tensor's element type is mapped through GetTransformerEngineDType
// and its dimensions are copied from tensor.sizes(). The three auxiliary
// tensors must have scalar type at::kFloat; each is verified with NVTE_CHECK
// before its data pointer is handed on.
transformer_engine::TensorWrapper makeTransformerEngineTensor(at::Tensor tensor,
                                                              at::Tensor amax,
                                                              const at::Tensor scale,
                                                              at::Tensor scale_inv) {
    const transformer_engine::DType te_dtype =
        GetTransformerEngineDType(tensor.scalar_type());

    // Copy the (signed) ATen sizes into the unsigned shape vector in one step.
    const auto dims = tensor.sizes();
    const std::vector<size_t> te_shape(dims.begin(), dims.end());

    // The pointer-based overload treats these buffers as float*, so reject
    // anything that is not a float tensor up front.
    NVTE_CHECK(amax.scalar_type() == at::kFloat);
    NVTE_CHECK(scale.scalar_type() == at::kFloat);
    NVTE_CHECK(scale_inv.scalar_type() == at::kFloat);

    void* const amax_data = amax.data_ptr();
    void* const scale_data = scale.data_ptr();
    void* const scale_inv_data = scale_inv.data_ptr();

    return makeTransformerEngineTensor(tensor.data_ptr(), te_shape, te_dtype,
                                       amax_data, scale_data, scale_inv_data);
}


Przemek Tredak's avatar
Przemek Tredak committed
82
83
84
85
86
87
88
89
90
// Multiply all entries of `shape` together.
//
// Returns 1 for an empty vector. The multiplication is done in size_t with no
// overflow detection.
size_t product(const std::vector<size_t> &shape) {
    size_t total = 1;
    for (auto it = shape.cbegin(); it != shape.cend(); ++it) {
        total *= *it;
    }
    return total;
}


cyanguwa's avatar
cyanguwa committed
91
92
93
94
95
96
97
98
99
100
101
102
103
// Allocate a tensor with the given dimensions and element type, using the
// options produced by at::CUDA(...).
//
// shape:         dimension sizes; converted element-wise to int64_t.
// type:          Transformer Engine dtype, mapped through GetATenDType.
// init_to_zeros: when true the tensor comes from at::zeros, otherwise from
//                at::empty.
at::Tensor allocateSpace(const std::vector<size_t>& shape,
                         const transformer_engine::DType type,
                         bool init_to_zeros) {
    // ATen expects signed 64-bit sizes; keep the converted copy alive for the
    // lifetime of the non-owning IntArrayRef view below.
    const std::vector<int64_t> dims(shape.begin(), shape.end());
    const c10::IntArrayRef dims_ref(dims);
    const auto aten_dtype = GetATenDType(type);

    if (!init_to_zeros) {
        return at::empty(dims_ref, at::CUDA(aten_dtype));
    }
    return at::zeros(dims_ref, at::CUDA(aten_dtype));
}


Przemek Tredak's avatar
Przemek Tredak committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
// Allocate a tensor whose dimensions are taken from an NVTEShape, using the
// options produced by at::CUDA(...).
//
// shape:         dimensions to allocate; shape.data[0 .. shape.ndim) is read.
//                Any rank >= 1 is accepted (previously only rank 1 and 2 were
//                handled and every other rank failed).
// type:          Transformer Engine dtype, mapped through GetATenDType.
// init_to_zeros: when true the tensor comes from at::zeros, otherwise from
//                at::empty.
//
// A rank-0 shape is still rejected through NVTE_CHECK with the same message
// as before, so callers that relied on that failure see no change.
at::Tensor allocateSpace(const NVTEShape &shape,
                         const transformer_engine::DType type,
                         bool init_to_zeros) {
    if (shape.ndim == 0) {
        NVTE_CHECK(false, "Should never reach here! func: allocateSpace");
    }

    // ATen expects signed 64-bit sizes; convert every dimension instead of
    // special-casing individual ranks.
    std::vector<int64_t> dims;
    dims.reserve(shape.ndim);
    for (size_t i = 0; i < shape.ndim; ++i) {
        dims.push_back(static_cast<int64_t>(shape.data[i]));
    }
    const c10::IntArrayRef ar_shape(dims);

    if (init_to_zeros) {
        return at::zeros(ar_shape, at::CUDA(GetATenDType(type)));
    }
    return at::empty(ar_shape, at::CUDA(GetATenDType(type)));
}


// Allocate an M x N tensor via at::empty with the options produced by
// at::CUDA(...); the element type is mapped from `dtype` through GetATenDType.
at::Tensor allocateTorchTensor(int M,
                               int N,
                               transformer_engine::DType dtype
) {
    const int64_t rows = static_cast<int64_t>(M);
    const int64_t cols = static_cast<int64_t>(N);
    const auto aten_dtype = GetATenDType(dtype);
    return at::empty({rows, cols}, at::CUDA(aten_dtype));
}


// Allocate a one-dimensional tensor of M elements via at::empty with the
// options produced by at::CUDA(...); the element type is mapped from `dtype`
// through GetATenDType.
at::Tensor allocateTorchTensor(int M,
                               transformer_engine::DType dtype
) {
    const int64_t length = static_cast<int64_t>(M);
    const auto aten_dtype = GetATenDType(dtype);
    return at::empty({length}, at::CUDA(aten_dtype));
}
140
141
142
143
144
145
146
147

// Return the tensor's data pointer, or nullptr when the tensor has no
// elements (numel() is not greater than zero).
void *getDataPtr(at::Tensor t) {
    if (t.numel() <= 0) {
        return nullptr;
    }
    return t.data_ptr();
}