/*!
 * \file tl/op/gemm.h
 * \brief Define the GEMM tile operator and its warp-partition policy.
 */

#ifndef TVM_TL_OP_GEMM_H_
#define TVM_TL_OP_GEMM_H_

#include <cstdint>
#include <optional>
#include <utility>

#include "operator.h"

namespace tvm {
13

14
15
16
17
namespace tl {

using namespace tir;

18
/*!
 * \brief Kind of warp-partition policy attached to a GEMM.
 *
 * The numeric values are stored as plain `int` in
 * GemmWarpPolicyNode::policy_type, so they must stay stable.
 */
enum class GemmWarpPolicyType : uint8_t {
  /*! \brief NOTE(review): presumably a balanced M/N warp split - confirm. */
  kSquare = 0,
  /*! \brief NOTE(review): presumably all warps along one full row - confirm. */
  kFullRow = 1,
  /*! \brief NOTE(review): presumably all warps along one full column - confirm. */
  kFullCol = 2,
  /*! \brief Used when m_warp / n_warp are supplied explicitly by the caller. */
  kFree = 3,
};

25
26
/*!
 * \brief Instruction family a GEMM is lowered to.
 *
 * NOTE(review): the names appear to map to hardware matrix instructions
 * (MMA / WGMMA / TCGEN5MMA / MFMA); confirm the exact target mapping in the
 * implementation file.
 */
enum class GemmInst : uint8_t {
  kMMA = 0,
  kWGMMA = 1,
  kTCGEN5MMA = 2,
  kMFMA = 3,
};
27
28
29
30
31
32
class GemmWarpPolicyNode : public Object {
public:
  mutable int m_warp{0};
  mutable int n_warp{0};
  int policy_type;

33
  TVM_FFI_DECLARE_OBJECT_INFO("tl.GemmWarpPolicy", GemmWarpPolicyNode, Object);
34
35
36
37
38
39
40
41
42
43

  static void RegisterReflection() {
    namespace refl = tvm::ffi::reflection;
    refl::ObjectDef<GemmWarpPolicyNode>()
        .def_ro("policy_type", &GemmWarpPolicyNode::policy_type)
        .def_ro("m_warp", &GemmWarpPolicyNode::m_warp)
        .def_ro("n_warp", &GemmWarpPolicyNode::n_warp);
  }

  std::pair<int, int> ComputeWarpPartition(int M, int N, int block_size,
44
45
                                           Target target,
                                           GemmInst gemm_inst) const;
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

  bool isSquare() const {
    return policy_type == int(GemmWarpPolicyType::kSquare);
  }
  bool isFullRow() const {
    return policy_type == int(GemmWarpPolicyType::kFullRow);
  }
  bool isFullCol() const {
    return policy_type == int(GemmWarpPolicyType::kFullCol);
  }
  bool isFree() const { return policy_type == int(GemmWarpPolicyType::kFree); }
};

class GemmWarpPolicy : public ObjectRef {
public:
61
62
  TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(GemmWarpPolicy, ObjectRef,
                                             GemmWarpPolicyNode);
63
64

  explicit GemmWarpPolicy(GemmWarpPolicyType policy_type) {
65
    auto node = tvm::ffi::make_object<GemmWarpPolicyNode>();
66
67
68
69
70
    node->policy_type = (int)policy_type;
    data_ = std::move(node);
  }

  explicit GemmWarpPolicy(int policy_type) {
71
    auto node = tvm::ffi::make_object<GemmWarpPolicyNode>();
72
73
74
75
76
    node->policy_type = policy_type;
    data_ = std::move(node);
  }

  explicit GemmWarpPolicy(int m_warp, int n_warp) {
77
    auto node = tvm::ffi::make_object<GemmWarpPolicyNode>();
78
79
80
81
82
    node->m_warp = m_warp;
    node->n_warp = n_warp;
    node->policy_type = (int)GemmWarpPolicyType::kFree;
    data_ = std::move(node);
  }
83
};
84

85
86
class GemmNode : public TileOperatorNode {
public:
87
  bool CheckWGMMA() const;
88
  tir::Buffer A, B, C;
89
90
  // pointer to the A, B, C
  PrimExpr Aptr, Bptr, Cptr;
91
92
  bool trans_A, trans_B;
  int M, N, K;
93
94
  int stride_A, stride_B;
  int offset_A, offset_B;
95
  PrimExpr clear_accum = const_false();
96
97
  // k_pack please ref to bitblas/tl/mfma_macro_generator.py::k_pack
  // only will be enabled under cdna mfma instructions
98
  int kPack = 1;
99
  int wg_wait = 0;
100
101
102
  PrimExpr mbarptr;
  std::optional<tir::Buffer> mbar; // mbar is optional, only used for TCGEN5MMA
  Array<PrimExpr> C_coords;
103
  mutable GemmWarpPolicy policy;
104
  TVM_FFI_DECLARE_OBJECT_INFO_FINAL("tl.Gemm", GemmNode, TileOperatorNode);
105

106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
  static void RegisterReflection() {
    namespace refl = tvm::ffi::reflection;
    refl::ObjectDef<GemmNode>()
        .def_ro("A", &GemmNode::A)
        .def_ro("B", &GemmNode::B)
        .def_ro("C", &GemmNode::C)
        .def_ro("Aptr", &GemmNode::Aptr)
        .def_ro("Bptr", &GemmNode::Bptr)
        .def_ro("Cptr", &GemmNode::Cptr)
        .def_ro("trans_A", &GemmNode::trans_A)
        .def_ro("trans_B", &GemmNode::trans_B)
        .def_ro("M", &GemmNode::M)
        .def_ro("N", &GemmNode::N)
        .def_ro("K", &GemmNode::K)
        .def_ro("stride_A", &GemmNode::stride_A)
        .def_ro("stride_B", &GemmNode::stride_B)
        .def_ro("offset_A", &GemmNode::offset_A)
        .def_ro("offset_B", &GemmNode::offset_B)
        .def_ro("clear_accum", &GemmNode::clear_accum)
        .def_ro("kPack", &GemmNode::kPack)
        .def_ro("wg_wait", &GemmNode::wg_wait)
        .def_ro("policy", &GemmNode::policy);
  }

130
131
132
133
134
135
136
137
  Stmt Lower(const LowerArgs &T, arith::Analyzer *analyzer) const override;
  LayoutMap InferLayout(const LayoutInferArgs &T,
                        InferLevel level) const override;

  TileOperator Clone() const;

private:
  GemmInst GetGemmInst(int block_size, Target target) const;
138
139
  bool AllowTCGEN5MMA(Target target) const;
  bool AllowWGMMA(int block_size, Target target) const;
140
141
142
143
144
145

  mutable bool completed_ = false;
};

class Gemm : public TileOperator {
public:
146
  TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(Gemm, TileOperator, GemmNode);
147
148
  TVM_DLL Gemm(Array<PrimExpr> args, BufferMap vmap);
  static const Op &Get();
149
150
};

151
152
} // namespace tl
} // namespace tvm
153

154
#endif //  TVM_TL_OP_GEMM_H_