inject_fence_proxy.cc 5.89 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file inject_fence_proxy.cc
 * \brief Inject fence between generic and async proxies (sm90+)
 */

25
#include <tvm/ffi/reflection/registry.h>
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#include <tvm/tir/analysis.h>
#include <tvm/tir/builtin.h>
#include <tvm/tir/op.h>
#include <tvm/tir/stmt_functor.h>
#include <tvm/tir/transform.h>

#include "../op/builtin.h"

namespace tvm {
namespace tl {

using namespace tir;

/*!
 * \brief Proxy classification recorded for a TIR statement.
 *
 * The enumerator order (and therefore the underlying values) is part of the
 * definition and must stay as is.
 */
enum class Proxy {
  kGeneric, //!< Tagged generic: buffer stores and ldmatrix/stmatrix calls.
  kAsync,   //!< Tagged async: every other evaluated statement.
  kBoth     //!< Compound statement whose children carry different tags.
};

class ProxyMarker : public StmtVisitor {
42
public:
43
44
  ProxyMarker() = default;

45
  Proxy GetProxy(const StmtNode *stmt) const {
46
47
48
49
50
51
52
53
54
    auto it = map_.find(stmt);
    // ICHECK(it != map_.end());
    // TODO: This is a hack implementation to avoid the ICHECK failure.
    if (it == map_.end()) {
      return Proxy::kGeneric;
    }
    return it->second;
  }

55
  Proxy GetProxy(const Stmt &stmt) const { return GetProxy(stmt.get()); }
56

57
  void VisitStmt_(const EvaluateNode *op) final {
58
59
    Proxy proxy = Proxy::kAsync;
    if (auto call = op->value.as<CallNode>()) {
60
61
      if (call->op.same_as(ptx_ldmatirx()) ||
          call->op.same_as(ptx_stmatirx())) {
62
63
64
65
66
67
        proxy = Proxy::kGeneric;
      }
    }
    SetProxy(op, proxy);
  }

68
  void VisitStmt_(const BufferStoreNode *op) final {
69
70
71
72
    Proxy proxy = Proxy::kGeneric;
    SetProxy(op, proxy);
  }

73
  void VisitStmt_(const SeqStmtNode *op) final {
74
75
76
77
78
79
80
81
82
83
84
    StmtVisitor::VisitStmt_(op);
    auto role = GetProxy(op->seq[0]);
    for (auto stmt : op->seq) {
      if (role != GetProxy(stmt)) {
        role = Proxy::kBoth;
        break;
      }
    }
    SetProxy(op, role);
  }

85
  void VisitStmt_(const IfThenElseNode *op) final {
86
87
88
89
    StmtVisitor::VisitStmt_(op);
    auto role = GetProxy(op->then_case);
    if (op->else_case.defined()) {
      auto role_else = GetProxy(op->else_case.value());
90
91
      if (role != role_else)
        role = Proxy::kBoth;
92
93
94
95
    }
    SetProxy(op, role);
  }

96
  void VisitStmt_(const BlockRealizeNode *op) final {
97
98
99
100
    StmtVisitor::VisitStmt_(op);
    SetProxy(op, GetProxy(op->block));
  }

101
  template <class NodeType> void HandleBodyStmt(const NodeType *op) {
102
103
104
105
    StmtVisitor::VisitStmt_(op);
    SetProxy(op, GetProxy(op->body));
  }

106
107
108
109
110
  void VisitStmt_(const ForNode *op) final { HandleBodyStmt(op); }
  void VisitStmt_(const LetStmtNode *op) final { HandleBodyStmt(op); }
  void VisitStmt_(const AttrStmtNode *op) final { HandleBodyStmt(op); }
  void VisitStmt_(const AssertStmtNode *op) final { HandleBodyStmt(op); }
  void VisitStmt_(const BlockNode *op) final { HandleBodyStmt(op); }
111

112
113
114
private:
  void SetProxy(const StmtNode *stmt, Proxy proxy) { map_[stmt] = proxy; }
  std::unordered_map<const StmtNode *, Proxy> map_;
115
116
};

117
118
119
120
121
122
123
124
125
126
127
/*!
 * \brief Mutator that follows every tma_store() call statement with
 *        tma_store_arrive() and tma_store_wait().
 */
class TMAStoreSyncInjector : public StmtExprMutator {
public:
  /*!
   * \brief Apply the rewrite to a function body.
   * \param f Function to rewrite.
   * \return \p f with its body replaced by the rewritten statement.
   */
  static PrimFunc Substitute(PrimFunc f) {
    auto injector = TMAStoreSyncInjector();
    Stmt rewritten = injector(f->body);
    f.CopyOnWrite()->body = std::move(rewritten);
    return f;
  }

private:
  Stmt VisitStmt_(const EvaluateNode *op) final {
    const CallNode *call = op->value.as<CallNode>();
    if (call == nullptr || !call->op.same_as(tma_store())) {
      return StmtExprMutator::VisitStmt_(op);
    }
    // Keep the store statement itself untouched and append the
    // arrive / wait pair right behind it.
    Array<Stmt> sequence;
    sequence.push_back(GetRef<Evaluate>(op));
    sequence.push_back(
        Evaluate(Call(DataType::Handle(), tma_store_arrive(), {})));
    sequence.push_back(
        Evaluate(Call(DataType::Handle(), tma_store_wait(), {})));
    return SeqStmt(std::move(sequence));
  }
};

142
class InjectFenceProxy : public StmtExprMutator {
143
public:
144
145
146
147
148
149
  static PrimFunc Substitute(PrimFunc f) {
    auto T = InjectFenceProxy();
    f.CopyOnWrite()->body = T(f->body);
    return f;
  }

150
151
private:
  Proxy get_generic_proxy(const Stmt &stmt) {
152
153
154
155
156
    auto marker = ProxyMarker();
    marker(stmt);
    return marker.GetProxy(stmt);
  }

157
  Stmt VisitStmt_(const SeqStmtNode *op) final {
158
159
160
    ICHECK(op->seq.size() > 0);
    Array<Stmt> new_body;
    Proxy cur_proxy, prev_proxy;
161
    auto fence_stmt =
162
        Evaluate(Call(DataType::Handle(), fence_proxy_async(), {}));
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
    prev_proxy = get_generic_proxy(op->seq[0]);
    new_body.push_back(VisitStmt(op->seq[0]));
    if (op->seq.size() > 1) {
      for (int i = 1; i < static_cast<int>(op->seq.size()); i++) {
        cur_proxy = get_generic_proxy(op->seq[i]);
        if (cur_proxy == Proxy::kAsync && prev_proxy == Proxy::kGeneric) {
          new_body.push_back(fence_stmt);
        }
        new_body.push_back(VisitStmt(op->seq[i]));
        prev_proxy = cur_proxy;
      }
    }
    ICHECK(new_body.size() > 0);
    return new_body.size() == 1 ? new_body[0] : SeqStmt(std::move(new_body));
  }

  // Stmt VisitStmt_(const ForNode* op) final {
  //   std::cout << "ForNode:" << op->body->GetTypeKey() << std::endl;
  //   return StmtExprMutator::VisitStmt_(op);
  // }

  InjectFenceProxy() = default;
};

using namespace tir::transform;

/*!
 * \brief Build the "tl.InjectFenceProxy" PrimFunc pass.
 *
 * The pass runs two rewrites in order: TMAStoreSyncInjector first, then the
 * InjectFenceProxy mutator on its result.
 *
 * \return The created pass (opt level 0, no required passes).
 */
tvm::transform::Pass InjectFenceProxy() {
  auto pass_func = [=](PrimFunc f, IRModule m, PassContext ctx) {
    PrimFunc with_store_sync = TMAStoreSyncInjector::Substitute(f);
    return InjectFenceProxy::Substitute(with_store_sync);
  };
  return CreatePrimFuncPass(pass_func, 0, "tl.InjectFenceProxy", {});
}

197
198
199
200
// Registers the pass factory under the global name
// "tl.transform.InjectFenceProxy" at static-initialization time.
TVM_FFI_STATIC_INIT_BLOCK({
  tvm::ffi::reflection::GlobalDef().def("tl.transform.InjectFenceProxy",
                                        InjectFenceProxy);
});
201

202
203
} // namespace tl
} // namespace tvm