/*!
 * \file tl/op/atomic_add.h
 * \brief Atomic addition operations for concurrent memory updates
 */
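
/*
 * Conceptual note (illustrative only; the actual code generation is handled by
 * the lowering passes, not by this header): semantically, each element of the
 * tile is accumulated into the destination with a device-side atomic
 * read-modify-write, roughly
 *
 *   atomicAdd(&dst[i], src[i]);   // pseudo-CUDA for a single element
 *
 * which is what makes concurrent updates from many threads safe.
 */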

#ifndef TVM_TL_OP_ATOMIC_ADD_H_
#define TVM_TL_OP_ATOMIC_ADD_H_

#include "operator.h"
#include "parallel.h"

namespace tvm {
namespace tl {

using namespace tir;

/// Node class for atomic addition operations
class AtomicAddNode : public TileOperatorNode {
public:
  Buffer src, dst; ///< Source and destination buffers
  Array<Range> src_range,
      dst_range;          ///< Access ranges for source and destination
  IntImm use_tma;         ///< Whether to use TMA for memory operations
  IntImm coalesced_width; ///< Width for memory coalescing optimization
  IntImm memory_order;    ///< Memory order for atomic operations

  mutable ParallelOp par_op_; ///< Associated parallel operation

  static constexpr const char *_type_key = "tl.AtomicAdd";
  TVM_DECLARE_FINAL_OBJECT_INFO(AtomicAddNode, TileOperatorNode);

  Stmt Lower(const LowerArgs &T, arith::Analyzer *analyzer) const;
  LayoutMap InferLayout(const LayoutInferArgs &T, InferLevel level) const;

  static const Op &Get();
  TileOperator Clone() const;

  static void RegisterReflection() {
    namespace refl = tvm::ffi::reflection;
    refl::ObjectDef<AtomicAddNode>()
        .def_ro("src", &AtomicAddNode::src)
        .def_ro("dst", &AtomicAddNode::dst)
        .def_ro("src_range", &AtomicAddNode::src_range)
        .def_ro("dst_range", &AtomicAddNode::dst_range)
        .def_ro("use_tma", &AtomicAddNode::use_tma)
        .def_ro("coalesced_width", &AtomicAddNode::coalesced_width)
        .def_ro("memory_order", &AtomicAddNode::memory_order);
  }

  bool SEqualReduce(const AtomicAddNode *other, SEqualReducer equal) const {
    return equal(src, other->src) && equal(dst, other->dst) &&
           equal(src_range, other->src_range) &&
           equal(dst_range, other->dst_range) &&
           equal(use_tma, other->use_tma) &&
           equal(coalesced_width, other->coalesced_width) &&
           equal(memory_order, other->memory_order);
  }

  void SHashReduce(SHashReducer hash_reduce) const {
    hash_reduce(src);
    hash_reduce(dst);
    hash_reduce(src_range);
    hash_reduce(dst_range);
    hash_reduce(use_tma);
    hash_reduce(coalesced_width);
    hash_reduce(memory_order);
  }

  static constexpr bool _type_has_method_sequal_reduce = true;
  static constexpr bool _type_has_method_shash_reduce = true;

protected:
  /// Create SIMT-style parallel loop structure
  For MakeSIMTLoop(arith::Analyzer *analyzer) const;
  /// Generate iteration variables for loop nest
  Array<IterVar> MakeIterVars() const;
  /// Generate buffer indices from iteration variables
  Array<PrimExpr> MakeIndices(const Array<IterVar> &ivs, int src_dst) const;
  /// Return buffer indices and size
  std::pair<Array<PrimExpr>, PrimExpr> ReturnIndicesAndSize(int src_dst) const;
  /// Create boundary predicate for memory safety
  PrimExpr MakePredicate(arith::Analyzer *analyzer, const Array<IterVar> &ivs,
                         Array<PrimExpr> extents, int src_dst) const;
};
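
// Illustrative sketch (not part of the API; the real implementation is in the
// corresponding .cc file and may differ): one plausible way the protected
// helpers above compose inside AtomicAddNode::Lower. The src_dst convention
// (0 = src, 1 = dst) is an assumption made only for this example.
//
//   Array<IterVar> ivs = MakeIterVars();               // loop vars over the tile
//   Array<PrimExpr> dst_idx = MakeIndices(ivs, 1);     // dst indices (assumed 1 = dst)
//   PrimExpr guard = MakePredicate(analyzer, ivs,
//                                  dst->shape, 1);     // out-of-bounds guard
//   For loop = MakeSIMTLoop(analyzer);                 // SIMT-style parallel nest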

/// Wrapper class for atomic addition operations
class AtomicAdd : public TileOperator {
public:
  TVM_DEFINE_OBJECT_REF_METHODS(AtomicAdd, TileOperator, AtomicAddNode);
  TVM_DLL AtomicAdd(Array<PrimExpr> args, BufferMap vmap);
  static const Op &Get();
};
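
// Usage sketch (hypothetical, for documentation only): the operator is normally
// created by the TL lowering pipeline from the arguments of a tl.AtomicAdd call
// plus a BufferMap; the exact argument layout is defined in the .cc file.
//
//   AtomicAdd op(call->args, vmap);                  // wrap the call's arguments
//   Stmt lowered = op->Lower(lower_args, &analyzer); // lower to a TIR statement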

} // namespace tl
} // namespace tvm

#endif //  TVM_TL_OP_ATOMIC_ADD_H_