Commit 3b290001 authored by rocking
Browse files

Refine variable name

parent d5efa5e5
...@@ -4,16 +4,16 @@ ...@@ -4,16 +4,16 @@
#include "layernorm2d_fwd_instance_common.hpp" #include "layernorm2d_fwd_instance_common.hpp"
template <ck_tile::index_t NRepeat, template <ck_tile::index_t kNRepeat,
ck_tile::index_t kMThreadPerBlock, ck_tile::index_t kMThreadPerBlock,
ck_tile::index_t kNThreadPerBlock, ck_tile::index_t kNThreadPerBlock,
ck_tile::index_t VectorAccessSize, ck_tile::index_t kVectorAccessSize,
bool kTwoPass> bool kTwoPass>
using t = layernorm2d_fwd_traits_<ck_tile::bf16_t, using t = layernorm2d_fwd_traits_<ck_tile::bf16_t,
NRepeat, kNRepeat,
kMThreadPerBlock, kMThreadPerBlock,
kNThreadPerBlock, kNThreadPerBlock,
VectorAccessSize, kVectorAccessSize,
false, false,
false, false,
kTwoPass>; kTwoPass>;
......
...@@ -4,16 +4,16 @@ ...@@ -4,16 +4,16 @@
#include "layernorm2d_fwd_instance_common.hpp" #include "layernorm2d_fwd_instance_common.hpp"
template <ck_tile::index_t NRepeat, template <ck_tile::index_t kNRepeat,
ck_tile::index_t kMThreadPerBlock, ck_tile::index_t kMThreadPerBlock,
ck_tile::index_t kNThreadPerBlock, ck_tile::index_t kNThreadPerBlock,
ck_tile::index_t VectorAccessSize, ck_tile::index_t kVectorAccessSize,
bool kTwoPass> bool kTwoPass>
using t = layernorm2d_fwd_traits_<ck_tile::bf16_t, using t = layernorm2d_fwd_traits_<ck_tile::bf16_t,
NRepeat, kNRepeat,
kMThreadPerBlock, kMThreadPerBlock,
kNThreadPerBlock, kNThreadPerBlock,
VectorAccessSize, kVectorAccessSize,
true, true,
false, false,
kTwoPass>; kTwoPass>;
......
...@@ -4,16 +4,16 @@ ...@@ -4,16 +4,16 @@
#include "layernorm2d_fwd_instance_common.hpp" #include "layernorm2d_fwd_instance_common.hpp"
template <ck_tile::index_t NRepeat, template <ck_tile::index_t kNRepeat,
ck_tile::index_t kMThreadPerBlock, ck_tile::index_t kMThreadPerBlock,
ck_tile::index_t kNThreadPerBlock, ck_tile::index_t kNThreadPerBlock,
ck_tile::index_t VectorAccessSize, ck_tile::index_t kVectorAccessSize,
bool kTwoPass> bool kTwoPass>
using t = layernorm2d_fwd_traits_<ck_tile::fp16_t, using t = layernorm2d_fwd_traits_<ck_tile::fp16_t,
NRepeat, kNRepeat,
kMThreadPerBlock, kMThreadPerBlock,
kNThreadPerBlock, kNThreadPerBlock,
VectorAccessSize, kVectorAccessSize,
false, false,
false, false,
kTwoPass>; kTwoPass>;
......
...@@ -4,16 +4,16 @@ ...@@ -4,16 +4,16 @@
#include "layernorm2d_fwd_instance_common.hpp" #include "layernorm2d_fwd_instance_common.hpp"
template <ck_tile::index_t NRepeat, template <ck_tile::index_t kNRepeat,
ck_tile::index_t kMThreadPerBlock, ck_tile::index_t kMThreadPerBlock,
ck_tile::index_t kNThreadPerBlock, ck_tile::index_t kNThreadPerBlock,
ck_tile::index_t VectorAccessSize, ck_tile::index_t kVectorAccessSize,
bool kTwoPass> bool kTwoPass>
using t = layernorm2d_fwd_traits_<ck_tile::fp16_t, using t = layernorm2d_fwd_traits_<ck_tile::fp16_t,
NRepeat, kNRepeat,
kMThreadPerBlock, kMThreadPerBlock,
kNThreadPerBlock, kNThreadPerBlock,
VectorAccessSize, kVectorAccessSize,
true, true,
false, false,
kTwoPass>; kTwoPass>;
......
...@@ -51,10 +51,10 @@ struct layernorm2d_fwd_args ...@@ -51,10 +51,10 @@ struct layernorm2d_fwd_args
// this is used to pattern-match internal kernel implementation, not to instantiate kernel // this is used to pattern-match internal kernel implementation, not to instantiate kernel
template <typename DataType_, template <typename DataType_,
ck_tile::index_t NRepeat, ck_tile::index_t kNRepeat,
ck_tile::index_t kMThreadPerBlock, ck_tile::index_t kMThreadPerBlock,
ck_tile::index_t kNThreadPerBlock, ck_tile::index_t kNThreadPerBlock,
ck_tile::index_t VectorAccessSize, ck_tile::index_t kVectorAccessSize,
bool kPadN_, bool kPadN_,
bool kSaveMeanInvStd_, bool kSaveMeanInvStd_,
bool kTwoPass_> bool kTwoPass_>
...@@ -69,11 +69,11 @@ struct layernorm2d_fwd_traits_ ...@@ -69,11 +69,11 @@ struct layernorm2d_fwd_traits_
kMThreadPerBlock * kNThreadPerBlock / warpSize; kMThreadPerBlock * kNThreadPerBlock / warpSize;
// kNThreadPerBlock / 16; // kNThreadPerBlock / 16;
using thread_tile = ck_tile::sequence<MRepeat, NRepeat, VectorAccessSize>; using thread_tile = ck_tile::sequence<MRepeat, kNRepeat, kVectorAccessSize>;
using warp_tile = ck_tile::sequence<MRepeat * warpSize / kNThreadPerBlock, using warp_tile = ck_tile::sequence<MRepeat * warpSize / kNThreadPerBlock,
NRepeat * kNThreadPerBlock * VectorAccessSize>; kNRepeat * kNThreadPerBlock * kVectorAccessSize>;
using block_tile = ck_tile::sequence<kMWarpPerBlock * MRepeat * warpSize / kNThreadPerBlock, using block_tile = ck_tile::sequence<kMWarpPerBlock * MRepeat * warpSize / kNThreadPerBlock,
NRepeat * kNThreadPerBlock * VectorAccessSize>; kNRepeat * kNThreadPerBlock * kVectorAccessSize>;
using Shape = ck_tile::TileLayernorm2dShape<thread_tile, warp_tile, block_tile>; using Shape = ck_tile::TileLayernorm2dShape<thread_tile, warp_tile, block_tile>;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment