Commit 009cce41 authored by carlushuang
Browse files

opt loading

parent d56b41fd
...@@ -145,8 +145,10 @@ struct Layernorm2dFwd ...@@ -145,8 +145,10 @@ struct Layernorm2dFwd
number<Vector_N>{}, number<Vector_N>{},
number<1>{}); number<1>{});
// NOTE: we don't apply any padding for loading in this kernel; we assume the code inside
// the kernel checks the max count dynamically
const auto tmp2_ = pad_tensor_view( const auto tmp2_ = pad_tensor_view(
tmp_, make_tuple(number<Block_M>{}, number<Block_N>{}), sequence<kPadM, kPadN>{}); tmp_, make_tuple(number<Block_M>{}, number<Block_N>{}), sequence<false, false>{});
return make_tile_window( return make_tile_window(
tmp2_, make_tuple(number<Block_M>{}, number<Block_N>{}), {iM, 0}); tmp2_, make_tuple(number<Block_M>{}, number<Block_N>{}), {iM, 0});
}(); }();
...@@ -160,7 +162,7 @@ struct Layernorm2dFwd ...@@ -160,7 +162,7 @@ struct Layernorm2dFwd
number<1>{}); number<1>{});
const auto tmp2_ = const auto tmp2_ =
pad_tensor_view(tmp_, make_tuple(number<Block_N>{}), sequence<kPadN>{}); pad_tensor_view(tmp_, make_tuple(number<Block_N>{}), sequence<false>{});
return make_tile_window(tmp2_, make_tuple(number<Block_N>{}), {0}); return make_tile_window(tmp2_, make_tuple(number<Block_N>{}), {0});
}(); }();
...@@ -174,7 +176,7 @@ struct Layernorm2dFwd ...@@ -174,7 +176,7 @@ struct Layernorm2dFwd
number<1>{}); number<1>{});
const auto tmp2_ = const auto tmp2_ =
pad_tensor_view(tmp_, make_tuple(number<Block_N>{}), sequence<kPadN>{}); pad_tensor_view(tmp_, make_tuple(number<Block_N>{}), sequence<false>{});
return make_tile_window(tmp2_, make_tuple(number<Block_M>{}, number<Block_N>{}), {0}); return make_tile_window(tmp2_, make_tuple(number<Block_M>{}, number<Block_N>{}), {0});
}(); }();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment