Commit 10b6f3a8 authored by skrider, committed by Woosuk Kwon
Browse files

revert hardcoded rotcossin thread layout

parent 166f33fd
...@@ -158,9 +158,7 @@ struct Flash_fwd_kernel_traits : public Base { ...@@ -158,9 +158,7 @@ struct Flash_fwd_kernel_traits : public Base {
make_tiled_copy(Copy_Atom<DefaultCopy, ElementAccum>{}, make_tiled_copy(Copy_Atom<DefaultCopy, ElementAccum>{},
GmemLayoutAtomOaccum{}, GmemLayoutAtomOaccum{},
Layout<Shape < _1, _4>>{})); // Val layout, 4 vals per store Layout<Shape < _1, _4>>{})); // Val layout, 4 vals per store
// using GmemLayoutAtomRotcossin = GmemLayoutAtom; using GmemLayoutAtomRotcossin = GmemLayoutAtom;
using GmemLayoutAtomRotcossin = Layout<Shape <Int<kNThreads / kGmemThreadsPerRow>, Int<kGmemThreadsPerRow>>,
Stride<Int<kGmemThreadsPerRow>, _1>>;
using GmemTiledCopyRotcossin = decltype( using GmemTiledCopyRotcossin = decltype(
make_tiled_copy(Copy_Atom<UniversalCopy<uint64_t>, Element>{}, make_tiled_copy(Copy_Atom<UniversalCopy<uint64_t>, Element>{},
GmemLayoutAtomRotcossin{}, GmemLayoutAtomRotcossin{},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment