Commit f625455c authored by Andriy Roshchenko
Browse files

Refactor static loop.

parent 128685c3
...@@ -129,10 +129,12 @@ __device__ AFragT load_A_col_major(AType const* input_ptr) ...@@ -129,10 +129,12 @@ __device__ AFragT load_A_col_major(AType const* input_ptr)
bit_cast<ARawT>(input_ptr[startOffset + 31 * kOffset])}; // XXX v[31] = Reg 7 [24:31] bit_cast<ARawT>(input_ptr[startOffset + 31 * kOffset])}; // XXX v[31] = Reg 7 [24:31]
#else #else
auto fragA = AScalarFragT{}; auto fragA = AScalarFragT{};
static_for<0, VW, 1>{}([&](auto i) { #pragma unroll VW
fragA[static_cast<int>(i)] = for(uint32_t i = 0; i < VW; i++)
bit_cast<ARawT>(input_ptr[startOffset + static_cast<int>(i) * kOffset]); {
}); fragA[i] = bit_cast<ARawT>(input_ptr[startOffset + i * kOffset]);
}
#endif #endif
return fragA; return fragA;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment