Commit 2a768425 authored by Mateusz Ozga
Browse files

Merge remote-tracking branch 'origin/develop' into mozga-amd/universal_gemm_weight

parents f358055f 1c45ca35
...@@ -43,8 +43,6 @@ struct TileFmhaShape ...@@ -43,8 +43,6 @@ struct TileFmhaShape
static constexpr index_t NumWarps = max(NumGemm0Warps, NumGemm1Warps); static constexpr index_t NumWarps = max(NumGemm0Warps, NumGemm1Warps);
static_assert(std::is_same_v<Gemm0WarpTile, Gemm1WarpTile>);
static constexpr index_t kM0 = BlockTile::at(number<0>{}); // tile size along q seqlen static constexpr index_t kM0 = BlockTile::at(number<0>{}); // tile size along q seqlen
static constexpr index_t kN0 = BlockTile::at(number<1>{}); // tile size along k seqlen static constexpr index_t kN0 = BlockTile::at(number<1>{}); // tile size along k seqlen
static constexpr index_t kK0 = BlockTile::at(number<2>{}); // tile size along qk gemm unroll static constexpr index_t kK0 = BlockTile::at(number<2>{}); // tile size along qk gemm unroll
......
This diff is collapsed.
...@@ -77,7 +77,7 @@ bool profile_grouped_gemm_impl(int do_verification, ...@@ -77,7 +77,7 @@ bool profile_grouped_gemm_impl(int do_verification,
std::vector<Tensor<CDataType>> c_m_n_host_results; std::vector<Tensor<CDataType>> c_m_n_host_results;
std::vector<Tensor<CDataType>> c_m_n_device_results; std::vector<Tensor<CDataType>> c_m_n_device_results;
ComputeDataType max_abs_in_val = 0.f; double max_abs_in_val = 0.f;
for(std::size_t i = 0; i < group_count; i++) for(std::size_t i = 0; i < group_count; i++)
{ {
a_m_k.push_back( a_m_k.push_back(
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment