"docs/source/ko/vscode:/vscode.git/clone" did not exist on "30e5e81d58eb9c3979c07e6626bae89c1df8c0e1"
Commit 231e3f8d authored by Bartlomiej Wroblewski's avatar Bartlomiej Wroblewski
Browse files

Review: Remove unnecessary empty lines

parent 7863805d
...@@ -71,23 +71,17 @@ struct BlockwiseGemmDpp_k0mk1_k0nk1_m0n0m1n1m2n2 ...@@ -71,23 +71,17 @@ struct BlockwiseGemmDpp_k0mk1_k0nk1_m0n0m1n1m2n2
__device__ static auto CalculateAThreadOriginDataIndex() __device__ static auto CalculateAThreadOriginDataIndex()
{ {
const auto wave_idx = GetWaveIdx(); const auto wave_idx = GetWaveIdx();
const auto waveId_m = wave_idx[I0];
const auto waveId_m = wave_idx[I0];
const auto dpp_a_idx = dpp_gemm.CalculateAThreadOriginDataIndex(); const auto dpp_a_idx = dpp_gemm.CalculateAThreadOriginDataIndex();
return make_tuple(0, waveId_m, dpp_a_idx[I1], KPerThread * dpp_a_idx[I0]); return make_tuple(0, waveId_m, dpp_a_idx[I1], KPerThread * dpp_a_idx[I0]);
} }
__device__ static auto CalculateBThreadOriginDataIndex() __device__ static auto CalculateBThreadOriginDataIndex()
{ {
const auto wave_idx = GetWaveIdx(); const auto wave_idx = GetWaveIdx();
const auto waveId_n = wave_idx[I1];
const auto waveId_n = wave_idx[I1];
const auto dpp_b_idx = dpp_gemm.CalculateBThreadOriginDataIndex(); const auto dpp_b_idx = dpp_gemm.CalculateBThreadOriginDataIndex();
return make_tuple(0, waveId_n, dpp_b_idx[I1], KPerThread * dpp_b_idx[I0]); return make_tuple(0, waveId_n, dpp_b_idx[I1], KPerThread * dpp_b_idx[I0]);
} }
...@@ -95,11 +89,9 @@ struct BlockwiseGemmDpp_k0mk1_k0nk1_m0n0m1n1m2n2 ...@@ -95,11 +89,9 @@ struct BlockwiseGemmDpp_k0mk1_k0nk1_m0n0m1n1m2n2
__device__ static auto CalculateCThreadOriginDataIndex(Number<m0>, Number<n0>) __device__ static auto CalculateCThreadOriginDataIndex(Number<m0>, Number<n0>)
{ {
const auto wave_idx = GetWaveIdx(); const auto wave_idx = GetWaveIdx();
const auto waveId_m = wave_idx[I0]; const auto waveId_m = wave_idx[I0];
const auto waveId_n = wave_idx[I1]; const auto waveId_n = wave_idx[I1];
const auto blk_idx = dpp_gemm.GetBeginOfThreadBlk();
const auto blk_idx = dpp_gemm.GetBeginOfThreadBlk();
constexpr auto mrepeat_mwave_MPerDpp_to_m_adaptor = make_single_stage_tensor_adaptor( constexpr auto mrepeat_mwave_MPerDpp_to_m_adaptor = make_single_stage_tensor_adaptor(
make_tuple(make_unmerge_transform(make_tuple(MRepeat, MWaves, MPerDpp))), make_tuple(make_unmerge_transform(make_tuple(MRepeat, MWaves, MPerDpp))),
...@@ -141,9 +133,8 @@ struct BlockwiseGemmDpp_k0mk1_k0nk1_m0n0m1n1m2n2 ...@@ -141,9 +133,8 @@ struct BlockwiseGemmDpp_k0mk1_k0nk1_m0n0m1n1m2n2
__host__ __device__ static constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_N2() __host__ __device__ static constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_N2()
{ {
constexpr auto c_m0_m1_m2_n_tblk_lens = dpp_gemm.GetCMNThreadBlkLengths(); constexpr auto c_m0_m1_m2_n_tblk_lens = dpp_gemm.GetCMNThreadBlkLengths();
constexpr auto M0 = c_m0_m1_m2_n_tblk_lens[I0];
constexpr auto M0 = c_m0_m1_m2_n_tblk_lens[I0]; constexpr auto N = c_m0_m1_m2_n_tblk_lens[I1];
constexpr auto N = c_m0_m1_m2_n_tblk_lens[I1];
return make_naive_tensor_descriptor_packed( return make_naive_tensor_descriptor_packed(
make_tuple(Number<MRepeat>{}, Number<NRepeat>{}, I1, I1, M0, N)); make_tuple(Number<MRepeat>{}, Number<NRepeat>{}, I1, I1, M0, N));
...@@ -152,9 +143,8 @@ struct BlockwiseGemmDpp_k0mk1_k0nk1_m0n0m1n1m2n2 ...@@ -152,9 +143,8 @@ struct BlockwiseGemmDpp_k0mk1_k0nk1_m0n0m1n1m2n2
__host__ __device__ static constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_N2() __host__ __device__ static constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_N2()
{ {
constexpr auto c_m0_m1_m2_n_tblk_lens = dpp_gemm.GetCMNThreadBlkLengths(); constexpr auto c_m0_m1_m2_n_tblk_lens = dpp_gemm.GetCMNThreadBlkLengths();
constexpr auto M0 = c_m0_m1_m2_n_tblk_lens[I0];
constexpr auto M0 = c_m0_m1_m2_n_tblk_lens[I0]; constexpr auto N = c_m0_m1_m2_n_tblk_lens[I1];
constexpr auto N = c_m0_m1_m2_n_tblk_lens[I1];
return make_naive_tensor_descriptor_packed( return make_naive_tensor_descriptor_packed(
make_tuple(I1, Number<MRepeat>{}, Number<NRepeat>{}, I1, I1, M0, N)); make_tuple(I1, Number<MRepeat>{}, Number<NRepeat>{}, I1, I1, M0, N));
......
...@@ -243,14 +243,11 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_dpp ...@@ -243,14 +243,11 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_dpp
{ {
// LDS allocation for A and B: be careful of alignment // LDS allocation for A and B: be careful of alignment
constexpr auto a_block_desc_k0_m_k1 = GetABlockDescriptor_K0PerBlock_MPerBlock_K1(); constexpr auto a_block_desc_k0_m_k1 = GetABlockDescriptor_K0PerBlock_MPerBlock_K1();
constexpr auto b_block_desc_k0_n_k1 = GetBBlockDescriptor_K0PerBlock_NPerBlock_K1(); constexpr auto b_block_desc_k0_n_k1 = GetBBlockDescriptor_K0PerBlock_NPerBlock_K1();
constexpr auto max_lds_align = K1;
constexpr auto max_lds_align = K1;
constexpr auto a_block_space_size_aligned = constexpr auto a_block_space_size_aligned =
math::integer_least_multiple(a_block_desc_k0_m_k1.GetElementSpaceSize(), max_lds_align); math::integer_least_multiple(a_block_desc_k0_m_k1.GetElementSpaceSize(), max_lds_align);
constexpr auto b_block_space_size_aligned = constexpr auto b_block_space_size_aligned =
math::integer_least_multiple(b_block_desc_k0_n_k1.GetElementSpaceSize(), max_lds_align); math::integer_least_multiple(b_block_desc_k0_n_k1.GetElementSpaceSize(), max_lds_align);
...@@ -539,7 +536,6 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_dpp ...@@ -539,7 +536,6 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_dpp
// HACK: this force m/n_block_data_idx_on_grid into SGPR // HACK: this force m/n_block_data_idx_on_grid into SGPR
const index_t m_block_data_idx_on_grid = const index_t m_block_data_idx_on_grid =
__builtin_amdgcn_readfirstlane(block_work_idx[I0] * MPerBlock); __builtin_amdgcn_readfirstlane(block_work_idx[I0] * MPerBlock);
const index_t n_block_data_idx_on_grid = const index_t n_block_data_idx_on_grid =
__builtin_amdgcn_readfirstlane(block_work_idx[I1] * NPerBlock); __builtin_amdgcn_readfirstlane(block_work_idx[I1] * NPerBlock);
...@@ -547,7 +543,6 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_dpp ...@@ -547,7 +543,6 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_dpp
// A matrix in LDS memory, dst of blockwise copy // A matrix in LDS memory, dst of blockwise copy
constexpr auto a_block_desc_k0_m_k1 = GetABlockDescriptor_K0PerBlock_MPerBlock_K1(); constexpr auto a_block_desc_k0_m_k1 = GetABlockDescriptor_K0PerBlock_MPerBlock_K1();
// B matrix in LDS memory, dst of blockwise copy // B matrix in LDS memory, dst of blockwise copy
constexpr auto b_block_desc_k0_n_k1 = GetBBlockDescriptor_K0PerBlock_NPerBlock_K1(); constexpr auto b_block_desc_k0_n_k1 = GetBBlockDescriptor_K0PerBlock_NPerBlock_K1();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment