DeviceGemmMultipleD_Xdl_CShuffle<
            /* Text template for a single DeviceGemmMultipleD_Xdl_CShuffle instantiation.
               Every argument below is a named placeholder that an external generator
               (not visible in this file) replaces with a concrete C++ type or value.
               Template arguments are positional, so the order of these lines must stay
               in lock-step with the class's template parameter list, which is declared
               elsewhere. Do not reorder, insert, or drop lines without checking it.
               NOTE(review): the per-group descriptions below are inferred from the
               placeholder names only -- confirm against the class declaration. */

            /* layout_* group: presumably memory layouts of the A and B inputs, the
               auxiliary D tensors, and the E output -- TODO confirm. */
            ${layout_a},
            ${layout_b},
            ${layout_ds},
            ${layout_e},
            /* type_* group: presumably element types for A, B, the accumulator, the
               C-shuffle staging buffer, the D tensors, and E -- TODO confirm. */
            ${type_a},
            ${type_b},
            ${type_acc},
            ${type_cshuffle},
            ${type_ds},
            ${type_e},
            /* elementwise_op_* group: presumably functors applied to A, to B, and to
               the combined C/D/E stage -- TODO confirm. */
            ${elementwise_op_a},
            ${elementwise_op_b},
            ${elementwise_op_cde},
            /* Presumably the GEMM specialization (padding mode) selector -- TODO confirm. */
            ${Gemm_spec},
            /* Presumably the number of K-loop prefetch stages -- TODO confirm. */
            ${num_gemmk_prefetch_stage},
            /* Tiling group: presumably threads per block, per-block M/N/K tile sizes,
               the K1 factors for A and B, the per-XDL M/N sizes, and the number of XDL
               tiles per wave along M and N -- TODO confirm. */
            ${block_size},
            ${mperblock},
            ${nperblock},
            ${kperblock},
            ${ak1},
            ${bk1},
            ${mperXDL},
            ${nperXDL},
            ${mXdlperwave},
            ${nXdlperwave},
            /* ABT_* group: presumably the block-transfer settings for operand A
               (thread cluster shape and order, source access order, vector dimension,
               source and destination vector widths, extra LDS padding flag) -- TODO
               confirm. */
            ${ABT_thread_cluster_lengths_K0_M_K1},
            ${ABT_thread_cluster_arrange_order},
            ${ABT_src_access_order},
            ${ABT_src_vec_dim},
            ${ABT_src_scalar_per_vec},
            ${ABT_dst_scalar_per_vec_k1},
            ${ABT_lds_add_extra_m},
            /* BBT_* group: same seven settings as the ABT_* group, presumably for
               operand B (N in place of M) -- TODO confirm. */
            ${BBT_thread_cluster_lengths_K0_N_K1},
            ${BBT_thread_cluster_arrange_order},
            ${BBT_src_access_order},
            ${BBT_src_vec_dim},
            ${BBT_src_scalar_per_vec},
            ${BBT_dst_scalar_per_vec_k1},
            ${BBT_lds_add_extra_n},
            /* CS_* and CTT_* group: presumably the C-shuffle epilogue settings (XDL
               tiles per wave per shuffle along M and N, output thread cluster lengths,
               output vector width) -- TODO confirm. */
            ${CS_m_Xdl_per_wave_per_shuffle},
            ${CS_n_Xdl_per_wave_per_shuffle},
            ${CTT_cluster_lengths_m_block_m_wave_m_per_Xdl_n_block_n_wave_n_per_Xdl},
            /* Last argument; the closing angle bracket and semicolon end the statement.
               Anything that precedes the class name (for example an alias or namespace
               qualifier) is not part of this file -- presumably supplied by the
               generator. */
            ${CTT_scalar_per_vector_n_wave_n_per_Xdl}>;
