Commit 8e587254 authored by Po Yen Chen, committed by GitHub
Browse files

Revert "clang-format and remove dead code"

This reverts commit edb78a47.
parent 9f6e0fa2
...@@ -128,39 +128,42 @@ struct BlockFmhaPipelineQSKSVS ...@@ -128,39 +128,42 @@ struct BlockFmhaPipelineQSKSVS
typename OAccElementFunction, typename OAccElementFunction,
typename PositionEncoding> typename PositionEncoding>
CK_TILE_HOST_DEVICE auto CK_TILE_HOST_DEVICE auto
operator()(const QDramBlockWindowTmp & q_dram_block_window_tmp, // M0*K0 tile // operator()(const QDramBlockWindowTmp& q_dram_block_window_tmp, // M0*K0 tile
const QElementFunction & // const QElementFunction& q_element_func,
q_element_func, // const KDramBlockWindowTmp& k_dram_block_window_tmp, // N0*K0 tile
const KDramBlockWindowTmp & // const KElementFunction& k_element_func,
k_dram_block_window_tmp, // N0*K0 tile // const VDramBlockWindowTmp& v_dram_block_window_tmp, // N1*K1 tile
const KElementFunction & // const VElementFunction& v_element_func,
k_element_func, // const BiasDramBlockWindowTmp& bias_dram_block_window_tmp, // M0*N0 tile
const VDramBlockWindowTmp & // const BiasElementFunction& bias_element_func,
v_dram_block_window_tmp, // N1*K1 tile // LSEDramBlockWindowTmp& lse_dram_window_tmp, // M0*1 tile
const VElementFunction & // const LSEElementFunction& lse_element_func,
v_element_func, // const SAccElementFunction& s_acc_element_func,
const BiasDramBlockWindowTmp & // const PComputeElementFunction& p_compute_element_func,
bias_dram_block_window_tmp, // M0*N0 tile // const OAccElementFunction& o_acc_element_func,
const BiasElementFunction & // FmhaMask mask,
bias_element_func, // PositionEncoding position_encoding,
RandValDramBlockWindowTmp & // float scale_s,
randval_dram_block_window_tmp, // void* smem_ptr) const
LSEDramBlockWindowTmp & operator()(const QDramBlockWindowTmp& q_dram_block_window_tmp, // M0*K0 tile
lse_dram_window_tmp, // M0*1 tile const QElementFunction& q_element_func,
const LSEElementFunction & const KDramBlockWindowTmp& k_dram_block_window_tmp, // N0*K0 tile
lse_element_func, const KElementFunction& k_element_func,
const SAccElementFunction & const VDramBlockWindowTmp& v_dram_block_window_tmp, // N1*K1 tile
s_acc_element_func, const VElementFunction& v_element_func,
const PComputeElementFunction & const BiasDramBlockWindowTmp& bias_dram_block_window_tmp, // M0*N0 tile
p_compute_element_func, const BiasElementFunction& bias_element_func,
const OAccElementFunction & RandValDramBlockWindowTmp& randval_dram_block_window_tmp,
o_acc_element_func, LSEDramBlockWindowTmp& lse_dram_window_tmp, // M0*1 tile
const LSEElementFunction& lse_element_func,
const SAccElementFunction& s_acc_element_func,
const PComputeElementFunction& p_compute_element_func,
const OAccElementFunction& o_acc_element_func,
FmhaMask mask, FmhaMask mask,
PositionEncoding position_encoding, PositionEncoding position_encoding,
float scale_s, float scale_s,
void* smem_ptr, void* smem_ptr,
DropoutType & DropoutType& dropout) const
dropout) const
{ {
static_assert( static_assert(
std::is_same_v<QDataType, remove_cvref_t<typename QDramBlockWindowTmp::DataType>> && std::is_same_v<QDataType, remove_cvref_t<typename QDramBlockWindowTmp::DataType>> &&
...@@ -260,8 +263,8 @@ struct BlockFmhaPipelineQSKSVS ...@@ -260,8 +263,8 @@ struct BlockFmhaPipelineQSKSVS
{seqlen_k_start, 0}); {seqlen_k_start, 0});
const auto bias_origin = bias_dram_block_window_tmp.get_window_origin(); const auto bias_origin = bias_dram_block_window_tmp.get_window_origin();
auto bias_dram_window = auto bias_dram_window = make_tile_window(
make_tile_window(bias_dram_block_window_tmp.get_bottom_tensor_view(), bias_dram_block_window_tmp.get_bottom_tensor_view(),
bias_dram_block_window_tmp.get_window_lengths(), bias_dram_block_window_tmp.get_window_lengths(),
{bias_origin.at(number<0>{}), seqlen_k_start}, // M/N {bias_origin.at(number<0>{}), seqlen_k_start}, // M/N
Policy::template MakeBiasDramTileDistribution<decltype(gemm_0)>()); Policy::template MakeBiasDramTileDistribution<decltype(gemm_0)>());
...@@ -618,6 +621,41 @@ struct BlockFmhaPipelineQSKSVS ...@@ -618,6 +621,41 @@ struct BlockFmhaPipelineQSKSVS
return o_acc; return o_acc;
} }
// template <typename QDramBlockWindowTmp,
// typename KDramBlockWindowTmp,
// typename VDramBlockWindowTmp,
// typename BiasDramBlockWindowTmp,
// typename LSEDramBlockWindowTmp,
// typename PositionEncoding>
// CK_TILE_HOST_DEVICE auto
// operator()(const QDramBlockWindowTmp& q_dram_block_window_tmp, // M0*K0 tile
// const KDramBlockWindowTmp& k_dram_block_window_tmp, // N0*K0 tile
// const VDramBlockWindowTmp& v_dram_block_window_tmp, // N1*K1 tile
// const BiasDramBlockWindowTmp& bias_dram_block_window_tmp, // M0*N0 tile
// LSEDramBlockWindowTmp& lse_dram_block_window_tmp, // M0*1 tile
// FmhaMask mask,
// PositionEncoding position_encoding,
// float scale_s,
// void* smem_ptr) const
// {
// return operator()(q_dram_block_window_tmp,
// identity{},
// k_dram_block_window_tmp,
// identity{},
// v_dram_block_window_tmp,
// identity{},
// bias_dram_block_window_tmp,
// identity{},
// lse_dram_block_window_tmp,
// identity{},
// identity{},
// identity{},
// identity{},
// mask,
// position_encoding,
// scale_s,
// smem_ptr);
// }
template <typename QDramBlockWindowTmp, template <typename QDramBlockWindowTmp,
typename KDramBlockWindowTmp, typename KDramBlockWindowTmp,
typename VDramBlockWindowTmp, typename VDramBlockWindowTmp,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment