"vscode:/vscode.git/clone" did not exist on "53b31903e5e7c3dff8775cc51b5aca602e72f10c"
Commit 487826b3 authored by aska-0096's avatar aska-0096
Browse files

tempsave, fp8 sanity error

parent 50e10656
...@@ -6,8 +6,12 @@ ...@@ -6,8 +6,12 @@
// clang-format off // clang-format off
// rm rn tm tn vn pd x 3p // rm rn tm tn vn pd x 3p
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 1, 4, 64, 8, true , true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 1, 4, 64, 8, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 2, 4, 64, 4, true , true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 4, 64, 4, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 4, 4, 64, 2, true , true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 4, 64, 2, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 8, 4, 64, 1, true , true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 4, 64, 1, true , true, false>>(const S&, A);
// fp8_t variants: same four (rn, vn) combinations as the int8_t instantiations above,
// with ck_tile::fp8_t passed as the second (quantized data type) trait argument.
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 1, 4, 64, 8, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 4, 64, 4, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 4, 64, 2, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 4, 64, 1, true , true, false>>(const S&, A);
// clang-format on // clang-format on
...@@ -5,8 +5,11 @@ ...@@ -5,8 +5,11 @@
#include "add_rmsnorm2d_rdquant_fwd_instance_common.hpp" #include "add_rmsnorm2d_rdquant_fwd_instance_common.hpp"
// clang-format off // clang-format off
// rm rn tm tn vn pd x 3p // rm rn tm tn vn pd x 3p
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 1, 4, 64, 1, true , true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 1, 4, 64, 1, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 1, 4, 64, 2, true , true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 1, 4, 64, 2, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 2, 4, 64, 1, true , true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 4, 64, 1, true , true, false>>(const S&, A);
// fp8_t variants: same three (rn, vn) combinations as the int8_t instantiations above,
// with ck_tile::fp8_t passed as the second (quantized data type) trait argument.
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 1, 4, 64, 1, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 1, 4, 64, 2, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 4, 64, 1, true , true, false>>(const S&, A);
// clang-format on // clang-format on
...@@ -6,7 +6,10 @@ ...@@ -6,7 +6,10 @@
// clang-format off // clang-format off
// rm rn tm tn vn pd x 3p // rm rn tm tn vn pd x 3p
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 3, 4, 64, 4, true , true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 3, 4, 64, 4, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 6, 4, 64, 2, true , true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 6, 4, 64, 2, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 12, 4, 64, 1, true , true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 12, 4, 64, 1, true , true, false>>(const S&, A);
// fp8_t variants: same three (rn, vn) combinations as the int8_t instantiations above,
// with ck_tile::fp8_t passed as the second (quantized data type) trait argument.
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 3, 4, 64, 4, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 6, 4, 64, 2, true , true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 12, 4, 64, 1, true , true, false>>(const S&, A);
// clang-format on // clang-format on
...@@ -6,9 +6,29 @@ ...@@ -6,9 +6,29 @@
// clang-format off // clang-format off
// rm rn tm tn vn pd x 3p // rm rn tm tn vn pd x 3p
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 4, 1, 256, 8, true, true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 256, 8, true, true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 8, 1, 256, 4, true, true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 1, 256, 4, true, true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 4, 1, 1024, 2, true, true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 1024, 2, true, true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 8, 1, 1024, 1, true, true, false>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 1, 1024, 1, true, true, false>>(const S&, A);
// int8_t quantized type, flags (pd, x, 3p) = (true, false, false)
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 256, 8, true, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 1, 256, 4, true, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 1024, 2, true, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 1, 1024, 1, true, false, false>>(const S&, A);
// int8_t quantized type, flags (pd, x, 3p) = (false, false, false)
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 256, 8, false, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 1, 256, 4, false, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 1024, 2, false, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 1, 1024, 1, false, false, false>>(const S&, A);
// fp8_t quantized type, flags (pd, x, 3p) = (true, true, false)
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 256, 8, true, true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 1, 256, 4, true, true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 1024, 2, true, true, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 1, 1024, 1, true, true, false>>(const S&, A);
// fp8_t quantized type, flags (pd, x, 3p) = (true, false, false)
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 256, 8, true, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 1, 256, 4, true, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 1024, 2, true, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 1, 1024, 1, true, false, false>>(const S&, A);
// fp8_t quantized type, flags (pd, x, 3p) = (false, false, false)
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 256, 8, false, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 1, 256, 4, false, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 1024, 2, false, false, false>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 1, 1024, 1, false, false, false>>(const S&, A);
// clang-format on // clang-format on
...@@ -6,9 +6,12 @@ ...@@ -6,9 +6,12 @@
// clang-format off // clang-format off
// rm rn tm tn vn pd x 3p // rm rn tm tn vn pd x 3p
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 4, 1, 256, 8, true, true, true>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 256, 8, true, true, true>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 8, 1, 256, 4, true, true, true>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 1, 256, 4, true, true, true>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 4, 1, 1024, 2, true, true, true>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 1024, 2, true, true, true>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, 1, 8, 1, 1024, 1, true, true, true>>(const S&, A); template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 1, 1024, 1, true, true, true>>(const S&, A);
// fp8_t variants with the last ("3p") flag set to true: same four (rn, tn, vn) combinations
// as the int8_t instantiations above, with ck_tile::fp8_t as the quantized data type.
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 256, 8, true, true, true>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 1, 256, 4, true, true, true>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 1024, 2, true, true, true>>(const S&, A);
template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 1, 1024, 1, true, true, true>>(const S&, A);
// clang-format on // clang-format on
...@@ -11,7 +11,8 @@ ...@@ -11,7 +11,8 @@
using S = ck_tile::stream_config; using S = ck_tile::stream_config;
using A = add_rmsnorm2d_rdquant_fwd_args; using A = add_rmsnorm2d_rdquant_fwd_args;
template <typename DataType_, template <typename InputDataType_,
typename QuantizedDataType_,
ck_tile::index_t Repeat_M_, // each thread repeat along M ck_tile::index_t Repeat_M_, // each thread repeat along M
ck_tile::index_t Repeat_N_, // each thread repeat along N ck_tile::index_t Repeat_N_, // each thread repeat along N
ck_tile::index_t ThreadPerBlock_M_, // num threads along M ck_tile::index_t ThreadPerBlock_M_, // num threads along M
...@@ -20,7 +21,8 @@ template <typename DataType_, ...@@ -20,7 +21,8 @@ template <typename DataType_,
bool kPadN_, bool kPadN_,
bool kSaveInvRms_, bool kSaveInvRms_,
bool kTwoPass_> bool kTwoPass_>
using trait_ = add_rmsnorm2d_rdquant_fwd_traits_<DataType_, using trait_ = add_rmsnorm2d_rdquant_fwd_traits_<InputDataType_,
QuantizedDataType_,
Repeat_M_, Repeat_M_,
Repeat_N_, Repeat_N_,
ThreadPerBlock_M_, ThreadPerBlock_M_,
...@@ -33,16 +35,17 @@ using trait_ = add_rmsnorm2d_rdquant_fwd_traits_<DataType_, ...@@ -33,16 +35,17 @@ using trait_ = add_rmsnorm2d_rdquant_fwd_traits_<DataType_,
template <typename Traits_> template <typename Traits_>
float add_rmsnorm2d_rdquant_fwd_(const S& s, A a) float add_rmsnorm2d_rdquant_fwd_(const S& s, A a)
{ {
using DataType = typename Traits_::DataType; using InputDataType = typename Traits_::InputDataType;
using QuantizedDataType = typename Traits_::QuantizedDataType;
using PipelineProblem = ck_tile::AddRmsnorm2dRdquantFwdPipelineProblem< using PipelineProblem = ck_tile::AddRmsnorm2dRdquantFwdPipelineProblem<
typename AddRmsnormRdquantTypeConfig<DataType>::ADataType, typename AddRmsnormRdquantTypeConfig<InputDataType, QuantizedDataType>::ADataType,
typename AddRmsnormRdquantTypeConfig<DataType>::BDataType, typename AddRmsnormRdquantTypeConfig<InputDataType, QuantizedDataType>::BDataType,
typename AddRmsnormRdquantTypeConfig<DataType>::GammaDataType, typename AddRmsnormRdquantTypeConfig<InputDataType, QuantizedDataType>::GammaDataType,
typename AddRmsnormRdquantTypeConfig<DataType>::ComputeDataType, typename AddRmsnormRdquantTypeConfig<InputDataType, QuantizedDataType>::ComputeDataType,
typename AddRmsnormRdquantTypeConfig<DataType>::XDataType, typename AddRmsnormRdquantTypeConfig<InputDataType, QuantizedDataType>::XDataType,
typename AddRmsnormRdquantTypeConfig<DataType>::YScaleDataType, typename AddRmsnormRdquantTypeConfig<InputDataType, QuantizedDataType>::YScaleDataType,
typename AddRmsnormRdquantTypeConfig<DataType>::QYDataType, typename AddRmsnormRdquantTypeConfig<InputDataType, QuantizedDataType>::QYDataType,
typename Traits_::Shape, typename Traits_::Shape,
Traits_::kPadN, Traits_::kPadN,
Traits_::kSaveX, Traits_::kSaveX,
......
...@@ -23,6 +23,11 @@ CK_TILE_HOST void reference_rowwise_quantization2d(const HostTensor<XDataType>& ...@@ -23,6 +23,11 @@ CK_TILE_HOST void reference_rowwise_quantization2d(const HostTensor<XDataType>&
auto v_scale = type_convert<XDataType>(scale_m(m)); auto v_scale = type_convert<XDataType>(scale_m(m));
auto v_qx = v_x / v_scale; auto v_qx = v_x / v_scale;
qx_m_n(m, n) = saturates<QXDataType>{}(v_qx); qx_m_n(m, n) = saturates<QXDataType>{}(v_qx);
if(m == 0 && n == 4)
printf("Qy: %lf, Satruates Qy: %lf\n",
type_convert<float>(v_qx),
type_convert<float>(qx_m_n(m, n)));
} }
}; };
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment