merge

fe0ced87 · root · dd3a5424 · 14dc7552 · fe0ced87 · fe0ced87
Commit fe0ced87 authored Nov 02, 2023 by root
3 changed files
--- a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
+++ b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
--- a/include/ck/utility/type_convert.hpp
+++ b/include/ck/utility/type_convert.hpp
@@ -150,8 +150,12 @@ inline __host__ __device__ float2_t type_convert<float2_t, f8x2_t>(f8x2_t x)
    constexpr bool negative_zero_nan = true;
    const auto f8x2_v                = vector_type<f8_t, 2>(x);
    vector_type<float, 2> f32x2_v;
-    f32x2_v.template AsType<float>()(Number<0>{}) = utils::cast_from_f8<f8_t, float, negative_zero_nan>(f8x2_v.template AsType<f8_t>()[Number<0>{}]);
-    f32x2_v.template AsType<float>()(Number<1>{}) = utils::cast_from_f8<f8_t, float, negative_zero_nan>(f8x2_v.template AsType<f8_t>()[Number<1>{}]);
+    f32x2_v.template AsType<float>()(Number<0>{}) =
+        utils::cast_from_f8<f8_t, float, negative_zero_nan>(
+            f8x2_v.template AsType<f8_t>()[Number<0>{}]);
+    f32x2_v.template AsType<float>()(Number<1>{}) =
+        utils::cast_from_f8<f8_t, float, negative_zero_nan>(
+            f8x2_v.template AsType<f8_t>()[Number<1>{}]);
    return f32x2_v.template AsType<float2_t>()[Number<0>{}];
 #endif
 }
@@ -161,12 +165,11 @@ inline __host__ __device__ half2_t type_convert<half2_t, float2_t>(float2_t x)
 {

    const vector_type<float, 2> f32x2_v(x);
-	const auto y = __builtin_amdgcn_cvt_pkrtz(f32x2_v.template AsType<float>()[Number<0>{}], f32x2_v.template AsType<float>()[Number<1>{}]);
+    const auto y = __builtin_amdgcn_cvt_pkrtz(f32x2_v.template AsType<float>()[Number<0>{}],
+                                              f32x2_v.template AsType<float>()[Number<1>{}]);
    return bit_cast<half2_t>(y);
 }

-
-
 // convert fp16 to fp8
 template <>
 inline __host__ __device__ f8_t type_convert<f8_t, half_t>(half_t x)

--- a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp
@@ -28,7 +28,8 @@ using S = ck::Sequence<Is...>;
 using PassThrough = ck::tensor_operation::element_wise::PassThrough;

 // static constexpr auto GemmDefault    = ck::tensor_operation::device::GemmSpecialization::Default;
-// static constexpr auto GemmMNPadding  = ck::tensor_operation::device::GemmSpecialization::MNPadding;
+// static constexpr auto GemmMNPadding  =
+// ck::tensor_operation::device::GemmSpecialization::MNPadding;
 static constexpr auto GemmMNKPadding = ck::tensor_operation::device::GemmSpecialization::MNKPadding;

 using device_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_generic_instances = std::tuple<