Change mask data type from int32 to f16

70becc77 · ltqin · 8f8c0ddc
Commit 70becc77 authored Mar 15, 2023 by ltqin
4 changed files
--- a/client_example/08_fused_attention/fused_attention_bias_mask.cpp
+++ b/client_example/08_fused_attention/fused_attention_bias_mask.cpp
@@ -24,7 +24,7 @@ using B0DataType  = ck::half_t;
 using B1DataType  = ck::half_t;
 using CDataType   = ck::half_t;
 using D00DataType = ck::half_t;
-using D01DataType = int32_t;
+using D01DataType = ck::half_t;
 using AccDataType = float;
 struct SimpleDeviceMem

--- a/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
+++ b/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
@@ -399,12 +399,21 @@ struct ScaleMask
    template <typename Y, typename X0, typename X1>
    __host__ __device__ constexpr void operator()(Y& y, const X0& x, const X1& mask) const;
+    template <>
    __host__ __device__ constexpr void
-    operator()(float& y, const float& x, const int32_t& mask) const
+    operator()(float& y, const float& x, const int16_t& mask) const
    {
        float filter_value = (mask == 1 ? 0.0f : mask_filter_value_);
        y                  = scale_ * x + filter_value;
    }
+    template <>
+    __host__ __device__ constexpr void
+    operator()(float& y, const float& x, const half_t& mask) const
+    {
+        float filter_value = (mask < 1.0f ? mask_filter_value_ : 0.0f);
+        y                  = scale_ * x + filter_value;
+    }
    const float scale_;
    const float mask_filter_value_;
 };
@@ -423,12 +432,20 @@ struct ScaleBiasMask
    template <>
    __host__ __device__ constexpr void
-    operator()(float& y, const float& x, const half_t& bias, const int32_t& mask) const
+    operator()(float& y, const float& x, const half_t& bias, const int16_t& mask) const
    {
        float filter_value = (mask == 1 ? 0.0f : mask_filter_value_);
        y                  = scale_ * x + ck::type_convert<float>(bias) + filter_value;
    }
+    template <>
+    __host__ __device__ constexpr void
+    operator()(float& y, const float& x, const half_t& bias, const half_t& mask) const
+    {
+        float filter_value = (mask < 1.0f ? mask_filter_value_ : 0.0f);
+        y                  = scale_ * x + ck::type_convert<float>(bias) + filter_value;
+    }
    const float scale_;
    const float mask_filter_value_;
 };

--- a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute_general.hpp
+++ b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute_general.hpp
@@ -28,7 +28,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
                                            F16,
                                            F16,
                                            F16,
-                                            ck::Tuple<int32_t>,
+                                            ck::Tuple<F16>,
                                            ck::Tuple<>,
                                            PassThrough,
                                            PassThrough,
@@ -48,7 +48,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
                                                            F16,
                                                            F16,
                                                            F16,
-                                                            ck::Tuple<int32_t>,
+                                                            ck::Tuple<F16>,
                                                            ck::Tuple<>,
                                                            PassThrough,
                                                            PassThrough,
@@ -68,7 +68,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
                                            F16,
                                            F16,
                                            F16,
-                                            ck::Tuple<F16, int32_t>,
+                                            ck::Tuple<F16, F16>,
                                            ck::Tuple<>,
                                            PassThrough,
                                            PassThrough,
@@ -88,7 +88,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
                                                            F16,
                                                            F16,
                                                            F16,
-                                                            ck::Tuple<F16, int32_t>,
+                                                            ck::Tuple<F16, F16>,
                                                            ck::Tuple<>,
                                                            PassThrough,
                                                            PassThrough,

--- a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_multiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk_gno_gmo_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_multiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk_gno_gmo_instance.cpp
@@ -80,7 +80,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
                                            F16,
                                            F16,
                                            F16,
-                                            ck::Tuple<int32_t>,
+                                            ck::Tuple<F16>,
                                            ck::Tuple<>,
                                            PassThrough,
                                            PassThrough,
@@ -100,7 +100,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
            1,
            F16,
            F32,
-            ck::Tuple<int32_t>,
+            ck::Tuple<F16>,
            ScaleMask,
            MaskingSpecialization::MaskOutUpperTriangle>{});
 }
@@ -117,7 +117,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
                                                            F16,
                                                            F16,
                                                            F16,
-                                                            ck::Tuple<int32_t>,
+                                                            ck::Tuple<F16>,
                                                            ck::Tuple<>,
                                                            PassThrough,
                                                            PassThrough,
@@ -137,7 +137,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
            1,
            F16,
            F32,
-            ck::Tuple<int32_t>,
+            ck::Tuple<F16>,
            ScaleMask,
            MaskingSpecialization::MaskDisabled>{});
 }
@@ -154,7 +154,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
                                            F16,
                                            F16,
                                            F16,
-                                            ck::Tuple<F16, int32_t>,
+                                            ck::Tuple<F16, F16>,
                                            ck::Tuple<>,
                                            PassThrough,
                                            PassThrough,
@@ -174,7 +174,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
            1,
            F16,
            F32,
-            ck::Tuple<F16, int32_t>,
+            ck::Tuple<F16, F16>,
            ScaleBiasMask,
            MaskingSpecialization::MaskOutUpperTriangle>{});
 }
@@ -191,7 +191,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
                                                            F16,
                                                            F16,
                                                            F16,
-                                                            ck::Tuple<F16, int32_t>,
+                                                            ck::Tuple<F16, F16>,
                                                            ck::Tuple<>,
                                                            PassThrough,
                                                            PassThrough,
@@ -211,7 +211,7 @@ void add_device_batched_gemm_mutiple_d_softmax_gemm_permute_xdl_cshuffle_gmk_gnk
            1,
            F16,
            F32,
-            ck::Tuple<F16, int32_t>,
+            ck::Tuple<F16, F16>,
            ScaleBiasMask,
            MaskingSpecialization::MaskDisabled>{});
 }