format

cc50b687 · Anthony Chang · ab8e0f28 · cc50b687 · cc50b687
Commit cc50b687 authored Jun 20, 2022 by Anthony Chang
2 changed files
--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp
@@ -932,8 +932,10 @@ struct GridwiseGemmLayernorm_k0mk1_k0nk1_mn_xdl_cshuffle_v1
                                            reduce::SquaredAdd,
                                            false>;
-                    const auto d0_zeroVal = ThreadwiseReduceD0::Op::template GetIdentityValue<FloatReduceAcc>();
-                    const auto d1_zeroVal = ThreadwiseReduceD1::Op::template GetIdentityValue<FloatReduceAcc>();
+                    const auto d0_zeroVal =
+                        ThreadwiseReduceD0::Op::template GetIdentityValue<FloatReduceAcc>();
+                    const auto d1_zeroVal =
+                        ThreadwiseReduceD1::Op::template GetIdentityValue<FloatReduceAcc>();
                    static_for<0, mreduce_per_thread, 1>{}(
                        [&](auto i) { d0_thread_buf(i) = d0_zeroVal; });
                    static_for<0, mreduce_per_thread, 1>{}(
@@ -984,8 +986,7 @@ struct GridwiseGemmLayernorm_k0mk1_k0nk1_mn_xdl_cshuffle_v1
                            FloatReduceAcc numerator = c_reduce_thread_buf(dst_offset) - avg_sum;
                            FloatReduceAcc divisor = epsilon + avg_squared_sum - avg_sum * avg_sum;
                            FloatReduceAcc divisor_sqrt;
-                            tensor_operation::element_wise::UnarySqrt{}(
-                                divisor_sqrt, divisor);
+                            tensor_operation::element_wise::UnarySqrt{}(divisor_sqrt, divisor);
                            c_reduce_thread_buf(dst_offset) = numerator / divisor_sqrt;
                        });

--- a/include/ck/utility/reduction_operator.hpp
+++ b/include/ck/utility/reduction_operator.hpp
@@ -84,7 +84,10 @@ struct Add
 struct SquaredAdd
 {
    template <class T>
-    __host__ __device__ static constexpr T GetIdentityValue() { return type_convert<T>(0.0f); };
+    __host__ __device__ static constexpr T GetIdentityValue()
+    {
+        return type_convert<T>(0.0f);
+    };
    __host__ __device__ static constexpr bool
    IsCompatibleInMemoryDataOperation(InMemoryDataOperationEnum operation)