clang-format changes for pr881

39a1f853 · Harisankar Sadasivan · a20863b0 · 39a1f853 · 39a1f853 · 39a1f853
Commit 39a1f853 authored Sep 15, 2023 by Harisankar Sadasivan
5 changed files
--- a/example/53_gemv_splitk/gemv_splitk_fp16.cpp
+++ b/example/53_gemv_splitk/gemv_splitk_fp16.cpp
@@ -19,12 +19,10 @@ using CElementOp = PassThrough;
 static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding;
+#define K1 8 // K1PerThread:2,4,8
+#define K0 4 // K0PerBlock:1,2,3,4...32
-#define K1 8 //K1PerThread:2,4,8
+#define N1 2 // Nperthread:2,4,8
-#define K0 4 //K0PerBlock:1,2,3,4...32 
+#define B 64 // block-size:64
-#define N1 2 //Nperthread:2,4,8
-#define B 64 //block-size:64
 // clang-format off
 using DeviceGemvInstance = ck::tensor_operation::device::deviceGemvDl/*

--- a/include/ck/tensor_operation/gpu/device/device_gemv.hpp
+++ b/include/ck/tensor_operation/gpu/device/device_gemv.hpp
@@ -20,8 +20,7 @@ template <typename ALayout,
          typename CElementwiseOperation>
 struct DeviceGemv : public BaseOperator
 {
-    virtual std::unique_ptr<BaseArgument>
+    virtual std::unique_ptr<BaseArgument> MakeArgumentPointer(const void* p_a,
-    MakeArgumentPointer(const void* p_a,
                                                              const void* p_b,
                                                              void* p_c,
                                                              ck::index_t M,
@@ -33,7 +32,7 @@ struct DeviceGemv : public BaseOperator
                                                              AElementwiseOperation a_element_op,
                                                              BElementwiseOperation b_element_op,
                                                              CElementwiseOperation c_element_op,
-                        ck::index_t KBatch=1) = 0;
+                                                              ck::index_t KBatch = 1) = 0;
    virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
 };

--- a/include/ck/tensor_operation/gpu/device/impl/device_gemv_splitk.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_gemv_splitk.hpp
--- a/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
+++ b/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
@@ -658,11 +658,11 @@ struct BlockToCTileMap_3DGrid_KSplit
        return make_tuple(blockIdx.z, blockIdx.y, blockIdx.x);
    }
-        //HS: Map 1D block-id to 3D tuple (M,N,K)
+    // HS: Map 1D block-id to 3D tuple (M,N,K)
    __host__ __device__ inline constexpr auto convert_1D_block_idx_to_3D_tuple(
        const index_t& block_1d_id, const index_t& N, const index_t& k_batch) const
    {
-        const auto Ndim= math::integer_divide_ceil(N, NPerBlock);
+        const auto Ndim = math::integer_divide_ceil(N, NPerBlock);
        return make_tuple(((block_1d_id) / (k_batch * Ndim)),
                          (((block_1d_id) / k_batch) % Ndim),
                          (block_1d_id) % k_batch); // returns 3D tuple as (Mid,Nid,Kid)

--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp
@@ -27,7 +27,7 @@ template <typename GridwiseGemv,
          typename Block2CTileMap>
 __global__ void
 #if CK_USE_LAUNCH_BOUNDS
-__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
+    __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
 #endif
        kernel_gemv_dl_v1r3(
            typename GridwiseGemv::Argument karg,