Commit 39a1f853 authored by Harisankar Sadasivan
Browse files

clang-format changes for pr881

parent a20863b0
...@@ -19,12 +19,10 @@ using CElementOp = PassThrough; ...@@ -19,12 +19,10 @@ using CElementOp = PassThrough;
static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding; static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding;
#define K1 8 // K1PerThread:2,4,8
#define K0 4 // K0PerBlock:1,2,3,4...32
#define K1 8 //K1PerThread:2,4,8 #define N1 2 // Nperthread:2,4,8
#define K0 4 //K0PerBlock:1,2,3,4...32 #define B 64 // block-size:64
#define N1 2 //Nperthread:2,4,8
#define B 64 //block-size:64
// clang-format off // clang-format off
using DeviceGemvInstance = ck::tensor_operation::device::deviceGemvDl/* using DeviceGemvInstance = ck::tensor_operation::device::deviceGemvDl/*
......
...@@ -20,20 +20,19 @@ template <typename ALayout, ...@@ -20,20 +20,19 @@ template <typename ALayout,
typename CElementwiseOperation> typename CElementwiseOperation>
struct DeviceGemv : public BaseOperator struct DeviceGemv : public BaseOperator
{ {
virtual std::unique_ptr<BaseArgument> virtual std::unique_ptr<BaseArgument> MakeArgumentPointer(const void* p_a,
MakeArgumentPointer(const void* p_a, const void* p_b,
const void* p_b, void* p_c,
void* p_c, ck::index_t M,
ck::index_t M, ck::index_t N,
ck::index_t N, ck::index_t K,
ck::index_t K, ck::index_t StrideA,
ck::index_t StrideA, ck::index_t StrideB,
ck::index_t StrideB, ck::index_t StrideC,
ck::index_t StrideC, AElementwiseOperation a_element_op,
AElementwiseOperation a_element_op, BElementwiseOperation b_element_op,
BElementwiseOperation b_element_op, CElementwiseOperation c_element_op,
CElementwiseOperation c_element_op, ck::index_t KBatch = 1) = 0;
ck::index_t KBatch=1) = 0;
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0; virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
}; };
......
...@@ -271,7 +271,7 @@ struct deviceGemvDl : public DeviceGemv<ALayout, ...@@ -271,7 +271,7 @@ struct deviceGemvDl : public DeviceGemv<ALayout,
return false; return false;
} }
} }
// // // //
// polymorphic // polymorphic
bool IsSupportedArgument(const BaseArgument* p_arg) override bool IsSupportedArgument(const BaseArgument* p_arg) override
{ {
......
...@@ -658,14 +658,14 @@ struct BlockToCTileMap_3DGrid_KSplit ...@@ -658,14 +658,14 @@ struct BlockToCTileMap_3DGrid_KSplit
return make_tuple(blockIdx.z, blockIdx.y, blockIdx.x); return make_tuple(blockIdx.z, blockIdx.y, blockIdx.x);
} }
//HS: Map 1D block-id to 3D tuple (M,N,K) // HS: Map 1D block-id to 3D tuple (M,N,K)
__host__ __device__ inline constexpr auto convert_1D_block_idx_to_3D_tuple( __host__ __device__ inline constexpr auto convert_1D_block_idx_to_3D_tuple(
const index_t& block_1d_id, const index_t& N, const index_t& k_batch) const const index_t& block_1d_id, const index_t& N, const index_t& k_batch) const
{ {
const auto Ndim= math::integer_divide_ceil(N, NPerBlock); const auto Ndim = math::integer_divide_ceil(N, NPerBlock);
return make_tuple(((block_1d_id) / (k_batch * Ndim)), return make_tuple(((block_1d_id) / (k_batch * Ndim)),
(((block_1d_id) / k_batch) % Ndim), (((block_1d_id) / k_batch) % Ndim),
(block_1d_id) % k_batch); // returns 3D tuple as (Mid,Nid,Kid) (block_1d_id) % k_batch); // returns 3D tuple as (Mid,Nid,Kid)
} }
template <typename CTileIdx, typename CTileDim> template <typename CTileIdx, typename CTileDim>
......
...@@ -27,12 +27,12 @@ template <typename GridwiseGemv, ...@@ -27,12 +27,12 @@ template <typename GridwiseGemv,
typename Block2CTileMap> typename Block2CTileMap>
__global__ void __global__ void
#if CK_USE_LAUNCH_BOUNDS #if CK_USE_LAUNCH_BOUNDS
__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU) __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
#endif #endif
kernel_gemv_dl_v1r3( kernel_gemv_dl_v1r3(
typename GridwiseGemv::Argument karg, typename GridwiseGemv::Argument karg,
const Block2CTileMap& block_2_ctile_map) //: in __global__ functions, struct is const Block2CTileMap& block_2_ctile_map) //: in __global__ functions, struct is
// better for reduced load overhead // better for reduced load overhead
{ {
constexpr index_t shared_block_size = constexpr index_t shared_block_size =
GridwiseGemv::GetSharedMemoryNumberOfByte() / sizeof(FloatAB); GridwiseGemv::GetSharedMemoryNumberOfByte() / sizeof(FloatAB);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment