Commit 39a1f853 authored by Harisankar Sadasivan
Browse files

clang-format changes for pr881

parent a20863b0
...@@ -19,12 +19,10 @@ using CElementOp = PassThrough; ...@@ -19,12 +19,10 @@ using CElementOp = PassThrough;
static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding; static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding;
#define K1 8 // K1PerThread:2,4,8
#define K0 4 // K0PerBlock:1,2,3,4...32
#define K1 8 //K1PerThread:2,4,8 #define N1 2 // Nperthread:2,4,8
#define K0 4 //K0PerBlock:1,2,3,4...32 #define B 64 // block-size:64
#define N1 2 //Nperthread:2,4,8
#define B 64 //block-size:64
// clang-format off // clang-format off
using DeviceGemvInstance = ck::tensor_operation::device::deviceGemvDl/* using DeviceGemvInstance = ck::tensor_operation::device::deviceGemvDl/*
......
...@@ -20,8 +20,7 @@ template <typename ALayout, ...@@ -20,8 +20,7 @@ template <typename ALayout,
typename CElementwiseOperation> typename CElementwiseOperation>
struct DeviceGemv : public BaseOperator struct DeviceGemv : public BaseOperator
{ {
virtual std::unique_ptr<BaseArgument> virtual std::unique_ptr<BaseArgument> MakeArgumentPointer(const void* p_a,
MakeArgumentPointer(const void* p_a,
const void* p_b, const void* p_b,
void* p_c, void* p_c,
ck::index_t M, ck::index_t M,
...@@ -33,7 +32,7 @@ struct DeviceGemv : public BaseOperator ...@@ -33,7 +32,7 @@ struct DeviceGemv : public BaseOperator
AElementwiseOperation a_element_op, AElementwiseOperation a_element_op,
BElementwiseOperation b_element_op, BElementwiseOperation b_element_op,
CElementwiseOperation c_element_op, CElementwiseOperation c_element_op,
ck::index_t KBatch=1) = 0; ck::index_t KBatch = 1) = 0;
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0; virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
}; };
......
...@@ -658,11 +658,11 @@ struct BlockToCTileMap_3DGrid_KSplit ...@@ -658,11 +658,11 @@ struct BlockToCTileMap_3DGrid_KSplit
return make_tuple(blockIdx.z, blockIdx.y, blockIdx.x); return make_tuple(blockIdx.z, blockIdx.y, blockIdx.x);
} }
//HS: Map 1D block-id to 3D tuple (M,N,K) // HS: Map 1D block-id to 3D tuple (M,N,K)
__host__ __device__ inline constexpr auto convert_1D_block_idx_to_3D_tuple( __host__ __device__ inline constexpr auto convert_1D_block_idx_to_3D_tuple(
const index_t& block_1d_id, const index_t& N, const index_t& k_batch) const const index_t& block_1d_id, const index_t& N, const index_t& k_batch) const
{ {
const auto Ndim= math::integer_divide_ceil(N, NPerBlock); const auto Ndim = math::integer_divide_ceil(N, NPerBlock);
return make_tuple(((block_1d_id) / (k_batch * Ndim)), return make_tuple(((block_1d_id) / (k_batch * Ndim)),
(((block_1d_id) / k_batch) % Ndim), (((block_1d_id) / k_batch) % Ndim),
(block_1d_id) % k_batch); // returns 3D tuple as (Mid,Nid,Kid) (block_1d_id) % k_batch); // returns 3D tuple as (Mid,Nid,Kid)
......
...@@ -27,7 +27,7 @@ template <typename GridwiseGemv, ...@@ -27,7 +27,7 @@ template <typename GridwiseGemv,
typename Block2CTileMap> typename Block2CTileMap>
__global__ void __global__ void
#if CK_USE_LAUNCH_BOUNDS #if CK_USE_LAUNCH_BOUNDS
__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU) __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
#endif #endif
kernel_gemv_dl_v1r3( kernel_gemv_dl_v1r3(
typename GridwiseGemv::Argument karg, typename GridwiseGemv::Argument karg,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment