Commit 39a1f853 authored by Harisankar Sadasivan
Browse files

clang-format changes for pr881

parent a20863b0
......@@ -19,12 +19,10 @@ using CElementOp = PassThrough;
// GEMM specialization selector (MNPadding) followed by the per-instance size macros.
// NOTE(review): the four #defines below appear twice in this view; the two sets differ only
// in comment spacing, which looks like the before/after sides of a formatting-only diff.
// Confirm the real file carries a single set.
// NOTE(review): presumably these macros feed the DeviceGemvInstance template argument list
// that follows (not visible here) — TODO confirm.
static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding;
#define K1 8 //K1PerThread:2,4,8
#define K0 4 //K0PerBlock:1,2,3,4...32
#define N1 2 //Nperthread:2,4,8
#define B 64 //block-size:64
#define K1 8 // K1PerThread:2,4,8
#define K0 4 // K0PerBlock:1,2,3,4...32
#define N1 2 // Nperthread:2,4,8
#define B 64 // block-size:64
// clang-format off
using DeviceGemvInstance = ck::tensor_operation::device::deviceGemvDl/*
......
......@@ -20,20 +20,19 @@ template <typename ALayout,
typename CElementwiseOperation>
// Abstract interface for a device GEMV operation: derives from BaseOperator and declares
// two pure-virtual factory methods that concrete implementations must override.
// NOTE(review): MakeArgumentPointer is declared twice below with identical parameters; the
// two copies differ only in line wrapping and in spacing around `KBatch = 1`, which looks
// like the before/after sides of a formatting-only diff. Confirm the real header keeps one.
struct DeviceGemv : public BaseOperator
{
// Builds the type-erased argument object for one problem instance.
//   p_a, p_b        : read-only untyped pointers; p_c : writable untyped pointer
//                     (presumably the A/B inputs and the C output — TODO confirm).
//   M, N, K         : problem extents.
//   StrideA/B/C     : strides for the three operands.
//   a/b/c_element_op: element-wise operation objects, passed by value.
//   KBatch          : defaults to 1 (presumably the split-K batch count — TODO confirm).
// Returns an owning pointer to a BaseArgument.
virtual std::unique_ptr<BaseArgument>
MakeArgumentPointer(const void* p_a,
const void* p_b,
void* p_c,
ck::index_t M,
ck::index_t N,
ck::index_t K,
ck::index_t StrideA,
ck::index_t StrideB,
ck::index_t StrideC,
AElementwiseOperation a_element_op,
BElementwiseOperation b_element_op,
CElementwiseOperation c_element_op,
ck::index_t KBatch=1) = 0;
// Same declaration as above, in its re-wrapped form (see the NOTE on the struct).
virtual std::unique_ptr<BaseArgument> MakeArgumentPointer(const void* p_a,
const void* p_b,
void* p_c,
ck::index_t M,
ck::index_t N,
ck::index_t K,
ck::index_t StrideA,
ck::index_t StrideB,
ck::index_t StrideC,
AElementwiseOperation a_element_op,
BElementwiseOperation b_element_op,
CElementwiseOperation c_element_op,
ck::index_t KBatch = 1) = 0;
// Returns an owning pointer to a BaseInvoker; takes no arguments.
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
};
......
......@@ -271,7 +271,7 @@ struct deviceGemvDl : public DeviceGemv<ALayout,
return false;
}
}
// //
// //
// polymorphic
bool IsSupportedArgument(const BaseArgument* p_arg) override
{
......
......@@ -658,14 +658,14 @@ struct BlockToCTileMap_3DGrid_KSplit
return make_tuple(blockIdx.z, blockIdx.y, blockIdx.x);
}
//HS: Map 1D block-id to 3D tuple (M,N,K)
// HS: Map 1D block-id to 3D tuple (M,N,K)
// Splits a linear block id into three indices, with the k-batch index varying fastest:
//   block_1d_id == (Mid * Ndim + Nid) * k_batch + Kid
// where Ndim is integer_divide_ceil(N, NPerBlock).
// No upper bound is applied to Mid here; it is simply block_1d_id / (k_batch * Ndim).
// NOTE(review): the Ndim declaration and the last two make_tuple argument lines each appear
// twice below, differing only in whitespace. This looks like the before/after sides of a
// formatting-only diff; confirm the real header contains each line once.
__host__ __device__ inline constexpr auto convert_1D_block_idx_to_3D_tuple(
const index_t& block_1d_id, const index_t& N, const index_t& k_batch) const
{
// Tile count along N (presumably rounded up so a partial tile is counted — TODO confirm
// against math::integer_divide_ceil).
const auto Ndim= math::integer_divide_ceil(N, NPerBlock);
const auto Ndim = math::integer_divide_ceil(N, NPerBlock);
// Quotient by (k_batch * Ndim) gives Mid; the remaining digits give Nid, then Kid.
return make_tuple(((block_1d_id) / (k_batch * Ndim)),
(((block_1d_id) / k_batch) % Ndim),
(block_1d_id) % k_batch); // returns 3D tuple as (Mid,Nid,Kid)
(((block_1d_id) / k_batch) % Ndim),
(block_1d_id) % k_batch); // returns 3D tuple as (Mid,Nid,Kid)
}
template <typename CTileIdx, typename CTileDim>
......
......@@ -27,12 +27,12 @@ template <typename GridwiseGemv,
typename Block2CTileMap>
__global__ void
#if CK_USE_LAUNCH_BOUNDS
__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
#endif
kernel_gemv_dl_v1r3(
typename GridwiseGemv::Argument karg,
const Block2CTileMap& block_2_ctile_map) //: in __global__ functions, struct is
// better for reduced load overhead
kernel_gemv_dl_v1r3(
typename GridwiseGemv::Argument karg,
const Block2CTileMap& block_2_ctile_map) //: in __global__ functions, struct is
// better for reduced load overhead
{
constexpr index_t shared_block_size =
GridwiseGemv::GetSharedMemoryNumberOfByte() / sizeof(FloatAB);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment