Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
39a1f853
Commit
39a1f853
authored
Sep 15, 2023
by
Harisankar Sadasivan
Browse files
clang-format changes for pr881
parent
a20863b0
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
27 additions
and
30 deletions
+27
-30
example/53_gemv_splitk/gemv_splitk_fp16.cpp
example/53_gemv_splitk/gemv_splitk_fp16.cpp
+4
-6
include/ck/tensor_operation/gpu/device/device_gemv.hpp
include/ck/tensor_operation/gpu/device/device_gemv.hpp
+13
-14
include/ck/tensor_operation/gpu/device/impl/device_gemv_splitk.hpp
...k/tensor_operation/gpu/device/impl/device_gemv_splitk.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
+4
-4
include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp
...ude/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp
+5
-5
No files found.
example/53_gemv_splitk/gemv_splitk_fp16.cpp
View file @
39a1f853
...
@@ -19,12 +19,10 @@ using CElementOp = PassThrough;
...
@@ -19,12 +19,10 @@ using CElementOp = PassThrough;
static
constexpr
auto
GemmMNPadding
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNPadding
;
static
constexpr
auto
GemmMNPadding
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNPadding
;
#define K1 8 // K1PerThread:2,4,8
#define K0 4 // K0PerBlock:1,2,3,4...32
#define K1 8 //K1PerThread:2,4,8
#define N1 2 // Nperthread:2,4,8
#define K0 4 //K0PerBlock:1,2,3,4...32
#define B 64 // block-size:64
#define N1 2 //Nperthread:2,4,8
#define B 64 //block-size:64
// clang-format off
// clang-format off
using
DeviceGemvInstance
=
ck
::
tensor_operation
::
device
::
deviceGemvDl
/*
using
DeviceGemvInstance
=
ck
::
tensor_operation
::
device
::
deviceGemvDl
/*
...
...
include/ck/tensor_operation/gpu/device/device_gemv.hpp
View file @
39a1f853
...
@@ -20,8 +20,7 @@ template <typename ALayout,
...
@@ -20,8 +20,7 @@ template <typename ALayout,
typename
CElementwiseOperation
>
typename
CElementwiseOperation
>
struct
DeviceGemv
:
public
BaseOperator
struct
DeviceGemv
:
public
BaseOperator
{
{
virtual
std
::
unique_ptr
<
BaseArgument
>
virtual
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
const
void
*
p_a
,
MakeArgumentPointer
(
const
void
*
p_a
,
const
void
*
p_b
,
const
void
*
p_b
,
void
*
p_c
,
void
*
p_c
,
ck
::
index_t
M
,
ck
::
index_t
M
,
...
@@ -33,7 +32,7 @@ struct DeviceGemv : public BaseOperator
...
@@ -33,7 +32,7 @@ struct DeviceGemv : public BaseOperator
AElementwiseOperation
a_element_op
,
AElementwiseOperation
a_element_op
,
BElementwiseOperation
b_element_op
,
BElementwiseOperation
b_element_op
,
CElementwiseOperation
c_element_op
,
CElementwiseOperation
c_element_op
,
ck
::
index_t
KBatch
=
1
)
=
0
;
ck
::
index_t
KBatch
=
1
)
=
0
;
virtual
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
=
0
;
virtual
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
=
0
;
};
};
...
...
include/ck/tensor_operation/gpu/device/impl/device_gemv_splitk.hpp
View file @
39a1f853
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
View file @
39a1f853
...
@@ -658,11 +658,11 @@ struct BlockToCTileMap_3DGrid_KSplit
...
@@ -658,11 +658,11 @@ struct BlockToCTileMap_3DGrid_KSplit
return
make_tuple
(
blockIdx
.
z
,
blockIdx
.
y
,
blockIdx
.
x
);
return
make_tuple
(
blockIdx
.
z
,
blockIdx
.
y
,
blockIdx
.
x
);
}
}
//HS: Map 1D block-id to 3D tuple (M,N,K)
//
HS: Map 1D block-id to 3D tuple (M,N,K)
__host__
__device__
inline
constexpr
auto
convert_1D_block_idx_to_3D_tuple
(
__host__
__device__
inline
constexpr
auto
convert_1D_block_idx_to_3D_tuple
(
const
index_t
&
block_1d_id
,
const
index_t
&
N
,
const
index_t
&
k_batch
)
const
const
index_t
&
block_1d_id
,
const
index_t
&
N
,
const
index_t
&
k_batch
)
const
{
{
const
auto
Ndim
=
math
::
integer_divide_ceil
(
N
,
NPerBlock
);
const
auto
Ndim
=
math
::
integer_divide_ceil
(
N
,
NPerBlock
);
return
make_tuple
(((
block_1d_id
)
/
(
k_batch
*
Ndim
)),
return
make_tuple
(((
block_1d_id
)
/
(
k_batch
*
Ndim
)),
(((
block_1d_id
)
/
k_batch
)
%
Ndim
),
(((
block_1d_id
)
/
k_batch
)
%
Ndim
),
(
block_1d_id
)
%
k_batch
);
// returns 3D tuple as (Mid,Nid,Kid)
(
block_1d_id
)
%
k_batch
);
// returns 3D tuple as (Mid,Nid,Kid)
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp
View file @
39a1f853
...
@@ -27,7 +27,7 @@ template <typename GridwiseGemv,
...
@@ -27,7 +27,7 @@ template <typename GridwiseGemv,
typename
Block2CTileMap
>
typename
Block2CTileMap
>
__global__
void
__global__
void
#if CK_USE_LAUNCH_BOUNDS
#if CK_USE_LAUNCH_BOUNDS
__launch_bounds__
(
CK_MAX_THREAD_PER_BLOCK
,
CK_MIN_BLOCK_PER_CU
)
__launch_bounds__
(
CK_MAX_THREAD_PER_BLOCK
,
CK_MIN_BLOCK_PER_CU
)
#endif
#endif
kernel_gemv_dl_v1r3
(
kernel_gemv_dl_v1r3
(
typename
GridwiseGemv
::
Argument
karg
,
typename
GridwiseGemv
::
Argument
karg
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment