gaoqiong / composable_kernel · Commits

Commit 95a1112f, authored Oct 12, 2023 by Bartlomiej Kocot
Parent: 1a0f2c35

Update README, changelog, profiler

Showing 3 changed files with 22 additions and 8 deletions:
- CHANGELOG.md (+1, -1)
- profiler/README.md (+4, -2)
- profiler/src/profile_grouped_conv_bwd_weight.cpp (+17, -5)
CHANGELOG.md

```diff
@@ -14,7 +14,7 @@ None
 ### Additions
 - Added an image to a column kernel (#867)
 - Added a column to an image kernel (#930)
-- Support for 3D grouped convolution forward on RDNA 3 GPUs (#935)
+- Support for 3D grouped convolution on RDNA 3 GPUs (#935, #950, #985)
 - Grouped convolution support for small K and C (#822 #879 #897)
 - Support for NHWGC (2D and 3D) grouped convolution backward weight (#769 #804)
 - Support for bf16/f32/f16 and NHWGC (2D and 3d) grouped convolution backward data (#757 #799)
```
profiler/README.md

````diff
@@ -147,7 +147,9 @@ GB/s: 127.947
 # arg1: tensor operation (grouped_conv_bwd_weight: Grouped Convolution Backward Weight)
 # arg2: data type (0: Input fp32, Weight fp32, Output fp32
 #                  1: Input fp16, Weight fp16, Output fp16
-#                  2: Input bf16, Weight fp32, Output bf16)
+#                  2: Input bf16, Weight fp32, Output bf16
+#                  3: Input fp16, Weight fp16, Output fp16, Gemm bf8@fp8
+#                  4: Input int8, Weight int8, Output int8)
 # arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, N, K, Ho, Wo]
 #                      1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, N, Ho, Wo, K]
 #                      2: Input[N, Hi, Wi, G, C], Weight[G, K, Y, X, C], Output[N, Ho, Wo, G, K]
@@ -167,7 +169,7 @@ GB/s: 127.947
 # SplitK
 ################ op datatype layout verify init log time Ndims G N K C Y X Hi Wi Sy Sx Dy Dx LeftPy LeftPx RightPy RightPx SplitK
-./bin/ckProfiler grouped_conv_bwd_weight 1 0 1 1 0 1 2 32 256 256 512 3 3 28 28 1 1 1 1 1 0 0 0 1
+./bin/ckProfiler grouped_conv_bwd_weight 1 1 0 1 0 1 2 32 256 256 512 3 3 28 28 1 1 1 1 1 0 0 0 1
 ```
````
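Note on the new options: arg2 = 3 (fp16 with a bf8@fp8 GEMM) already existed in the profiler and is only being documented here, while arg2 = 4 (int8) is added by this commit. Judging from the source changes below, the int8 path is wired up for the 3D layout cases (GNDHWC and NDHWGC), so exercising it would presumably require a 3D problem description (Ndims = 3) rather than the 2D example shown above.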
profiler/src/profile_grouped_conv_bwd_weight.cpp

```diff
@@ -20,10 +20,11 @@ enum struct ConvLayout
 enum struct ConvDataType
 {
     F32_F32_F32,        // 0
     F16_F16_F16,        // 1
     BF16_F32_BF16,      // 2
-    F16_F16_F16_BF8_F8  // 3
+    F16_F16_F16_BF8_F8, // 3
+    I8_I8_I8            // 4
 };
 
 #define OP_NAME "grouped_conv_bwd_weight"
```
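The new enumerator keeps the enum order in sync with the numeric arg2 values documented in the README. The argv handling itself is not part of this diff; below is a minimal sketch of how arg2 plausibly maps onto this enum (the parse_data_type helper is hypothetical, only the enum mirrors the diff):

```cpp
#include <cstdlib>
#include <iostream>

// Mirrors the enum from the hunk above; I8_I8_I8 is the entry added here.
enum struct ConvDataType
{
    F32_F32_F32,        // 0
    F16_F16_F16,        // 1
    BF16_F32_BF16,      // 2
    F16_F16_F16_BF8_F8, // 3
    I8_I8_I8            // 4
};

// Hypothetical helper: convert the raw arg2 string into the enum,
// rejecting values outside the known range.
inline ConvDataType parse_data_type(const char* arg)
{
    const int v = std::atoi(arg);
    if(v < 0 || v > static_cast<int>(ConvDataType::I8_I8_I8))
    {
        std::cerr << "unknown data type: " << arg << '\n';
        std::exit(EXIT_FAILURE);
    }
    return static_cast<ConvDataType>(v);
}
```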
```diff
@@ -35,7 +36,8 @@ static void print_helper_msg()
         << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n"
         << "                 1: Input fp16, Weight fp16, Output fp16\n"
         << "                 2: Input bf16, Weight fp32, Output bf16\n"
-        << "                 3: Input fp16, Weight fp16, Output fp16, Gemm bf8@fp8)\n"
+        << "                 3: Input fp16, Weight fp16, Output fp16, Gemm bf8@fp8\n"
+        << "                 4: Input int8, Weight int8, Output int8)\n"
         << "arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, "
            "N, K, Ho, Wo]\n"
         << "                     1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, "
```
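A detail worth noting in this hunk: the `"N, K, Ho, Wo]\n"` line has no `<<` of its own because C++ concatenates adjacent string literals at compile time, so it simply continues the preceding `"arg3: ..."` literal. A minimal self-contained illustration:

```cpp
#include <iostream>

int main()
{
    // The two adjacent literals are merged at compile time into one
    // string, "tensor layout (0: Output[G, N, K, Ho, Wo]\n", before
    // being passed to operator<< as a single argument.
    std::cout << "tensor layout (0: Output[G, "
                 "N, K, Ho, Wo]\n";
    return 0;
}
```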
```diff
@@ -196,6 +198,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
             // fp32 atomic add is used for weight tensor in bf16 kernel
             return profile(I3, GNDHWC{}, GKZYXC{}, GNDHWK{}, BF16{}, F32{}, BF16{}, BF16{}, BF16{});
         }
+        else if(data_type == ConvDataType::I8_I8_I8)
+        {
+            return profile(
+                I3, GNDHWC{}, GKZYXC{}, GNDHWK{}, int8_t{}, int8_t{}, int8_t{}, int8_t{}, int8_t{});
+        }
     }
     else if(num_dim_spatial == 3 && layout == ConvLayout::NHWGC_GKYXC_NHWGK)
     {
```
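These `profile(...)` calls select a kernel instantiation purely through the types of their arguments: the layout tags (`GNDHWC{}`, `GKZYXC{}`, `GNDHWK{}`) and element types (`BF16{}`, `int8_t{}`, ...) are empty, value-initialized objects whose only job is to drive template argument deduction. A reduced sketch of the pattern, assuming simplified tag and function definitions (only the names mirror the diff; the real `profile` takes more parameters and actually runs the kernels):

```cpp
#include <cstdint>
#include <iostream>
#include <typeinfo>

// Empty tag types: they carry layout information in the type, not in data.
struct GNDHWC {};
struct GKZYXC {};
struct GNDHWK {};

// Simplified stand-in for the profiler's templated entry point; each
// distinct combination of tag/element types selects a distinct instantiation.
template <typename InLayout, typename WeiLayout, typename OutLayout,
          typename InDataType, typename WeiDataType, typename OutDataType>
int profile(InLayout, WeiLayout, OutLayout, InDataType, WeiDataType, OutDataType)
{
    std::cout << "profiling with input element type " << typeid(InDataType).name() << '\n';
    return 0;
}

int main()
{
    // Mirrors the int8 dispatch added by this commit (argument list
    // shortened relative to the real call).
    return profile(GNDHWC{}, GKZYXC{}, GNDHWK{}, int8_t{}, int8_t{}, int8_t{});
}
```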
```diff
@@ -216,6 +223,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
         {
             return profile(
                 I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, F16{}, F16{}, F16{}, BF8{}, F8{});
         }
+        else if(data_type == ConvDataType::I8_I8_I8)
+        {
+            return profile(
+                I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, int8_t{}, int8_t{}, int8_t{}, int8_t{}, int8_t{});
+        }
     }
     std::cout << "this data_type & layout is not implemented" << std::endl;
```