Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
cb13b5d2
"git@developer.sourcefind.cn:modelzoo/solov2-pytorch.git" did not exist on "c10ae5d2ca158ae506b93198afa4926ea48bb40b"
Commit
cb13b5d2
authored
Mar 01, 2023
by
aska-0096
Browse files
ATI Conv Instances
parent
59f33851
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
65 additions
and
54 deletions
+65
-54
example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc
...ple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc
+65
-54
No files found.
example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc
View file @
cb13b5d2
...
...
@@ -35,19 +35,8 @@ using ResidualLayout = typename LayoutSettingSelector<NDimSpatial>::ResidualLayo
// clang-format off
template
<
ck
::
index_t
NDimSpatial
>
using
DeviceConvFwdInstances
=
std
::
tuple
<
#if
0
#if
1
// Instances provide to AIT Fp16
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
InKernelDataType
,
WeiKernelDataType
,
ck
::
Tuple
<
BiasKernelDataType
,
ResidualKernelDataType
>
,
OutKernelDataType
,
AccDataType
,
CShuffleDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
256
,
// BlockSize
512
,
16
,
4
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
4
,
1
,
// MRepeat x NRepeat
S
<
1
,
256
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
16
,
4
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
2
,
2
,
true
,
2
,
1
,
S
<
1
,
256
,
1
,
1
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
...
...
@@ -55,21 +44,21 @@ using DeviceConvFwdInstances = std::tuple<
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
256
,
// BlockSize
256
,
64
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
2
,
4
,
// MRepeat x NRepeat
S
<
1
,
25
6
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
2
,
2
,
S
<
1
,
2
56
,
1
,
1
>
,
8
>
,
8
,
1
,
// MRepeat x NRepeat
S
<
4
,
6
4
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
3
2
,
1
,
8
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
InKernelDataType
,
WeiKernelDataType
,
ck
::
Tuple
<
BiasKernelDataType
,
ResidualKernelDataType
>
,
OutKernelDataType
,
AccDataType
,
CShuffleDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
256
,
// BlockSize
256
,
64
,
4
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
64
,
256
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
2
,
4
,
// MRepeat x NRepeat
S
<
1
,
25
6
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
2
,
2
,
S
<
1
,
2
56
,
1
,
1
>
,
8
>
,
S
<
4
,
6
4
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
3
2
,
1
,
8
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
...
...
@@ -77,10 +66,10 @@ using DeviceConvFwdInstances = std::tuple<
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
256
,
// BlockSize
256
,
128
,
4
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
4
,
4
,
// MRepeat x NRepeat
S
<
1
,
25
6
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
4
,
2
,
S
<
1
,
2
56
,
1
,
1
>
,
8
>
,
8
,
2
,
// MRepeat x NRepeat
S
<
4
,
6
4
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
3
2
,
1
,
8
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
...
...
@@ -88,10 +77,10 @@ using DeviceConvFwdInstances = std::tuple<
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
256
,
// BlockSize
256
,
128
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
4
,
4
,
// MRepeat x NRepeat
S
<
1
,
25
6
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
4
,
2
,
S
<
1
,
2
56
,
1
,
1
>
,
8
>
,
8
,
2
,
// MRepeat x NRepeat
S
<
4
,
6
4
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
3
2
,
1
,
8
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
...
...
@@ -100,9 +89,9 @@ using DeviceConvFwdInstances = std::tuple<
128
,
256
,
4
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
4
,
4
,
// MRepeat x NRepeat
S
<
2
,
128
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
4
,
2
,
S
<
1
,
128
,
1
,
2
>
,
8
>
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
32
,
1
,
8
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
...
...
@@ -111,20 +100,20 @@ using DeviceConvFwdInstances = std::tuple<
128
,
256
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
4
,
4
,
// MRepeat x NRepeat
S
<
2
,
128
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
4
,
2
,
S
<
1
,
128
,
1
,
2
>
,
8
>
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
32
,
1
,
8
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
InKernelDataType
,
WeiKernelDataType
,
ck
::
Tuple
<
BiasKernelDataType
,
ResidualKernelDataType
>
,
OutKernelDataType
,
AccDataType
,
CShuffleDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
192
,
// BlockSize
1
92
,
4
8
,
6
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
256
,
// BlockSize
1
28
,
12
8
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
6
,
1
,
// MRepeat x NRepeat
S
<
1
,
192
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
2
,
4
8
,
2
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
4
,
true
,
3
,
1
,
S
<
1
,
96
,
1
,
2
>
,
8
>
,
4
,
2
,
// MRepeat x NRepeat
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
6
4
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
32
,
1
,
8
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
...
...
@@ -132,21 +121,21 @@ using DeviceConvFwdInstances = std::tuple<
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
128
,
// BlockSize
128
,
64
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
2
,
4
,
// MRepeat x NRepeat
S
<
1
,
128
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
2
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
2
,
2
,
S
<
1
,
128
,
1
,
1
>
,
8
>
,
4
,
2
,
// MRepeat x NRepeat
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
32
,
1
,
4
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
InKernelDataType
,
WeiKernelDataType
,
ck
::
Tuple
<
BiasKernelDataType
,
ResidualKernelDataType
>
,
OutKernelDataType
,
AccDataType
,
CShuffleDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
96
,
// BlockSize
96
,
4
8
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
128
,
// BlockSize
64
,
12
8
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
6
,
1
,
// MRepeat x NRepeat
S
<
1
,
96
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
2
,
48
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
3
,
1
,
S
<
1
,
48
,
1
,
2
>
,
8
>
,
2
,
4
,
// MRepeat x NRepeat
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
32
,
1
,
4
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
...
...
@@ -155,11 +144,33 @@ using DeviceConvFwdInstances = std::tuple<
64
,
64
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
4
,
2
,
// MRepeat x NRepeat
S
<
1
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
1
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
4
,
1
,
S
<
1
,
64
,
1
,
1
>
,
8
>
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
16
,
1
,
4
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
InKernelDataType
,
WeiKernelDataType
,
ck
::
Tuple
<
BiasKernelDataType
,
ResidualKernelDataType
>
,
OutKernelDataType
,
AccDataType
,
CShuffleDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
64
,
// BlockSize
128
,
32
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
8
,
1
,
// MRepeat x NRepeat
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
16
,
1
,
4
>
,
8
>
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
ck
::
Tuple
<
BiasLayout
<
NDimSpatial
>
,
ResidualLayout
<
NDimSpatial
>>
,
OutputLayout
<
NDimSpatial
>
,
InKernelDataType
,
WeiKernelDataType
,
ck
::
Tuple
<
BiasKernelDataType
,
ResidualKernelDataType
>
,
OutKernelDataType
,
AccDataType
,
CShuffleDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
64
,
// BlockSize
32
,
128
,
8
,
8
,
// MPerBlock x NPerBlock x K0PerBlock x K1
16
,
16
,
// MPerWMMA x NPerWMMA
2
,
4
,
// MRepeat x NRepeat
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
1
,
1
,
S
<
1
,
16
,
1
,
4
>
,
8
>
#endif
#if
1
#if
0
// GEMM_N = 16
// K0 = 8
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
<
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment