Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4687ef88
Commit
4687ef88
authored
Feb 22, 2021
by
Chao Liu
Browse files
made multi-index transform support compile-time and run-time info
parent
2558d019
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
146 additions
and
102 deletions
+146
-102
composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
...convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
+49
-79
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
...lude/tensor_description/dynamic_multi_index_transform.hpp
+16
-18
composable_kernel/include/tensor_description/dynamic_multi_index_transform_helper.hpp
...nsor_description/dynamic_multi_index_transform_helper.hpp
+74
-0
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp
...e/tensor_description/dynamic_tensor_descriptor_helper.hpp
+3
-3
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
...convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
+4
-2
No files found.
composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
View file @
4687ef88
...
@@ -86,46 +86,43 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
...
@@ -86,46 +86,43 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
// weight tensor
// weight tensor
const
auto
wei_gemmk_gemmm_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
wei_gemmk_gemmm_global_desc
=
transform_dynamic_tensor_descriptor
(
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_
multi_index
(
K
,
C
*
Y
*
X
)),
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_
tuple
(
K
,
C
*
Y
*
X
)),
make_tuple
(
DynamicP
ass
T
hrough
{
K
},
DynamicP
ass
T
hrough
{
C
*
Y
*
X
}
),
make_tuple
(
make_p
ass
_t
hrough
_transform
(
K
),
make_p
ass
_t
hrough
_transform
(
C
*
Y
*
X
)
),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
>
{}));
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
>
{}));
// input tensor
// input tensor
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
(
in_n_c_hi_wi_global_desc
,
in_n_c_hi_wi_global_desc
,
make_tuple
(
DynamicP
ass
T
hrough
{
N
}
,
make_tuple
(
make_p
ass
_t
hrough
_transform
(
N
)
,
DynamicP
ass
T
hrough
{
C
}
,
make_p
ass
_t
hrough
_transform
(
C
)
,
DynamicPad
{
Hi
,
InLeftPadH
,
InRightPadH
}
,
make_pad_transform
(
Hi
,
InLeftPadH
,
InRightPadH
)
,
DynamicPad
{
Wi
,
InLeftPadW
,
InRightPadW
}
),
make_pad_transform
(
Wi
,
InLeftPadW
,
InRightPadW
)
),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}));
const
auto
Hip
=
in_n_c_hip_wip_global_desc
.
GetLength
(
I2
);
const
auto
Wip
=
in_n_c_hip_wip_global_desc
.
GetLength
(
I3
);
const
auto
in_n_c_y_ho_x_wo_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
in_n_c_y_ho_x_wo_global_desc
=
transform_dynamic_tensor_descriptor
(
in_n_c_hip_wip_global_desc
,
in_n_c_hip_wip_global_desc
,
make_tuple
(
DynamicPassThrough
{
N
},
make_tuple
(
DynamicPassThrough
{
C
},
make_pass_through_transform
(
N
),
DynamicEmbed
<
2
>
{
make_multi_index
(
Y
,
Ho
),
make_pass_through_transform
(
C
),
make_multi_index
(
ConvDilationH
,
ConvStrideH
)},
make_embed_transform
(
make_tuple
(
Y
,
Ho
),
make_tuple
(
ConvDilationH
,
ConvStrideH
)),
DynamicEmbed
<
2
>
{
make_multi_index
(
X
,
Wo
),
make_embed_transform
(
make_tuple
(
X
,
Wo
),
make_tuple
(
ConvDilationW
,
ConvStrideW
))),
make_multi_index
(
ConvDilationW
,
ConvStrideW
)}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
,
3
>
{},
Sequence
<
4
,
5
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
,
3
>
{},
Sequence
<
4
,
5
>
{}));
const
auto
in_gemmk_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
in_gemmk_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
in_n_c_y_ho_x_wo_global_desc
,
in_n_c_y_ho_x_wo_global_desc
,
make_tuple
(
DynamicMerge
<
3
>
{
make_multi_index
(
C
,
Y
,
X
)
}
,
make_tuple
(
make_merge_transform
(
make_tuple
(
C
,
Y
,
X
)
)
,
DynamicMerge
<
3
>
{
make_multi_index
(
N
,
Ho
,
Wo
)
}
),
make_merge_transform
(
make_tuple
(
N
,
Ho
,
Wo
)
)
),
make_tuple
(
Sequence
<
1
,
2
,
4
>
{},
Sequence
<
0
,
3
,
5
>
{}),
make_tuple
(
Sequence
<
1
,
2
,
4
>
{},
Sequence
<
0
,
3
,
5
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
// output tensor
// output tensor
const
auto
out_gemmm_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
out_gemmm_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_multi_index
(
N
,
K
,
Ho
*
Wo
)),
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
N
,
K
,
Ho
*
Wo
)),
make_tuple
(
DynamicPassThrough
{
K
},
DynamicMerge
<
2
>
{
make_multi_index
(
N
,
Ho
*
Wo
)}),
make_tuple
(
make_pass_through_transform
(
K
),
make_merge_transform
(
make_tuple
(
N
,
Ho
*
Wo
))),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
,
2
>
{}),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
,
2
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
...
@@ -139,21 +136,17 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
...
@@ -139,21 +136,17 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
throw
std
::
runtime_error
(
"wrong! GEMM size no divisible"
);
throw
std
::
runtime_error
(
"wrong! GEMM size no divisible"
);
}
}
constexpr
auto
GemmM1
=
GemmMPerThread
*
GemmMLevel0Cluster
*
GemmMLevel1Cluster
;
constexpr
auto
GemmM1
=
Number
<
GemmMPerThread
*
GemmMLevel0Cluster
*
GemmMLevel1Cluster
>
{}
;
constexpr
auto
GemmN1
=
GemmNPerThread
*
GemmNLevel0Cluster
*
GemmNLevel1Cluster
;
constexpr
auto
GemmN1
=
Number
<
GemmNPerThread
*
GemmNLevel0Cluster
*
GemmNLevel1Cluster
>
{}
;
const
auto
GemmM0
=
GemmM
/
GemmM1
;
const
auto
GemmM0
=
GemmM
/
GemmM1
;
const
auto
GemmN0
=
GemmN
/
GemmN1
;
const
auto
GemmN0
=
GemmN
/
GemmN1
;
const
auto
GemmM0_GemmM1
=
make_tuple
(
GemmM0
,
Number
<
GemmM1
>
{});
const
auto
GemmN0_GemmN1
=
make_tuple
(
GemmN0
,
Number
<
GemmN1
>
{});
const
auto
out_gemmm0_gemmm1_gemmn0_gemmn1_global_desc
=
const
auto
out_gemmm0_gemmm1_gemmn0_gemmn1_global_desc
=
transform_dynamic_tensor_descriptor
(
transform_dynamic_tensor_descriptor
(
out_gemmm_gemmn_global_desc
,
out_gemmm_gemmn_global_desc
,
make_tuple
(
make_tuple
(
make_unmerge_transform
(
make_tuple
(
GemmM0
,
GemmM1
)),
DynamicUnMerge
<
2
,
false
,
remove_cv_t
<
decltype
(
GemmM0_GemmM1
)
>>
{
GemmM0_GemmM1
},
make_unmerge_transform
(
make_tuple
(
GemmN0
,
GemmN1
))),
DynamicUnMerge
<
2
,
false
,
remove_cv_t
<
decltype
(
GemmN0_GemmN1
)
>>
{
GemmN0_GemmN1
}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
,
1
>
{},
Sequence
<
2
,
3
>
{}));
make_tuple
(
Sequence
<
0
,
1
>
{},
Sequence
<
2
,
3
>
{}));
...
@@ -770,65 +763,41 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
...
@@ -770,65 +763,41 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
if
(
!
(
InLeftPadH
==
0
&&
InLeftPadW
==
0
&&
InRightPadH
==
0
&&
InRightPadW
==
0
))
if
(
!
(
InLeftPadH
==
0
&&
InLeftPadW
==
0
&&
InRightPadH
==
0
&&
InRightPadW
==
0
))
{
{
throw
std
::
runtime_error
(
"wrong!
1x1, stride 1,
no padding"
);
throw
std
::
runtime_error
(
"wrong! no padding"
);
}
}
// weight tensor
// weight tensor
#if 0
// TODO implement graph optimization of tensor descriptor transformation
const auto wei_gemmk_gemmm_global_desc = transform_dynamic_tensor_descriptor(
wei_k_c_y_x_global_desc,
make_tuple(DynamicPassThrough{K}, DynamicMerge<3>{make_multi_index(C, Y, X)}),
make_tuple(Sequence<0>{}, Sequence<1, 2, 3>{}),
make_tuple(Sequence<1>{}, Sequence<0>{}));
#else
const
auto
wei_gemmk_gemmm_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
wei_gemmk_gemmm_global_desc
=
transform_dynamic_tensor_descriptor
(
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_
multi_index
(
K
,
C
*
Y
*
X
)),
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_
tuple
(
K
,
C
*
Y
*
X
)),
make_tuple
(
DynamicP
ass
T
hrough
{
K
},
DynamicP
ass
T
hrough
{
C
*
Y
*
X
}
),
make_tuple
(
make_p
ass
_t
hrough
_transform
(
K
),
make_p
ass
_t
hrough
_transform
(
C
*
Y
*
X
)
),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
>
{}));
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
>
{}));
#endif
// input tensor
// input tensor
// debug: don't do padding
const
auto
in_n_c_hip_wip_global_desc
=
in_n_c_hi_wi_global_desc
;
const
auto
Hip
=
in_n_c_hip_wip_global_desc
.
GetLength
(
I2
);
const
auto
Wip
=
in_n_c_hip_wip_global_desc
.
GetLength
(
I3
);
const
auto
in_n_c_y_ho_x_wo_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
in_n_c_y_ho_x_wo_global_desc
=
transform_dynamic_tensor_descriptor
(
in_n_c_hip_wip_global_desc
,
in_n_c_hi_wi_global_desc
,
make_tuple
(
DynamicPassThrough
{
N
},
make_tuple
(
DynamicPassThrough
{
C
},
make_pass_through_transform
(
N
),
DynamicEmbed
<
2
>
{
make_multi_index
(
Y
,
Ho
),
make_pass_through_transform
(
C
),
make_multi_index
(
ConvDilationH
,
ConvStrideH
)},
make_embed_transform
(
make_tuple
(
Y
,
Ho
),
make_tuple
(
ConvDilationH
,
ConvStrideH
)),
DynamicEmbed
<
2
>
{
make_multi_index
(
X
,
Wo
),
make_embed_transform
(
make_tuple
(
X
,
Wo
),
make_tuple
(
ConvDilationW
,
ConvStrideW
))),
make_multi_index
(
ConvDilationW
,
ConvStrideW
)}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
,
3
>
{},
Sequence
<
4
,
5
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
,
3
>
{},
Sequence
<
4
,
5
>
{}));
const
auto
in_gemmk_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
in_gemmk_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
in_n_c_y_ho_x_wo_global_desc
,
in_n_c_y_ho_x_wo_global_desc
,
make_tuple
(
DynamicMerge
<
3
>
{
make_multi_index
(
C
,
Y
,
X
)
}
,
make_tuple
(
make_merge_transform
(
make_tuple
(
C
,
Y
,
X
)
)
,
DynamicMerge
<
3
>
{
make_multi_index
(
N
,
Ho
,
Wo
)
}
),
make_merge_transform
(
make_tuple
(
N
,
Ho
,
Wo
)
)
),
make_tuple
(
Sequence
<
1
,
2
,
4
>
{},
Sequence
<
0
,
3
,
5
>
{}),
make_tuple
(
Sequence
<
1
,
2
,
4
>
{},
Sequence
<
0
,
3
,
5
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
// output tensor
// output tensor
#if 0
//TODO: implement graph optimization of tensor descriptor transformation
const auto out_gemmm_gemmn_global_desc =
transform_dynamic_tensor_descriptor(out_n_k_ho_wo_global_desc,
make_tuple(DynamicPassThrough{K}, DynamicMerge<3>{make_mult_index(N, Ho, Wo)}),
make_tuple(Sequence<1>{}, Sequence<0, 2, 3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}));
#else
const
auto
out_gemmm_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
out_gemmm_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_multi_index
(
N
,
K
,
Ho
*
Wo
)),
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
N
,
K
,
Ho
*
Wo
)),
make_tuple
(
DynamicPassThrough
{
K
},
DynamicMerge
<
2
>
{
make_multi_index
(
N
,
Ho
*
Wo
)}),
make_tuple
(
make_pass_through_transform
(
K
),
make_merge_transform
(
make_tuple
(
N
,
Ho
*
Wo
))),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
,
2
>
{}),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
,
2
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
#endif
const
auto
GemmM
=
out_gemmm_gemmn_global_desc
.
GetLength
(
I0
);
const
auto
GemmM
=
out_gemmm_gemmn_global_desc
.
GetLength
(
I0
);
const
auto
GemmN
=
out_gemmm_gemmn_global_desc
.
GetLength
(
I1
);
const
auto
GemmN
=
out_gemmm_gemmn_global_desc
.
GetLength
(
I1
);
...
@@ -840,8 +809,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
...
@@ -840,8 +809,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
throw
std
::
runtime_error
(
"wrong! GEMM size no divisible"
);
throw
std
::
runtime_error
(
"wrong! GEMM size no divisible"
);
}
}
constexpr
auto
GemmM1
=
GemmMPerThread
*
GemmMLevel0Cluster
*
GemmMLevel1Cluster
;
constexpr
auto
GemmM1
=
Number
<
GemmMPerThread
*
GemmMLevel0Cluster
*
GemmMLevel1Cluster
>
{}
;
constexpr
auto
GemmN1
=
GemmNPerThread
*
GemmNLevel0Cluster
*
GemmNLevel1Cluster
;
constexpr
auto
GemmN1
=
Number
<
GemmNPerThread
*
GemmNLevel0Cluster
*
GemmNLevel1Cluster
>
{}
;
const
auto
GemmM0
=
GemmM
/
GemmM1
;
const
auto
GemmM0
=
GemmM
/
GemmM1
;
const
auto
GemmN0
=
GemmN
/
GemmN1
;
const
auto
GemmN0
=
GemmN
/
GemmN1
;
...
@@ -849,8 +818,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
...
@@ -849,8 +818,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
const
auto
out_gemmm0_gemmm1_gemmn0_gemmn1_global_desc
=
const
auto
out_gemmm0_gemmm1_gemmn0_gemmn1_global_desc
=
transform_dynamic_tensor_descriptor
(
transform_dynamic_tensor_descriptor
(
out_gemmm_gemmn_global_desc
,
out_gemmm_gemmn_global_desc
,
make_tuple
(
DynamicUnMerge
<
2
>
{
make_multi_index
(
GemmM0
,
GemmM1
)
}
,
make_tuple
(
make_unmerge_transform
(
make_tuple
(
GemmM0
,
GemmM1
)
)
,
DynamicUnMerge
<
2
>
{
make_multi_index
(
GemmN0
,
GemmN1
)
}
),
make_unmerge_transform
(
make_tuple
(
GemmN0
,
GemmN1
)
)
),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
,
1
>
{},
Sequence
<
2
,
3
>
{}));
make_tuple
(
Sequence
<
0
,
1
>
{},
Sequence
<
2
,
3
>
{}));
...
@@ -1469,22 +1438,23 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
...
@@ -1469,22 +1438,23 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
// weight tensor
// weight tensor
const
auto
wei_gemmk_gemmm_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
wei_gemmk_gemmm_global_desc
=
transform_dynamic_tensor_descriptor
(
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_
multi_index
(
K
,
C
)),
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_
tuple
(
K
,
C
)),
make_tuple
(
DynamicP
ass
T
hrough
{
K
},
DynamicP
ass
T
hrough
{
C
}
),
make_tuple
(
make_p
ass
_t
hrough
_transform
(
K
),
make_p
ass
_t
hrough
_transform
(
C
)
),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
>
{}));
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
>
{}));
// input tensor
// input tensor
const
auto
in_gemmk_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
in_gemmk_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
in_n_c_hi_wi_global_desc
,
in_n_c_hi_wi_global_desc
,
make_tuple
(
DynamicP
ass
T
hrough
{
C
},
DynamicMerge
<
3
>
{
make_multi_index
(
N
,
Ho
,
Wo
)
}
),
make_tuple
(
make_p
ass
_t
hrough
_transform
(
C
),
make_merge_transform
(
make_tuple
(
N
,
Ho
,
Wo
)
)
),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
,
2
,
3
>
{}),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
,
2
,
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
// output tensor
// output tensor
const
auto
out_gemmm_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
const
auto
out_gemmm_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_multi_index
(
N
,
K
,
Ho
*
Wo
)),
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
N
,
K
,
Ho
*
Wo
)),
make_tuple
(
DynamicPassThrough
{
K
},
DynamicMerge
<
2
>
{
make_multi_index
(
N
,
Ho
*
Wo
)}),
make_tuple
(
make_pass_through_transform
(
K
),
make_merge_transform
(
make_tuple
(
N
,
Ho
*
Wo
))),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
,
2
>
{}),
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
,
2
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
...
@@ -1498,8 +1468,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
...
@@ -1498,8 +1468,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
throw
std
::
runtime_error
(
"wrong! GEMM size no divisible"
);
throw
std
::
runtime_error
(
"wrong! GEMM size no divisible"
);
}
}
constexpr
auto
GemmM1
=
GemmMPerThread
*
GemmMLevel0Cluster
*
GemmMLevel1Cluster
;
constexpr
auto
GemmM1
=
Number
<
GemmMPerThread
*
GemmMLevel0Cluster
*
GemmMLevel1Cluster
>
{}
;
constexpr
auto
GemmN1
=
GemmNPerThread
*
GemmNLevel0Cluster
*
GemmNLevel1Cluster
;
constexpr
auto
GemmN1
=
Number
<
GemmNPerThread
*
GemmNLevel0Cluster
*
GemmNLevel1Cluster
>
{}
;
const
auto
GemmM0
=
GemmM
/
GemmM1
;
const
auto
GemmM0
=
GemmM
/
GemmM1
;
const
auto
GemmN0
=
GemmN
/
GemmN1
;
const
auto
GemmN0
=
GemmN
/
GemmN1
;
...
@@ -1507,8 +1477,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
...
@@ -1507,8 +1477,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
const
auto
out_gemmm0_gemmm1_gemmn0_gemmn1_global_desc
=
const
auto
out_gemmm0_gemmm1_gemmn0_gemmn1_global_desc
=
transform_dynamic_tensor_descriptor
(
transform_dynamic_tensor_descriptor
(
out_gemmm_gemmn_global_desc
,
out_gemmm_gemmn_global_desc
,
make_tuple
(
DynamicUnMerge
<
2
>
{
make_multi_index
(
GemmM0
,
GemmM1
)
}
,
make_tuple
(
make_unmerge_transform
(
make_tuple
(
GemmM0
,
GemmM1
)
)
,
DynamicUnMerge
<
2
>
{
make_multi_index
(
GemmN0
,
GemmN1
)
}
),
make_unmerge_transform
(
make_tuple
(
GemmN0
,
GemmN1
)
)
),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
,
1
>
{},
Sequence
<
2
,
3
>
{}));
make_tuple
(
Sequence
<
0
,
1
>
{},
Sequence
<
2
,
3
>
{}));
...
...
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
View file @
4687ef88
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
namespace
ck
{
namespace
ck
{
template
<
typename
LowLength
=
index_t
>
template
<
typename
LowLength
>
struct
DynamicPassThrough
struct
DynamicPassThrough
{
{
using
LowerIndex
=
MultiIndex
<
1
>
;
using
LowerIndex
=
MultiIndex
<
1
>
;
...
@@ -84,10 +84,7 @@ struct DynamicPassThrough
...
@@ -84,10 +84,7 @@ struct DynamicPassThrough
}
}
};
};
template
<
bool
SkipIsValidCheck
=
false
,
template
<
typename
LowLength
,
typename
LeftPad
,
typename
RightPad
,
bool
SkipIsValidCheck
=
false
>
typename
LowLength
=
index_t
,
typename
LeftPad
=
index_t
,
typename
RightPad
=
index_t
>
struct
DynamicPad
struct
DynamicPad
{
{
using
LowerIndex
=
MultiIndex
<
1
>
;
using
LowerIndex
=
MultiIndex
<
1
>
;
...
@@ -175,7 +172,7 @@ struct DynamicPad
...
@@ -175,7 +172,7 @@ struct DynamicPad
}
}
};
};
template
<
bool
SkipIsValidCheck
=
false
,
typename
LowLength
=
index_t
,
typename
LeftPad
=
index_t
>
template
<
typename
LowLength
,
typename
LeftPad
,
bool
SkipIsValidCheck
=
false
>
struct
DynamicLeftPad
struct
DynamicLeftPad
{
{
using
LowerIndex
=
MultiIndex
<
1
>
;
using
LowerIndex
=
MultiIndex
<
1
>
;
...
@@ -257,7 +254,7 @@ struct DynamicLeftPad
...
@@ -257,7 +254,7 @@ struct DynamicLeftPad
}
}
};
};
template
<
bool
SkipIsValidCheck
=
false
,
typename
LowLength
=
index_t
,
typename
RightPad
=
index_t
>
template
<
typename
LowLength
,
typename
RightPad
,
bool
SkipIsValidCheck
=
false
>
struct
DynamicRightPad
struct
DynamicRightPad
{
{
using
LowerIndex
=
MultiIndex
<
1
>
;
using
LowerIndex
=
MultiIndex
<
1
>
;
...
@@ -349,13 +346,13 @@ struct DynamicRightPad
...
@@ -349,13 +346,13 @@ struct DynamicRightPad
// 2) Tuple of Number, which is known at compile-time, or
// 2) Tuple of Number, which is known at compile-time, or
// 3) Tuple of mixture of index_t and Number, which is known partially at run-time and partially
// 3) Tuple of mixture of index_t and Number, which is known partially at run-time and partially
// at compile-time
// at compile-time
template
<
index_t
NDimUp
,
template
<
typename
UpLengths
,
typename
UpLengths
=
MultiIndex
<
NDimUp
>,
typename
Coefficients
,
typename
Coefficients
=
MultiIndex
<
NDimUp
>
,
typename
std
::
enable_if
<
UpLengths
::
Size
()
==
Coefficients
::
Size
(),
bool
>
::
type
=
false
>
typename
std
::
enable_if
<
UpLengths
::
Size
()
==
NDimUp
&&
Coefficients
::
Size
()
==
NDimUp
,
bool
>::
type
=
false
>
struct
DynamicEmbed
struct
DynamicEmbed
{
{
static
constexpr
index_t
NDimUp
=
UpLengths
::
Size
();
using
LowerIndex
=
MultiIndex
<
1
>
;
using
LowerIndex
=
MultiIndex
<
1
>
;
using
UpperIndex
=
MultiIndex
<
NDimUp
>
;
using
UpperIndex
=
MultiIndex
<
NDimUp
>
;
...
@@ -439,9 +436,11 @@ struct DynamicEmbed
...
@@ -439,9 +436,11 @@ struct DynamicEmbed
}
}
};
};
template
<
index_t
NDimLow
,
typename
LowLengths
=
MultiIndex
<
NDimLow
>
>
template
<
typename
LowLengths
>
struct
DynamicMerge
struct
DynamicMerge
{
{
static
constexpr
index_t
NDimLow
=
LowLengths
::
Size
();
using
LowerIndex
=
MultiIndex
<
NDimLow
>
;
using
LowerIndex
=
MultiIndex
<
NDimLow
>
;
using
UpperIndex
=
MultiIndex
<
1
>
;
using
UpperIndex
=
MultiIndex
<
1
>
;
...
@@ -952,12 +951,11 @@ struct DynamicMerge
...
@@ -952,12 +951,11 @@ struct DynamicMerge
}
}
};
};
template
<
index_t
NDimUp
,
template
<
typename
UpLengths
,
bool
Use24BitIntegerCalculation
>
bool
Use24BitIntegerCalculation
=
false
,
typename
UpLengths
=
MultiIndex
<
NDimUp
>,
typename
std
::
enable_if
<
UpLengths
::
Size
()
==
NDimUp
,
bool
>::
type
=
false
>
struct
DynamicUnMerge
struct
DynamicUnMerge
{
{
static
constexpr
index_t
NDimUp
=
UpLengths
::
Size
();
using
LowerIndex
=
MultiIndex
<
1
>
;
using
LowerIndex
=
MultiIndex
<
1
>
;
using
UpperIndex
=
MultiIndex
<
NDimUp
>
;
using
UpperIndex
=
MultiIndex
<
NDimUp
>
;
...
@@ -1046,7 +1044,7 @@ struct DynamicUnMerge
...
@@ -1046,7 +1044,7 @@ struct DynamicUnMerge
}
}
};
};
template
<
typename
LowerIndex
=
index_t
>
template
<
typename
LowerIndex
>
struct
DynamicFreeze
struct
DynamicFreeze
{
{
LowerIndex
low_idx_
;
LowerIndex
low_idx_
;
...
...
composable_kernel/include/tensor_description/dynamic_multi_index_transform_helper.hpp
0 → 100644
View file @
4687ef88
#ifndef CK_DYNAMIC_MULTI_INDEX_TRANSFORM_HELPER_HPP
#define CK_DYNAMIC_MULTI_INDEX_TRANSFORM_HELPER_HPP
#include "common_header.hpp"
#include "dynamic_multi_index_transform.hpp"
namespace
ck
{
template
<
typename
LowLength
>
__host__
__device__
constexpr
auto
make_pass_through_transform
(
const
LowLength
&
low_length
)
{
return
DynamicPassThrough
<
LowLength
>
{
low_length
};
}
template
<
typename
LowLength
,
typename
LeftPad
,
typename
RightPad
,
bool
SkipIsValidCheck
=
false
>
__host__
__device__
constexpr
auto
make_pad_transform
(
const
LowLength
&
low_length
,
const
LeftPad
&
left_pad
,
const
RightPad
&
right_pad
,
integral_constant
<
bool
,
SkipIsValidCheck
>
=
integral_constant
<
bool
,
false
>
{})
{
return
DynamicPad
<
LowLength
,
LeftPad
,
RightPad
,
SkipIsValidCheck
>
{
low_length
,
left_pad
,
right_pad
};
}
template
<
typename
LowLength
,
typename
LeftPad
,
bool
SkipIsValidCheck
=
false
>
__host__
__device__
constexpr
auto
make_left_pad_transform
(
const
LowLength
&
low_length
,
const
LeftPad
&
left_pad
,
integral_constant
<
bool
,
SkipIsValidCheck
>
=
integral_constant
<
bool
,
false
>
{})
{
return
DynamicLeftPad
<
LowLength
,
LeftPad
,
SkipIsValidCheck
>
{
low_length
,
left_pad
};
}
template
<
typename
LowLength
,
typename
RightPad
,
bool
SkipIsValidCheck
>
__host__
__device__
constexpr
auto
make_right_pad_transform
(
const
LowLength
&
low_length
,
const
RightPad
&
right_pad
,
integral_constant
<
bool
,
SkipIsValidCheck
>
=
integral_constant
<
bool
,
false
>
{})
{
return
DynamicRightPad
<
LowLength
,
RightPad
,
SkipIsValidCheck
>
{
low_length
,
right_pad
};
}
template
<
typename
UpLengths
,
typename
Coefficients
,
typename
std
::
enable_if
<
UpLengths
::
Size
()
==
Coefficients
::
Size
(),
bool
>
::
type
=
false
>
__host__
__device__
constexpr
auto
make_embed_transform
(
const
UpLengths
&
up_lengths
,
const
Coefficients
&
coefficients
)
{
return
DynamicEmbed
<
UpLengths
,
Coefficients
>
{
up_lengths
,
coefficients
};
}
template
<
typename
LowLengths
>
__host__
__device__
constexpr
auto
make_merge_transform
(
const
LowLengths
&
low_lengths
)
{
return
DynamicMerge
<
LowLengths
>
{
low_lengths
};
}
template
<
typename
UpLengths
,
bool
Use24BitIntegerCalculation
=
false
>
__host__
__device__
constexpr
auto
make_unmerge_transform
(
const
UpLengths
&
up_lengths
,
integral_constant
<
bool
,
Use24BitIntegerCalculation
>
=
integral_constant
<
bool
,
false
>
{})
{
return
DynamicUnMerge
<
UpLengths
,
Use24BitIntegerCalculation
>
{
up_lengths
};
}
template
<
typename
LowerIndex
>
__host__
__device__
constexpr
auto
make_freeze_transform
(
const
LowerIndex
&
low_idx
)
{
return
DynamicFreeze
<
LowerIndex
>
{
low_idx
};
}
}
// namespace ck
#endif
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp
View file @
4687ef88
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
#include "common_header.hpp"
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_multi_index_transform_helper.hpp"
namespace
ck
{
namespace
ck
{
...
@@ -23,8 +24,7 @@ make_dynamic_naive_tensor_descriptor_v2(const Tuple<Lengths...>& lengths,
...
@@ -23,8 +24,7 @@ make_dynamic_naive_tensor_descriptor_v2(const Tuple<Lengths...>& lengths,
{
{
constexpr
index_t
N
=
sizeof
...(
Lengths
);
constexpr
index_t
N
=
sizeof
...(
Lengths
);
const
auto
transforms
=
const
auto
transforms
=
make_tuple
(
make_embed_transform
(
lengths
,
strides
));
make_tuple
(
DynamicEmbed
<
N
,
Tuple
<
Lengths
...
>
,
Tuple
<
Strides
...
>>
{
lengths
,
strides
});
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
...
@@ -66,7 +66,7 @@ make_dynamic_naive_tensor_descriptor_packed_v2(const Tuple<Lengths...>& lengths)
...
@@ -66,7 +66,7 @@ make_dynamic_naive_tensor_descriptor_packed_v2(const Tuple<Lengths...>& lengths)
{
{
constexpr
index_t
N
=
sizeof
...(
Lengths
);
constexpr
index_t
N
=
sizeof
...(
Lengths
);
const
auto
transforms
=
make_tuple
(
DynamicUnMerge
<
N
,
false
,
Tuple
<
Lengths
...
>>
{
lengths
}
);
const
auto
transforms
=
make_tuple
(
make_unmerge_transform
(
lengths
)
);
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
...
...
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
View file @
4687ef88
...
@@ -49,7 +49,8 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
...
@@ -49,7 +49,8 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
wei_kcyx_device_buf
.
ToDevice
(
wei_kcyx
.
mData
.
data
());
wei_kcyx_device_buf
.
ToDevice
(
wei_kcyx
.
mData
.
data
());
out_nkhw_device_buf
.
ToDevice
(
out_nkhw
.
mData
.
data
());
out_nkhw_device_buf
.
ToDevice
(
out_nkhw
.
mData
.
data
());
#if 1
#if 0
// run-time variables
const auto in_n_c_hi_wi_desc =
const auto in_n_c_hi_wi_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(to_multi_index(InDesc::GetLengths()));
make_dynamic_naive_tensor_descriptor_packed_v2(to_multi_index(InDesc::GetLengths()));
const auto wei_k_c_y_x_desc =
const auto wei_k_c_y_x_desc =
...
@@ -62,6 +63,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
...
@@ -62,6 +63,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
const auto in_left_pads = to_multi_index(InLeftPads{});
const auto in_left_pads = to_multi_index(InLeftPads{});
const auto in_right_pads = to_multi_index(InRightPads{});
const auto in_right_pads = to_multi_index(InRightPads{});
#else
#else
// compile-time variables
const
auto
in_n_c_hi_wi_desc
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
const
auto
in_n_c_hi_wi_desc
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
sequence_to_tuple_of_number
(
InDesc
::
GetLengths
()));
sequence_to_tuple_of_number
(
InDesc
::
GetLengths
()));
const
auto
wei_k_c_y_x_desc
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
const
auto
wei_k_c_y_x_desc
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
...
@@ -234,7 +236,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
...
@@ -234,7 +236,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
constexpr
auto
conv_driver
=
constexpr
auto
conv_driver
=
#if 1
#if 1
DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
#elif
1
#elif
0
DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
#elif 1
#elif 1
DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment