Commit 553cfa68 authored by aska-0096's avatar aska-0096
Browse files

temp save

parent a8687138
...@@ -35,6 +35,129 @@ using ResidualLayout = typename LayoutSettingSelector<NDimSpatial>::ResidualLayo ...@@ -35,6 +35,129 @@ using ResidualLayout = typename LayoutSettingSelector<NDimSpatial>::ResidualLayo
// clang-format off // clang-format off
template <ck::index_t NDimSpatial> template <ck::index_t NDimSpatial>
using DeviceConvFwdInstances = std::tuple< using DeviceConvFwdInstances = std::tuple<
// Instances provide to AIT Fp16
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 256, // BlockSize
512, 16, 4, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
4, 1, // MRepeat x NRepeat
S<1, 256, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<4, 16, 4>, S<1, 0, 2>, S<1, 0, 2>, 2, 2, 2, true,
2, 1, S<1, 256, 1, 1>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 256, // BlockSize
256, 64, 8, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
2, 4, // MRepeat x NRepeat
S<1, 256, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
2, 2, S<1, 256, 1, 1>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 256, // BlockSize
256, 64, 4, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
2, 4, // MRepeat x NRepeat
S<1, 256, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
2, 2, S<1, 256, 1, 1>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 256, // BlockSize
256, 128, 4, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
4, 4, // MRepeat x NRepeat
S<1, 256, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
4, 2, S<1, 256, 1, 1>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 256, // BlockSize
256, 128, 8, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
4, 4, // MRepeat x NRepeat
S<1, 256, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
4, 2, S<1, 256, 1, 1>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 256, // BlockSize
128, 256, 4, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
4, 4, // MRepeat x NRepeat
S<2, 128, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
4, 2, S<1, 128, 1, 2>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 256, // BlockSize
128, 256, 8, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
4, 4, // MRepeat x NRepeat
S<2, 128, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
4, 2, S<1, 128, 1, 2>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 192, // BlockSize
192, 48, 6, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
6, 1, // MRepeat x NRepeat
S<1, 192, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<2, 48, 2>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 4, true,
3, 1, S<1, 96, 1, 2>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 128, // BlockSize
128, 64, 8, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
2, 4, // MRepeat x NRepeat
S<1, 128, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<2, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
2, 2, S<1, 128, 1, 1>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 96, // BlockSize
96, 48, 8, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
6, 1, // MRepeat x NRepeat
S<1, 96, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<2, 48, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
3, 1, S<1, 48, 1, 2>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 64, // BlockSize
64, 64, 8, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA
4, 2, // MRepeat x NRepeat
S<1, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
S<1, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
4, 1, S<1, 64, 1, 1>, 8>
#if 0
// GEMM_N = 16 // GEMM_N = 16
// K0 = 8 // K0 = 8
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle< ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
...@@ -53,12 +176,12 @@ using DeviceConvFwdInstances = std::tuple< ...@@ -53,12 +176,12 @@ using DeviceConvFwdInstances = std::tuple<
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>, InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType, InKernelDataType, WeiKernelDataType, ck::Tuple<BiasKernelDataType, ResidualKernelDataType>, OutKernelDataType, AccDataType, CShuffleDataType,
InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 256, // BlockSize InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmSpec, 256, // BlockSize
1024, 16, 3, 16, // MPerBlock x NPerBlock x K0PerBlock x K1 1024, 16, 2, 8, // MPerBlock x NPerBlock x K0PerBlock x K1
16, 16, // MPerWMMA x NPerWMMA 16, 16, // MPerWMMA x NPerWMMA
8, 1, // MRepeat x NRepeat 8, 1, // MRepeat x NRepeat
S<1, 256, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, true, // ABlockTransferThreadClusterLengths_AK0_M_AK1, ArrangeOrder, SrcAccessOrder, VectorDim, SrcScalarPerVector, DstScalarPerVector_AK1, LdsExtraM S<1, 256, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, // ABlockTransferThreadClusterLengths_AK0_M_AK1, ArrangeOrder, SrcAccessOrder, VectorDim, SrcScalarPerVector, DstScalarPerVector_AK1, LdsExtraM
S<1, 16, 8>, S<1, 0, 2>, S<1, 0, 2>, 2, 2, 2, true, S<2, 16, 4>, S<1, 0, 2>, S<1, 0, 2>, 2, 2, 2, true,
8, 1, S<1, 256, 1, 1>, 16>, 4, 1, S<1, 256, 1, 1>, 8>,
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle< ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial, NDimSpatial,
InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>, InputLayout<NDimSpatial>, WeightLayout<NDimSpatial>, ck::Tuple<BiasLayout<NDimSpatial>, ResidualLayout<NDimSpatial>>, OutputLayout<NDimSpatial>,
...@@ -103,7 +226,6 @@ using DeviceConvFwdInstances = std::tuple< ...@@ -103,7 +226,6 @@ using DeviceConvFwdInstances = std::tuple<
S<1, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, true, // ABlockTransferThreadClusterLengths_AK0_M_AK1, ArrangeOrder, SrcAccessOrder, VectorDim, SrcScalarPerVector, DstScalarPerVector_AK1, LdsExtraM S<1, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, true, // ABlockTransferThreadClusterLengths_AK0_M_AK1, ArrangeOrder, SrcAccessOrder, VectorDim, SrcScalarPerVector, DstScalarPerVector_AK1, LdsExtraM
S<1, 16, 4>, S<1, 0, 2>, S<1, 0, 2>, 2, 4, 4, true, S<1, 16, 4>, S<1, 0, 2>, S<1, 0, 2>, 2, 4, 4, true,
2, 1, S<1, 64, 1, 1>, 16> 2, 1, S<1, 64, 1, 1>, 16>
/*
// K0 = 2 // K0 = 2
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle< ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Wmma_CShuffle<
NDimSpatial, NDimSpatial,
...@@ -1509,7 +1631,7 @@ using DeviceConvFwdInstances = std::tuple< ...@@ -1509,7 +1631,7 @@ using DeviceConvFwdInstances = std::tuple<
S<1, 96, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, true, // ABlockTransferThreadClusterLengths_AK0_M_AK1, ArrangeOrder, SrcAccessOrder, VectorDim, SrcScalarPerVector, DstScalarPerVector_AK1, LdsExtraM S<1, 96, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, true, // ABlockTransferThreadClusterLengths_AK0_M_AK1, ArrangeOrder, SrcAccessOrder, VectorDim, SrcScalarPerVector, DstScalarPerVector_AK1, LdsExtraM
S<1, 48, 2>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<1, 48, 2>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true,
6, 1, S<1, 32, 1, 3>, 16> 6, 1, S<1, 32, 1, 3>, 16>
*/ #endif
>; >;
// clang-format on // clang-format on
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment