add smallest config

64c58894 · ltqin · 21885e9c · 64c58894
Commit 64c58894 authored May 16, 2022 by ltqin
Show whitespace changes
Inline Side-by-side

Showing with 15 additions and 2 deletions

example/01_gemm/gemm_xdl_skip_lds_fp16.cpp example/01_gemm/gemm_xdl_skip_lds_fp16.cpp +15 -2

No files found.
--- a/example/01_gemm/gemm_xdl_skip_lds_fp16.cpp
+++ b/example/01_gemm/gemm_xdl_skip_lds_fp16.cpp
@@ -43,15 +43,18 @@ using BElementOp = ck::tensor_operation::element_wise::PassThrough;
 using CElementOp = ck::tensor_operation::element_wise::PassThrough;

 static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
-
+#define NORMAL_CONFIG 0
 // clang-format off
 using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmXdlSkipLds
        //###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout|           A|           B|           C|          GEMM| Block|  MPer|  NPer| K0Per| K1| MPer| NPer| MXdl| NXdl|  ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds|  BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer|
        //###########|  Type|  Type|  Type|    Type|        |        |        | Elementwise| Elementwise| Elementwise|Spacialization|  Size| Block| Block| Block|   |  XDL|  XDL|  Per|  Per|   ThreadCluster|  ThreadCluster| SrcAccessOrder|   SrcVectorDim|      SrcScalar|      DstScalar| AddExtraM|   ThreadCluster|  ThreadCluster| SrcAccessOrder|  SrcVectorDim|      SrcScalar|      DstScalar| AddExtraN| SrcDstVectorDim|       DstScalar|
        //###########|      |      |      |        |        |        |        |   Operation|   Operation|   Operation|              |      |      |      |      |   |     |     | Wave| Wave| Lengths_K0_M_K1|   ArrangeOrder|               |               |      PerVector|   PerVector_K1|          | Lengths_K0_N_K1|   ArrangeOrder|               |              |      PerVector|   PerVector_K1|          |                |       PerVector|
        //###########|      |      |      |        |        |        |        |            |            |            |              |      |      |      |      |   |     |     |     |     |                |               |               |               |               |               |          |                |               |               |              |               |               |          |                |                |
-        
+#if NORMAL_CONFIG       
                    <   F16,   F16,   F16,     F32,     Row,     Col,     Row, PassThrough, PassThrough, PassThrough,   GemmDefault,   128,    32,   64,     4,  8,   32,   32,    1,    1,     S<4, 32, 1>,     S<1, 0, 2>,     S<1, 0, 2>,              2,              8,              8,      true,     S<4, 32, 1>,     S<1, 0, 2>,     S<1, 0, 2>,             2,              8,              8,      true,               7,               1>;
+#else
+                    <   F16,   F16,   F16,     F32,     Row,     Col,     Row, PassThrough, PassThrough, PassThrough,   GemmDefault,   64,    16,   16,     4,    4,   16,   16,    1,    1,     S<4, 16, 1>,     S<1, 0, 2>,     S<1, 0, 2>,              2,              1,              1,      true,     S<4, 16, 1>,     S<1, 0, 2>,     S<1, 0, 2>,             2,              1,              1,      true,               7,               1>;
+#endif
 // clang-format on

 using ReferenceGemmInstance = ck::tensor_operation::host::
@@ -80,6 +83,7 @@ int main(int argc, char* argv[])
    int nrepeat          = 5;

    // GEMM shape
+#if NORMAL_CONFIG
    ck::index_t M = 64;
    ck::index_t N = 128;
    ck::index_t K = 64;
@@ -87,6 +91,15 @@ int main(int argc, char* argv[])
    ck::index_t StrideA = 64;
    ck::index_t StrideB = 64;
    ck::index_t StrideC = 128;
+#else
+    ck::index_t M = 16;
+    ck::index_t N = 16;
+    ck::index_t K = 16;
+
+    ck::index_t StrideA = 16;
+    ck::index_t StrideB = 16;
+    ck::index_t StrideC = 16;
+#endif

    if(argc == 4)
    {