Add comparison config

d443a7a6 · qinletao · eff586ac · d443a7a6
Commit d443a7a6 authored Apr 21, 2022 by qinletao
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 8 deletions

example/01_gemm/gemm_xdl_fp64.cpp example/01_gemm/gemm_xdl_fp64.cpp +17 -8

No files found.
--- a/example/01_gemm/gemm_xdl_fp64.cpp
+++ b/example/01_gemm/gemm_xdl_fp64.cpp
@@ -23,16 +23,13 @@ using S = ck::Sequence<Is...>;
 using F64 = double;
 using F32 = float;
+using F16 = ck::half_t;
 using Row = ck::tensor_layout::gemm::RowMajor;
 using Col = ck::tensor_layout::gemm::ColumnMajor;
 using PassThrough = ck::tensor_operation::element_wise::PassThrough;
-using ADataType   = double;
-using BDataType   = double;
-using CDataType   = double;
-using AccDataType = double;
 using ALayout = ck::tensor_layout::gemm::RowMajor;
 using BLayout = ck::tensor_layout::gemm::ColumnMajor;
@@ -50,7 +47,19 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmXdl
 //##########|  Type|  Type|  Type|    Type|        |        |        | Elementwise| Elementwise| Elementwise|Spacialization|  Size| Block| Block| Block|   |  XDL|  XDL|  Per|  Per|   ThreadCluster|  ThreadCluster| SrcAccessOrder|   SrcVectorDim|      SrcScalar|      DstScalar| AddExtraM|   ThreadCluster|  ThreadCluster| SrcAccessOrder|  SrcVectorDim|      SrcScalar|      DstScalar| AddExtraN| SrcDstVectorDim|       DstScalar|
 //##########|      |      |      |        |        |        |        |   Operation|   Operation|   Operation|              |      |      |      |      |   |     |     | Wave| Wave| Lengths_K0_M_K1|   ArrangeOrder|               |               |      PerVector|   PerVector_K1|          | Lengths_K0_N_K1|   ArrangeOrder|               |              |      PerVector|   PerVector_K1|          |                |       PerVector|
 //##########|      |      |      |        |        |        |        |            |            |            |              |      |      |      |      |   |     |     |     |     |                |               |               |               |               |               |          |                |               |               |              |               |               |          |                |                |
-             <  F64,   F64,   F64,     F64,     Row,     Col,     Row, PassThrough, PassThrough, PassThrough,   GemmDefault,   256,   128,   128,     4,  2,   16,   16,    4,    4,     S<4, 64, 1>,     S<1, 0, 2>,     S<1, 0, 2>,              2,              1,              1,      true,     S<4, 64, 1>,     S<1, 0, 2>,     S<1, 0, 2>,             2,              1,              1,      true,               7,               1>;
+#if 1
+             <  F64,   F64,   F64,     F64,     Row,     Col,     Row, PassThrough, PassThrough, PassThrough,   GemmDefault,   256,   128,   128,     4,  2,   16,   16,    4,    4,     S<4, 64, 1>,     S<1, 0, 2>,     S<1, 0, 2>,              2,              2,              2,      true,     S<4, 64, 1>,     S<1, 0, 2>,     S<1, 0, 2>,             2,              2,              2,      true,               7,               1>;
+using ADataType   = double;
+using BDataType   = double;
+using CDataType   = double;
+using AccDataType = double;
+#else
+            <   F32,   F32,   F32,     F32,     Row,     Col,     Row, PassThrough, PassThrough, PassThrough,   GemmDefault,   256,   128,   128,     4,  4,   16,   16,    4,    4,     S<4, 64, 1>,     S<1, 0, 2>,     S<1, 0, 2>,              2,              4,              4,      true,     S<4, 64, 1>,     S<1, 0, 2>,     S<1, 0, 2>,             2,              4,              4,      true,               7,               1>;
+using ADataType   = float;
+using BDataType   = float;
+using CDataType   = float;
+using AccDataType = float;
+#endif
 // clang-format on
 using ReferenceGemmInstance = ck::tensor_operation::host::
@@ -135,8 +144,8 @@ int main(int argc, char* argv[])
        b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
        break;
    default:
-        //a_m_k.GenerateTensorValue(GeneratorTensor_2<ADataType>{-5, 5});
+        //a_m_k.GenerateTensorValue(GeneratorTensor_2<ADataType>{-5, 5}); 
-        b_k_n.GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5});
+        b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
        a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1});
        //b_k_n.GenerateTensorValue(GeneratorTensor_1<BDataType>{1});
    }
@@ -205,7 +214,7 @@ int main(int argc, char* argv[])
            LogRangeAsType<double>(std::cout << "a : ", a_m_k.mData, ",") << std::endl;
            LogRangeAsType<double>(std::cout << "b: ", b_k_n.mData, ",") << std::endl;
            LogRangeAsType<double>(std::cout << "c_device: ", c_m_n_device_result.mData, ",") << std::endl;
-            LogRangeAsType<double>(std::cout << "c_host: ", c_m_n_host_result.mData, ",")
+            LogRangeAsType<double>(std::cout << "c_host  : ", c_m_n_host_result.mData, ",")
                << std::endl;
        }
 #endif