Introduce two new tensor generators

1fb3bb8d · Andriy Roshchenko · 3dea7cc8 · 1fb3bb8d · 1fb3bb8d · 1fb3bb8d
Commit 1fb3bb8d authored Nov 07, 2024 by Andriy Roshchenko
3 changed files
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -11,7 +11,7 @@
            "environment": {
                "MY_ENVIRONMENT_VARIABLE": "NONE",
                "PATH": "/usr/local/.cargo/bin:$penv{PATH}",
-                "SCCACHE_IDLE_TIMEOUT": "7200"
+                "SCCACHE_IDLE_TIMEOUT": "11000"
            },
            "cacheVariables": {
                "CMAKE_BUILD_TYPE": "Debug",

--- a/example/01_gemm/run_gemm_example.inc
+++ b/example/01_gemm/run_gemm_example.inc
@@ -166,6 +166,14 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
        ck::utils::FillUniformDistributionIntegerValue<ADataType>{-2.f, 2.f}(a_m_k);
        ck::utils::FillUniformDistributionIntegerValue<BDataType>{-2.f, 2.f}(b_k_n);
        break;
+    case 6:
+        a_m_k.GenerateTensorValue(GeneratorTensor_PI<ADataType>{});
+        b_k_n.GenerateTensorValue(GeneratorTensor_1<BDataType>{1});
+        break;
+    case 7:
+        a_m_k.GenerateTensorValue(GeneratorTensor_PI_A<ADataType>{});
+        b_k_n.GenerateTensorValue(GeneratorTensor_PI_B<BDataType>{});
+        break;
    default:
        ck::utils::FillUniformDistribution<ADataType>{-0.1f, 0.1f}(a_m_k);
        ck::utils::FillUniformDistribution<BDataType>{-0.1f, 0.1f}(b_k_n);
@@ -368,6 +376,32 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
 #endif
        if(pass)
            std::cout << "Verification on CPU: PASS" << std::endl;
+        if(config.init_method == 6 || config.init_method == 7)
+        {
+            std::cout << std::fixed << std::setprecision(16);
+            // AccDataType a = ck::type_convert<AccDataType>(a_m_k(0, 10));
+            // AccDataType b = ck::type_convert<AccDataType>(b_k_n(0, 10));
+            // std::cout << "a(0,10): " << a << std::endl;
+            // std::cout << "b(0,10): " << b << std::endl;
+            // std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 0)) << std::endl;
+            // std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 1)) << std::endl;
+            // std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 2)) << std::endl;
+            // std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(0, 0)) << std::endl;
+            // std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(1, 0)) << std::endl;
+            // std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(2, 0)) << std::endl;
+            AccDataType d = ck::type_convert<AccDataType>(c_m_n_device_result(0, 10));
+            AccDataType h = ck::type_convert<AccDataType>(c_m_n_host_result(10, 0));
+            std::cout << "device result: " << d << std::endl;
+            std::cout << "host result: " << h << std::endl;
+            std::cout << "expected result: " << M_PI << std::endl;
+            std::cout << "device - host: " << std::abs(d - h) << std::endl;
+            std::cout << "device - expected: " << std::abs(d - M_PI) << std::endl;
+            std::cout << "atol: " << get_atol<CDataType>() << std::endl;
+            std::cout << std::endl << std::endl;
+        }
    }
    if((config.do_verification == 2) || (config.do_verification == 3))

--- a/library/include/ck/library/utility/host_tensor_generator.hpp
+++ b/library/include/ck/library/utility/host_tensor_generator.hpp
@@ -304,3 +304,107 @@ struct GeneratorTensor_Diagonal
        return pred ? value : T{0};
    }
 };
+/**
+ * @brief Generates tensor entries from the terms of the Leibniz series for Pi.
+ *
+ * The entry at position i along the last dimension is (-1)^i * 4 / (2 * i + 1); the
+ * remaining indices do not influence the value.
+ *
+ * @tparam T The type of the tensor values.
+ *
+ * Usage: For verification of GEMM
+ *    a_m_k.GenerateTensorValue(GeneratorTensor_PI<ADataType>{});
+ *    b_k_n.GenerateTensorValue(GeneratorTensor_1<BDataType>{1});
+ *
+ *    c = a * b;
+ *
+ *    We expect that |c[i][j]-M_PI| <= truncation_error(K)
+ */
+template <typename T>
+struct GeneratorTensor_PI
+{
+    /**
+     * @brief Returns the series term selected by the last index in Xs.
+     *
+     * When invoked without any index, the value of Pi itself is returned.
+     */
+    template <typename... Ts>
+    T operator()(Ts... Xs) const
+    {
+        if constexpr(sizeof...(Ts) == 0)
+        {
+            static constexpr double pi = 3.14159265358979323846;
+            return ck::type_convert<T>(pi);
+        }
+        else
+        {
+            const std::array<ck::index_t, sizeof...(Ts)> indices = {
+                {static_cast<ck::index_t>(Xs)...}};
+            const size_t k      = indices[sizeof...(Ts) - 1];
+            const float k_float = k;
+            // Terms alternate in sign, starting with +4 at k == 0.
+            const float numerator = (k % 2 == 0) ? 4.0 : -4.0;
+            // The quotient is evaluated in double and then narrowed to float.
+            const float term = numerator / (2.0 * k_float + 1.0);
+            return ck::type_convert<T>(term);
+        }
+    }
+    /**
+     * @brief Returns 4 / (2 * N + 1), the magnitude of the series term at index N.
+     *
+     * For this alternating series that magnitude bounds the error of the sum of the
+     * first N terms.
+     */
+    static double truncation_error(size_t N) { return 4.0 / (2.0 * N + 1.0); }
+};
+/**
+ * @brief Generates the A-side factors of a non-alternating variant of the Leibniz
+ * formula for Pi.
+ *
+ * The entry at position i along the last dimension is 2 / (4 * i + 1); the remaining
+ * indices do not influence the value.
+ *
+ * @tparam T The type of the tensor values.
+ *
+ * Usage: For verification of GEMM
+ *    a_m_k.GenerateTensorValue(GeneratorTensor_PI_A<ADataType>{});
+ *    b_k_n.GenerateTensorValue(GeneratorTensor_PI_B<BDataType>{});
+ *
+ *    c = a * b;
+ *
+ *    We expect that |c[i][j]-M_PI| <= 0.00013 for K >= 4096 and a,b,c are float.
+ */
+template <typename T>
+struct GeneratorTensor_PI_A
+{
+    static constexpr double pi = 3.14159265358979323846;
+    /**
+     * @brief Returns 2 / (4 * i + 1), where i is the last index in Xs.
+     *
+     * When invoked without any index, Pi / 2 is returned.
+     */
+    template <typename... Ts>
+    T operator()(Ts... Xs) const
+    {
+        if constexpr(sizeof...(Ts) == 0)
+        {
+            return ck::type_convert<T>(pi / 2.0);
+        }
+        else
+        {
+            const std::array<ck::index_t, sizeof...(Ts)> indices = {
+                {static_cast<ck::index_t>(Xs)...}};
+            const size_t k      = indices[sizeof...(Ts) - 1];
+            const float k_float = k;
+            // The quotient is evaluated in double and then narrowed to float.
+            const float factor = 2.0 / (4.0 * k_float + 1.0);
+            return ck::type_convert<T>(factor);
+        }
+    }
+};
+/**
+ * @brief Generates the B-side factors of a non-alternating variant of the Leibniz
+ * formula for Pi.
+ *
+ * The entry at position i along the first dimension is 4 / (4 * i + 3); the remaining
+ * indices do not influence the value.
+ *
+ * @tparam T The type of the tensor values.
+ */
+template <typename T>
+struct GeneratorTensor_PI_B
+{
+    static constexpr double pi = 3.14159265358979323846;
+    /**
+     * @brief Returns 4 / (4 * i + 3), where i is the first index in Xs.
+     *
+     * When invoked without any index, 2.0 is returned.
+     */
+    template <typename... Ts>
+    T operator()(Ts... Xs) const
+    {
+        if constexpr(sizeof...(Ts) == 0)
+        {
+            return ck::type_convert<T>(2.0);
+        }
+        else
+        {
+            const std::array<ck::index_t, sizeof...(Ts)> indices = {
+                {static_cast<ck::index_t>(Xs)...}};
+            const size_t k      = indices[0];
+            const float k_float = k;
+            // The quotient is evaluated in double and then narrowed to float.
+            const float factor = 4.0 / (4.0 * k_float + 3.0);
+            return ck::type_convert<T>(factor);
+        }
+    }
+};