Merge branch 'develop' of...

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/composable_kernel into simple_gemm_dl

Merge branch 'develop' of...
Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/composable_kernel into simple_gemm_dl
b460bff9 · Harisankar Sadasivan · 83d9c3fb · f7331c60 · b460bff9 · b460bff9
Commit b460bff9 authored Oct 20, 2023 by Harisankar Sadasivan
7 changed files
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -790,8 +790,8 @@ pipeline {
                    }
                    agent{ label rocmnode("navi32") }
                    environment{
-                        setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1101" """
+                        setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1101" -DDL_KERNELS=ON """
-                        execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx1101" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """
+                        execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx1101" -DDL_KERNELS=ON -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """
                    }
                    steps{
                        Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')

--- a/include/ck/ck.hpp
+++ b/include/ck/ck.hpp
@@ -66,6 +66,10 @@
 #define CK_USE_AMD_V_FMAC_F32
 #define CK_USE_AMD_V_DOT2_F32_F16
 #define CK_USE_AMD_V_DOT4_I32_I8
+#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__)
+#define CK_USE_AMD_V_FMAC_F32
+#define CK_USE_AMD_V_DOT2_F32_F16
+#define CK_USE_AMD_V_DOT4_I32_I8_GFX11
 #endif
 // MFMA instruction

--- a/include/ck/host_utility/hip_check_error.hpp
+++ b/include/ck/host_utility/hip_check_error.hpp
@@ -3,8 +3,10 @@
 #pragma once
+#include <sstream>
 #include <hip/hip_runtime.h>
+// To be removed, which really does not tell the location of failed HIP functional call
 inline void hip_check_error(hipError_t x)
 {
    if(x != hipSuccess)
@@ -15,3 +17,16 @@ inline void hip_check_error(hipError_t x)
        throw std::runtime_error(ss.str());
    }
 }
+#define HIP_CHECK_ERROR(retval_or_funcall)                                         \
+    do                                                                             \
+    {                                                                              \
+        hipError_t _tmpVal = retval_or_funcall;                                    \
+        if(_tmpVal != hipSuccess)                                                  \
+        {                                                                          \
+            std::ostringstream ostr;                                               \
+            ostr << "HIP Function Failed (" << __FILE__ << "," << __LINE__ << ") " \
+                 << hipGetErrorString(_tmpVal);                                    \
+            throw std::runtime_error(ostr.str());                                  \
+        }                                                                          \
+    } while(0)
--- a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp
@@ -296,6 +296,28 @@ struct DeviceElementwiseImpl
    {
        return std::make_unique<Invoker>();
    };
+    std::string GetTypeString() const override
+    {
+        auto str = std::stringstream();
+        // clang-format off
+        str << "DeviceElementwiseImpl<" ;
+        str << "NumDim_" << NumDim << ","; 
+	str << "MPerThread_" << MPerThread << ","; 
+        str << "InScalarPerVector"; 
+        static_for<0, InScalarPerVectorSeq::Size(), 1>{}([&](auto i) { str << "_" << InScalarPerVectorSeq::At(i).value; });
+        str << ","; 
+        str << "OutScalarPerVector"; 
+        static_for<0, OutScalarPerVectorSeq::Size(), 1>{}([&](auto i) { str << "_" << OutScalarPerVectorSeq::At(i).value; });
+        str << ">";
+        // clang-format on
+        return str.str();
+    }
 }; // namespace device
 } // namespace device

--- a/include/ck/utility/inner_product.hpp
+++ b/include/ck/utility/inner_product.hpp
@@ -192,6 +192,8 @@ inner_product<int8x4_t, int8x4_t, int32_t>(const int8x4_t& a, const int8x4_t& b,
 #else
    c = __builtin_amdgcn_sdot4(bit_cast<int32_t>(a), bit_cast<int32_t>(b), c, false);
 #endif
+#elif defined(CK_USE_AMD_V_DOT4_I32_I8_GFX11)
+    c = __builtin_amdgcn_sudot4(true, bit_cast<int32_t>(a), true, bit_cast<int32_t>(b), c, false);
 #else
    const vector_type<int8_t, 4> a_vector{a};
    const vector_type<int8_t, 4> b_vector{b};

--- a/library/include/ck/library/utility/host_common_util.hpp
+++ b/library/include/ck/library/utility/host_common_util.hpp
@@ -22,7 +22,7 @@ static inline void dumpBufferToFile(const char* fileName, T* data, size_t dataNu
    std::ofstream outFile(fileName, std::ios::binary);
    if(outFile)
    {
-        outFile.write(reinterpret_cast<char*>(data), dataNumItems * sizeof(T));
+        outFile.write(reinterpret_cast<const char*>(data), dataNumItems * sizeof(T));
        outFile.close();
        std::cout << "Write output to file " << fileName << std::endl;
    }

--- a/library/include/ck/library/utility/host_tensor_generator.hpp
+++ b/library/include/ck/library/utility/host_tensor_generator.hpp
@@ -200,10 +200,11 @@ struct GeneratorTensor_3<ck::bf8_t>
 template <typename T>
 struct GeneratorTensor_4
 {
-    std::default_random_engine generator;
+    std::mt19937 generator;
    std::normal_distribution<float> distribution;
-    GeneratorTensor_4(float mean, float stddev) : generator(1), distribution(mean, stddev){};
+    GeneratorTensor_4(float mean, float stddev, unsigned int seed = 1)
+        : generator(seed), distribution(mean, stddev){};
    template <typename... Is>
    T operator()(Is...)