"...composable_kernel.git" did not exist on "1b62bfaa2a42ed83da2692f6797a5f929c39946f"
Commit b460bff9 authored by Harisankar Sadasivan's avatar Harisankar Sadasivan
Browse files

Merge branch 'develop' of...

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/composable_kernel into simple_gemm_dl
parents 83d9c3fb f7331c60
...@@ -790,8 +790,8 @@ pipeline { ...@@ -790,8 +790,8 @@ pipeline {
} }
agent{ label rocmnode("navi32") } agent{ label rocmnode("navi32") }
environment{ environment{
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1101" """ setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1101" -DDL_KERNELS=ON """
execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx1101" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """ execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx1101" -DDL_KERNELS=ON -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """
} }
steps{ steps{
Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local') Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
......
...@@ -66,6 +66,10 @@ ...@@ -66,6 +66,10 @@
#define CK_USE_AMD_V_FMAC_F32 #define CK_USE_AMD_V_FMAC_F32
#define CK_USE_AMD_V_DOT2_F32_F16 #define CK_USE_AMD_V_DOT2_F32_F16
#define CK_USE_AMD_V_DOT4_I32_I8 #define CK_USE_AMD_V_DOT4_I32_I8
#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__)
#define CK_USE_AMD_V_FMAC_F32
#define CK_USE_AMD_V_DOT2_F32_F16
#define CK_USE_AMD_V_DOT4_I32_I8_GFX11
#endif #endif
// MFMA instruction // MFMA instruction
......
...@@ -3,8 +3,10 @@ ...@@ -3,8 +3,10 @@
#pragma once #pragma once
#include <sstream>
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
// To be removed: it does not report the location of the failed HIP function call
inline void hip_check_error(hipError_t x) inline void hip_check_error(hipError_t x)
{ {
if(x != hipSuccess) if(x != hipSuccess)
...@@ -15,3 +17,16 @@ inline void hip_check_error(hipError_t x) ...@@ -15,3 +17,16 @@ inline void hip_check_error(hipError_t x)
throw std::runtime_error(ss.str()); throw std::runtime_error(ss.str());
} }
} }
/*
 * HIP_CHECK_ERROR(retval_or_funcall)
 *
 * Evaluates `retval_or_funcall` exactly once and stores the result as a hipError_t.
 * If the result is hipSuccess nothing else happens. Otherwise a std::runtime_error
 * is thrown whose message has the form
 *     "HIP Function Failed (<file>,<line>) <hipGetErrorString(result)>"
 * where <file>/<line> come from __FILE__/__LINE__ at the place the macro is used.
 *
 * The do { ... } while(0) wrapper makes the expansion behave like a single
 * statement, so the macro can be followed by ';' and used in unbraced if/else.
 */
#define HIP_CHECK_ERROR(retval_or_funcall)                                            \
    do                                                                                \
    {                                                                                 \
        const hipError_t hip_status_ = retval_or_funcall;                             \
        if(hip_status_ == hipSuccess)                                                 \
            break;                                                                    \
        std::ostringstream hip_message_;                                              \
        hip_message_ << "HIP Function Failed (" << __FILE__ << "," << __LINE__ << ") " \
                     << hipGetErrorString(hip_status_);                               \
        throw std::runtime_error(hip_message_.str());                                 \
    } while(0)
...@@ -296,6 +296,28 @@ struct DeviceElementwiseImpl ...@@ -296,6 +296,28 @@ struct DeviceElementwiseImpl
{ {
return std::make_unique<Invoker>(); return std::make_unique<Invoker>();
}; };
// Builds a textual description of this instantiation from its compile-time
// parameters. The result has the form
//   DeviceElementwiseImpl<NumDim_<NumDim>,MPerThread_<MPerThread>,
//                         InScalarPerVector_<v0>_<v1>...,OutScalarPerVector_<w0>_<w1>...>
// (emitted on one line, without spaces), where the <v*>/<w*> entries are the
// elements of InScalarPerVectorSeq and OutScalarPerVectorSeq in index order.
std::string GetTypeString() const override
{
    std::stringstream type_name;

    // clang-format off
    type_name << "DeviceElementwiseImpl<"
              << "NumDim_" << NumDim << ","
              << "MPerThread_" << MPerThread << ","
              << "InScalarPerVector";
    // One "_<value>" suffix per entry of the input vector-width sequence.
    static_for<0, InScalarPerVectorSeq::Size(), 1>{}(
        [&](auto idx) { type_name << "_" << InScalarPerVectorSeq::At(idx).value; });

    type_name << "," << "OutScalarPerVector";
    // Same suffix scheme for the output vector-width sequence.
    static_for<0, OutScalarPerVectorSeq::Size(), 1>{}(
        [&](auto idx) { type_name << "_" << OutScalarPerVectorSeq::At(idx).value; });

    type_name << ">";
    // clang-format on

    return type_name.str();
}
}; // namespace device }; // namespace device
} // namespace device } // namespace device
......
...@@ -192,6 +192,8 @@ inner_product<int8x4_t, int8x4_t, int32_t>(const int8x4_t& a, const int8x4_t& b, ...@@ -192,6 +192,8 @@ inner_product<int8x4_t, int8x4_t, int32_t>(const int8x4_t& a, const int8x4_t& b,
#else #else
c = __builtin_amdgcn_sdot4(bit_cast<int32_t>(a), bit_cast<int32_t>(b), c, false); c = __builtin_amdgcn_sdot4(bit_cast<int32_t>(a), bit_cast<int32_t>(b), c, false);
#endif #endif
#elif defined(CK_USE_AMD_V_DOT4_I32_I8_GFX11)
c = __builtin_amdgcn_sudot4(true, bit_cast<int32_t>(a), true, bit_cast<int32_t>(b), c, false);
#else #else
const vector_type<int8_t, 4> a_vector{a}; const vector_type<int8_t, 4> a_vector{a};
const vector_type<int8_t, 4> b_vector{b}; const vector_type<int8_t, 4> b_vector{b};
......
...@@ -22,7 +22,7 @@ static inline void dumpBufferToFile(const char* fileName, T* data, size_t dataNu ...@@ -22,7 +22,7 @@ static inline void dumpBufferToFile(const char* fileName, T* data, size_t dataNu
std::ofstream outFile(fileName, std::ios::binary); std::ofstream outFile(fileName, std::ios::binary);
if(outFile) if(outFile)
{ {
outFile.write(reinterpret_cast<char*>(data), dataNumItems * sizeof(T)); outFile.write(reinterpret_cast<const char*>(data), dataNumItems * sizeof(T));
outFile.close(); outFile.close();
std::cout << "Write output to file " << fileName << std::endl; std::cout << "Write output to file " << fileName << std::endl;
} }
......
...@@ -200,10 +200,11 @@ struct GeneratorTensor_3<ck::bf8_t> ...@@ -200,10 +200,11 @@ struct GeneratorTensor_3<ck::bf8_t>
template <typename T> template <typename T>
struct GeneratorTensor_4 struct GeneratorTensor_4
{ {
std::default_random_engine generator; std::mt19937 generator;
std::normal_distribution<float> distribution; std::normal_distribution<float> distribution;
GeneratorTensor_4(float mean, float stddev) : generator(1), distribution(mean, stddev){}; GeneratorTensor_4(float mean, float stddev, unsigned int seed = 1)
: generator(seed), distribution(mean, stddev){};
template <typename... Is> template <typename... Is>
T operator()(Is...) T operator()(Is...)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment