"library/src/host_tensor/device.cpp" did not exist on "1566b31736d191fe3a43dd5efa59968e44191729"
Unverified commit 61dc9aa9, authored by Rostyslav Geyyer and committed by GitHub
Browse files

Add the missing archs (#785)

parent 1cf50031
...@@ -364,7 +364,7 @@ struct intrin_mfma_f32_32x32x16f8f8<32, 32> ...@@ -364,7 +364,7 @@ struct intrin_mfma_f32_32x32x16f8f8<32, 32>
template <class FloatC> template <class FloatC>
__device__ static void Run(const f8x8_t& reg_a, const f8x8_t& reg_b, FloatC& reg_c) __device__ static void Run(const f8x8_t& reg_a, const f8x8_t& reg_b, FloatC& reg_c)
{ {
#if defined(__gfx940__) #if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
reg_c.template AsType<float16_t>()(Number<0>{}) = reg_c.template AsType<float16_t>()(Number<0>{}) =
__builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8( __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8(
bit_cast<long>(reg_a), bit_cast<long>(reg_a),
...@@ -396,7 +396,7 @@ struct intrin_mfma_f32_16x16x32f8f8<16, 16> ...@@ -396,7 +396,7 @@ struct intrin_mfma_f32_16x16x32f8f8<16, 16>
template <class FloatC> template <class FloatC>
__device__ static void Run(const f8x8_t& reg_a, const f8x8_t& reg_b, FloatC& reg_c) __device__ static void Run(const f8x8_t& reg_a, const f8x8_t& reg_b, FloatC& reg_c)
{ {
#if defined(__gfx940__) #if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
reg_c.template AsType<float4_t>()(Number<0>{}) = __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8( reg_c.template AsType<float4_t>()(Number<0>{}) = __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8(
bit_cast<long>(reg_a), bit_cast<long>(reg_a),
bit_cast<long>(reg_b), bit_cast<long>(reg_b),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment