Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
46abeca3
"...resnet50_tensorflow.git" did not exist on "417aa07384824ab567bd651444f5210e9f2e5bfb"
Commit
46abeca3
authored
Jan 07, 2022
by
Jing Zhang
Browse files
add fp16 buildins
parent
01192e26
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
26 deletions
+10
-26
composable_kernel/include/utility/amd_xdlops.hpp
composable_kernel/include/utility/amd_xdlops.hpp
+9
-25
script/cmake-rocm.sh
script/cmake-rocm.sh
+1
-1
No files found.
composable_kernel/include/utility/amd_xdlops.hpp
View file @
46abeca3
...
...
@@ -6,22 +6,6 @@
namespace
ck
{
// A, B, C, cbsz, abid, blgp
// fp16
extern
"C"
__device__
float32_t
llvm_intrin_amdgcn_mfma_f32_32x32x4f16
(
half4_t
,
half4_t
,
float32_t
,
int
,
int
,
int
)
__asm
(
"llvm.amdgcn.mfma.f32.32x32x4f16"
);
extern
"C"
__device__
float16_t
llvm_intrin_amdgcn_mfma_f32_32x32x8f16
(
half4_t
,
half4_t
,
float16_t
,
int
,
int
,
int
)
__asm
(
"llvm.amdgcn.mfma.f32.32x32x8f16"
);
extern
"C"
__device__
float4_t
llvm_intrin_amdgcn_mfma_f32_16x16x16f16
(
half4_t
,
half4_t
,
float4_t
,
int
,
int
,
int
)
__asm
(
"llvm.amdgcn.mfma.f32.16x16x16f16"
);
extern
"C"
__device__
float16_t
llvm_intrin_amdgcn_mfma_f32_16x16x4f16
(
half4_t
,
half4_t
,
float16_t
,
int
,
int
,
int
)
__asm
(
"llvm.amdgcn.mfma.f32.16x16x4f16"
);
extern
"C"
__device__
float4_t
llvm_intrin_amdgcn_mfma_f32_4x4x4f16
(
half4_t
,
half4_t
,
float4_t
,
int
,
int
,
int
)
__asm
(
"llvm.amdgcn.mfma.f32.4x4x4f16"
);
// bfp16
extern
"C"
__device__
float16_t
llvm_intrin_amdgcn_mfma_f32_32x32x8bf16_1k
(
ushort4_t
,
ushort4_t
,
float16_t
,
int
,
int
,
int
)
__asm
(
"llvm.amdgcn.mfma.f32.32x32x8bf16.1k"
);
...
...
@@ -167,9 +151,9 @@ struct intrin_mfma_f32_32x32x4f16<64, 64>
template
<
class
FloatC
>
__device__
static
void
Run
(
const
half4_t
&
reg_a
,
const
half4_t
&
reg_b
,
FloatC
&
reg_c
)
{
reg_c
.
template
AsType
<
float32_t
>()(
Number
<
0
>
{})
=
llvm_intr
in_amdgcn_mfma_f32_32x32x4f16
(
reg_c
.
template
AsType
<
float32_t
>()(
Number
<
0
>
{})
=
__built
in_amdgcn_mfma_f32_32x32x4f16
(
reg_a
,
reg_b
,
reg_c
.
template
AsType
<
float32_t
>()[
Number
<
0
>
{}],
1
,
0
,
0
);
reg_c
.
template
AsType
<
float32_t
>()(
Number
<
1
>
{})
=
llvm_intr
in_amdgcn_mfma_f32_32x32x4f16
(
reg_c
.
template
AsType
<
float32_t
>()(
Number
<
1
>
{})
=
__built
in_amdgcn_mfma_f32_32x32x4f16
(
reg_a
,
reg_b
,
reg_c
.
template
AsType
<
float32_t
>()[
Number
<
1
>
{}],
1
,
1
,
0
);
}
};
...
...
@@ -180,7 +164,7 @@ struct intrin_mfma_f32_32x32x4f16<32, 64>
template
<
class
FloatC
>
__device__
static
void
Run
(
const
half4_t
&
reg_a
,
const
half4_t
&
reg_b
,
FloatC
&
reg_c
)
{
reg_c
.
template
AsType
<
float32_t
>()(
Number
<
0
>
{})
=
llvm_intr
in_amdgcn_mfma_f32_32x32x4f16
(
reg_c
.
template
AsType
<
float32_t
>()(
Number
<
0
>
{})
=
__built
in_amdgcn_mfma_f32_32x32x4f16
(
reg_a
,
reg_b
,
reg_c
.
template
AsType
<
float32_t
>()[
Number
<
0
>
{}],
1
,
0
,
0
);
}
};
...
...
@@ -194,7 +178,7 @@ struct intrin_mfma_f32_32x32x8f16<32, 32>
template
<
class
FloatC
>
__device__
static
void
Run
(
const
half4_t
&
reg_a
,
const
half4_t
&
reg_b
,
FloatC
&
reg_c
)
{
reg_c
.
template
AsType
<
float16_t
>()(
Number
<
0
>
{})
=
llvm_intr
in_amdgcn_mfma_f32_32x32x8f16
(
reg_c
.
template
AsType
<
float16_t
>()(
Number
<
0
>
{})
=
__built
in_amdgcn_mfma_f32_32x32x8f16
(
reg_a
,
reg_b
,
reg_c
.
template
AsType
<
float16_t
>()[
Number
<
0
>
{}],
0
,
0
,
0
);
}
};
...
...
@@ -208,7 +192,7 @@ struct intrin_mfma_f32_16x16x16f16<16, 16>
template
<
class
FloatC
>
__device__
static
void
Run
(
const
half4_t
&
reg_a
,
const
half4_t
&
reg_b
,
FloatC
&
reg_c
)
{
reg_c
.
template
AsType
<
float4_t
>()(
Number
<
0
>
{})
=
llvm_intr
in_amdgcn_mfma_f32_16x16x16f16
(
reg_c
.
template
AsType
<
float4_t
>()(
Number
<
0
>
{})
=
__built
in_amdgcn_mfma_f32_16x16x16f16
(
reg_a
,
reg_b
,
reg_c
.
template
AsType
<
float4_t
>()[
Number
<
0
>
{}],
0
,
0
,
0
);
}
};
...
...
@@ -222,7 +206,7 @@ struct intrin_mfma_f32_16x16x4f16<16, 64>
template
<
class
FloatC
>
__device__
static
void
Run
(
const
half4_t
&
reg_a
,
const
half4_t
&
reg_b
,
FloatC
&
reg_c
)
{
reg_c
.
template
AsType
<
float16_t
>()(
Number
<
0
>
{})
=
llvm_intr
in_amdgcn_mfma_f32_16x16x4f16
(
reg_c
.
template
AsType
<
float16_t
>()(
Number
<
0
>
{})
=
__built
in_amdgcn_mfma_f32_16x16x4f16
(
reg_a
,
reg_b
,
reg_c
.
template
AsType
<
float16_t
>()[
Number
<
0
>
{}],
2
,
0
,
0
);
}
};
...
...
@@ -236,7 +220,7 @@ struct intrin_mfma_f32_4x4x4f16<4, 64>
template
<
class
FloatC
>
__device__
static
void
Run
(
const
half4_t
&
reg_a
,
const
half4_t
&
reg_b
,
FloatC
&
reg_c
)
{
reg_c
.
template
AsType
<
float4_t
>()(
Number
<
0
>
{})
=
llvm_intr
in_amdgcn_mfma_f32_4x4x4f16
(
reg_c
.
template
AsType
<
float4_t
>()(
Number
<
0
>
{})
=
__built
in_amdgcn_mfma_f32_4x4x4f16
(
reg_a
,
reg_b
,
reg_c
.
template
AsType
<
float4_t
>()[
Number
<
0
>
{}],
4
,
0
,
0
);
}
};
...
...
@@ -247,9 +231,9 @@ struct intrin_mfma_f32_4x4x4f16<8, 64>
template
<
class
FloatC
>
__device__
static
void
Run
(
const
half4_t
&
reg_a
,
const
half4_t
&
reg_b
,
FloatC
&
reg_c
)
{
reg_c
.
template
AsType
<
float4_t
>()(
Number
<
0
>
{})
=
llvm_intr
in_amdgcn_mfma_f32_4x4x4f16
(
reg_c
.
template
AsType
<
float4_t
>()(
Number
<
0
>
{})
=
__built
in_amdgcn_mfma_f32_4x4x4f16
(
reg_a
,
reg_b
,
reg_c
.
template
AsType
<
float4_t
>()[
Number
<
0
>
{}],
4
,
0
,
0
);
reg_c
.
template
AsType
<
float4_t
>()(
Number
<
1
>
{})
=
llvm_intr
in_amdgcn_mfma_f32_4x4x4f16
(
reg_c
.
template
AsType
<
float4_t
>()(
Number
<
1
>
{})
=
__built
in_amdgcn_mfma_f32_4x4x4f16
(
reg_a
,
reg_b
,
reg_c
.
template
AsType
<
float4_t
>()[
Number
<
1
>
{}],
4
,
1
,
0
);
}
};
...
...
script/cmake-rocm.sh
View file @
46abeca3
...
...
@@ -3,7 +3,7 @@ rm -f CMakeCache.txt
rm
-f
*
.cmake
rm
-rf
CMakeFiles
MY_PROJECT_SOURCE
=
../
../..
MY_PROJECT_SOURCE
=
../
MY_PROJECT_INSTALL
=
../install.dir
cmake
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment