Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
1b409ffe
Commit
1b409ffe
authored
Feb 28, 2023
by
Jing Zhang
Browse files
fix mfma_int8 on MI300
parent
3973caa4
Changes
43
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
36 additions
and
17 deletions
+36
-17
include/ck/utility/amd_xdlops.hpp
include/ck/utility/amd_xdlops.hpp
+32
-15
include/ck/utility/data_type.hpp
include/ck/utility/data_type.hpp
+2
-0
script/cmake-ck-dev.sh
script/cmake-ck-dev.sh
+2
-2
No files found.
include/ck/utility/amd_xdlops.hpp
View file @
1b409ffe
...
...
@@ -259,7 +259,6 @@ struct intrin_mfma_f32_16x16x8bf16<16, 16>
}
};
#if (defined(__gfx908__) || defined(__gfx90a__))
template
<
index_t
MPerWave
,
index_t
NPerWave
>
struct
intrin_mfma_i32_32x32x8i8
;
...
...
@@ -278,7 +277,26 @@ struct intrin_mfma_i32_32x32x8i8<32, 32>
0
);
}
};
#elif (defined(__gfx940__))
template
<
index_t
MPerWave
,
index_t
NPerWave
>
struct
intrin_mfma_i32_16x16x16i8
;
template
<
>
struct
intrin_mfma_i32_16x16x16i8
<
16
,
16
>
{
template
<
class
FloatC
>
__device__
static
void
Run
(
const
int8x4_t
&
reg_a
,
const
int8x4_t
&
reg_b
,
FloatC
&
reg_c
)
{
reg_c
.
template
AsType
<
int32x4_t
>()(
Number
<
0
>
{})
=
__builtin_amdgcn_mfma_i32_16x16x16i8
(
bit_cast
<
int32_t
>
(
reg_a
),
bit_cast
<
int32_t
>
(
reg_b
),
reg_c
.
template
AsType
<
int32x4_t
>()[
Number
<
0
>
{}],
0
,
0
,
0
);
}
};
template
<
index_t
MPerWave
,
index_t
NPerWave
>
struct
intrin_mfma_i32_32x32x16i8
;
...
...
@@ -286,31 +304,30 @@ template <>
struct
intrin_mfma_i32_32x32x16i8
<
32
,
32
>
{
template
<
class
FloatC
>
__device__
static
void
Run
(
const
int8x
4
_t
&
reg_a
,
const
int8x
4
_t
&
reg_b
,
FloatC
&
reg_c
)
__device__
static
void
Run
(
const
int8x
8
_t
&
reg_a
,
const
int8x
8
_t
&
reg_b
,
FloatC
&
reg_c
)
{
reg_c
.
template
AsType
<
int32x16_t
>()(
Number
<
0
>
{})
=
__builtin_amdgcn_mfma_i32_32x32x16_i8
(
bit_cast
<
int
32
_t
>
(
reg_a
),
bit_cast
<
int
32
_t
>
(
reg_b
),
reg_c
.
template
AsType
<
int32x16_t
>()[
Number
<
0
>
{}],
0
,
0
,
0
);
__builtin_amdgcn_mfma_i32_32x32x16_i8
(
bit_cast
<
int
64
_t
>
(
reg_a
),
bit_cast
<
int
64
_t
>
(
reg_b
),
reg_c
.
template
AsType
<
int32x16_t
>()[
Number
<
0
>
{}],
0
,
0
,
0
);
}
};
#endif
template
<
index_t
MPerWave
,
index_t
NPerWave
>
struct
intrin_mfma_i32_16x16x
16
i8
;
struct
intrin_mfma_i32_16x16x
32
i8
;
template
<
>
struct
intrin_mfma_i32_16x16x
16
i8
<
16
,
16
>
struct
intrin_mfma_i32_16x16x
32
i8
<
16
,
16
>
{
template
<
class
FloatC
>
__device__
static
void
Run
(
const
int8x
4
_t
&
reg_a
,
const
int8x
4
_t
&
reg_b
,
FloatC
&
reg_c
)
__device__
static
void
Run
(
const
int8x
8
_t
&
reg_a
,
const
int8x
8
_t
&
reg_b
,
FloatC
&
reg_c
)
{
reg_c
.
template
AsType
<
int32x4_t
>()(
Number
<
0
>
{})
=
__builtin_amdgcn_mfma_i32_16x16x
16
i8
(
bit_cast
<
int
32
_t
>
(
reg_a
),
bit_cast
<
int
32
_t
>
(
reg_b
),
__builtin_amdgcn_mfma_i32_16x16x
32
i8
(
bit_cast
<
int
64
_t
>
(
reg_a
),
bit_cast
<
int
64
_t
>
(
reg_b
),
reg_c
.
template
AsType
<
int32x4_t
>()[
Number
<
0
>
{}],
0
,
0
,
...
...
include/ck/utility/data_type.hpp
View file @
1b409ffe
...
...
@@ -898,6 +898,8 @@ struct vector_type<T, 256>
}
};
using
int64_t
=
long
;
// fp64
using
double2_t
=
typename
vector_type
<
double
,
2
>::
type
;
using
double4_t
=
typename
vector_type
<
double
,
4
>::
type
;
...
...
script/cmake-ck-dev.sh
View file @
1b409ffe
...
...
@@ -10,8 +10,8 @@ cmake
-D
CMAKE_CXX_COMPILER
=
/opt/rocm/bin/hipcc
\
-D
CMAKE_CXX_FLAGS
=
"-O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=
$PWD
"
\
-D
CMAKE_BUILD_TYPE
=
Release
\
-D
BUILD_DEV
=
O
N
\
-D
GPU_TARGETS
=
"gfx90
a
"
\
-D
BUILD_DEV
=
O
FF
\
-D
GPU_TARGETS
=
"gfx9
4
0"
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
-D
USE_BITINT_EXTENSION_INT4
=
OFF
\
${
MY_PROJECT_SOURCE
}
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment