Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
5ab137f4
Commit
5ab137f4
authored
Sep 19, 2024
by
danyao12
Browse files
add traits
parent
a0491b67
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
47 additions
and
37 deletions
+47
-37
example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py
example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py
+33
-31
example/ck_tile/01_fmha/fmha_bwd.cpp
example/ck_tile/01_fmha/fmha_bwd.cpp
+12
-6
example/ck_tile/01_fmha/fmha_bwd.hpp
example/ck_tile/01_fmha/fmha_bwd.hpp
+2
-0
No files found.
example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py
View file @
5ab137f4
...
...
@@ -333,6 +333,7 @@ float fmha_ext_bwd_(const ck_tile::stream_config& s, fmha_bwd_args a, unsigned
float fmha_bwd(fmha_bwd_traits t, fmha_bwd_args a, const ck_tile::stream_config& s){{
float r = -1;
if (t.uses_ext_asm == true){{
if ((t.is_group_mode == false) && (t.bias_type == bias_enum::no_bias) && (t.has_dbias == false) && (t.has_dropout == false) &&
(a.seqlen_q == a.seqlen_k) && (a.seqlen_k % 128 == 0) && (a.hdim_q == 128) && (a.hdim_v == 128) && (t.is_deterministic == false)) {{
if(t.data_type.compare("fp16") == 0){{
...
...
@@ -368,6 +369,7 @@ float fmha_bwd(fmha_bwd_traits t, fmha_bwd_args a, const ck_tile::stream_config&
}}
}}
}}
}}
{F_dispatch}
return r;
...
...
example/ck_tile/01_fmha/fmha_bwd.cpp
View file @
5ab137f4
...
...
@@ -91,7 +91,9 @@ auto create_args(int argc, char* argv[])
.
insert
(
"deterministic"
,
"0"
,
"if set to 1 will use multi-buffer reduction strategy for dq, atomic opeartion "
"will not be used"
);
"will not be used"
)
.
insert
(
"ext_asm"
,
"0"
,
"if set to 1, some cases will call the ext asm dqdkdv kernel"
)
.
insert
(
"asm_atomic_fp32"
,
"1"
,
"if set to 0, atomic fp16/bf16 is used when calling the ext asm dqdkdv kernel"
);
bool
result
=
arg_parser
.
parse
(
argc
,
argv
);
return
std
::
make_tuple
(
result
,
arg_parser
);
...
...
@@ -180,6 +182,8 @@ bool run(const ck_tile::ArgParser& arg_parser)
int
stream_repeat
=
arg_parser
.
get_int
(
"repeat"
);
bool
kname
=
arg_parser
.
get_bool
(
"kname"
);
bool
deterministic
=
arg_parser
.
get_bool
(
"deterministic"
);
bool
ext_asm
=
arg_parser
.
get_bool
(
"ext_asm"
);
bool
asm_atomic_fp32
=
arg_parser
.
get_bool
(
"asm_atomic_fp32"
);
ck_tile
::
stream_config
stream_config
{
nullptr
,
true
,
...
...
@@ -416,7 +420,9 @@ bool run(const ck_tile::ArgParser& arg_parser)
use_dbias
,
p_drop
>
0.0
f
,
s_randval
,
deterministic
};
deterministic
,
ext_asm
,
asm_atomic_fp32
};
auto
fmha_args
=
[
&
]()
{
assert
(
nhead
%
nhead_k
==
0
);
/// NOTE: we broadcast bias from [1, 1, seqlen_q, seqlen_k] to [batch, nhead, seqlen_q,
...
...
example/ck_tile/01_fmha/fmha_bwd.hpp
View file @
5ab137f4
...
...
@@ -438,6 +438,8 @@ struct fmha_bwd_traits
bool
has_dropout
;
bool
is_store_randval
;
bool
is_deterministic
;
bool
uses_ext_asm
;
bool
is_asm_atomic_fp32
;
// TODO: padding check is inside this api
};
/// Declaration of the `fmha_bwd` entry point.
///
/// @param traits  Trait selection for the call, passed by value.
/// @param args    Argument bundle for the call, passed by value.
/// @param stream  Stream configuration, taken by const reference.
/// @return A `float` result.
///         NOTE(review): the generated definition appears to initialise its
///         result to -1 before dispatching; presumably a negative value means
///         no kernel matched. Confirm against the codegen template.
float fmha_bwd(fmha_bwd_traits traits,
               fmha_bwd_args args,
               const ck_tile::stream_config& stream);
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment