Commit 5ab137f4 authored by danyao12
Browse files

add traits

parent a0491b67
...@@ -333,6 +333,7 @@ float fmha_ext_bwd_(const ck_tile::stream_config& s, fmha_bwd_args a, unsigned ...@@ -333,6 +333,7 @@ float fmha_ext_bwd_(const ck_tile::stream_config& s, fmha_bwd_args a, unsigned
float fmha_bwd(fmha_bwd_traits t, fmha_bwd_args a, const ck_tile::stream_config& s){{ float fmha_bwd(fmha_bwd_traits t, fmha_bwd_args a, const ck_tile::stream_config& s){{
float r = -1; float r = -1;
if (t.uses_ext_asm == true){{
if ((t.is_group_mode == false) && (t.bias_type == bias_enum::no_bias) && (t.has_dbias == false) && (t.has_dropout == false) && if ((t.is_group_mode == false) && (t.bias_type == bias_enum::no_bias) && (t.has_dbias == false) && (t.has_dropout == false) &&
(a.seqlen_q == a.seqlen_k) && (a.seqlen_k % 128 == 0) && (a.hdim_q == 128) && (a.hdim_v == 128) && (t.is_deterministic == false)) {{ (a.seqlen_q == a.seqlen_k) && (a.seqlen_k % 128 == 0) && (a.hdim_q == 128) && (a.hdim_v == 128) && (t.is_deterministic == false)) {{
if(t.data_type.compare("fp16") == 0){{ if(t.data_type.compare("fp16") == 0){{
...@@ -368,6 +369,7 @@ float fmha_bwd(fmha_bwd_traits t, fmha_bwd_args a, const ck_tile::stream_config& ...@@ -368,6 +369,7 @@ float fmha_bwd(fmha_bwd_traits t, fmha_bwd_args a, const ck_tile::stream_config&
}} }}
}} }}
}} }}
}}
{F_dispatch} {F_dispatch}
return r; return r;
......
...@@ -91,7 +91,9 @@ auto create_args(int argc, char* argv[]) ...@@ -91,7 +91,9 @@ auto create_args(int argc, char* argv[])
.insert("deterministic", .insert("deterministic",
"0", "0",
"if set to 1 will use multi-buffer reduction strategy for dq, atomic opeartion " "if set to 1 will use multi-buffer reduction strategy for dq, atomic opeartion "
"will not be used"); "will not be used")
.insert("ext_asm", "0", "if set to 1, some cases will call the ext asm dqdkdv kernel")
.insert("asm_atomic_fp32", "1", "if set to 0, atomic fp16/bf16 is used when calling the ext asm dqdkdv kernel");
bool result = arg_parser.parse(argc, argv); bool result = arg_parser.parse(argc, argv);
return std::make_tuple(result, arg_parser); return std::make_tuple(result, arg_parser);
...@@ -180,6 +182,8 @@ bool run(const ck_tile::ArgParser& arg_parser) ...@@ -180,6 +182,8 @@ bool run(const ck_tile::ArgParser& arg_parser)
int stream_repeat = arg_parser.get_int("repeat"); int stream_repeat = arg_parser.get_int("repeat");
bool kname = arg_parser.get_bool("kname"); bool kname = arg_parser.get_bool("kname");
bool deterministic = arg_parser.get_bool("deterministic"); bool deterministic = arg_parser.get_bool("deterministic");
bool ext_asm = arg_parser.get_bool("ext_asm");
bool asm_atomic_fp32 = arg_parser.get_bool("asm_atomic_fp32");
ck_tile::stream_config stream_config{nullptr, ck_tile::stream_config stream_config{nullptr,
true, true,
...@@ -416,7 +420,9 @@ bool run(const ck_tile::ArgParser& arg_parser) ...@@ -416,7 +420,9 @@ bool run(const ck_tile::ArgParser& arg_parser)
use_dbias, use_dbias,
p_drop > 0.0f, p_drop > 0.0f,
s_randval, s_randval,
deterministic}; deterministic,
ext_asm,
asm_atomic_fp32};
auto fmha_args = [&]() { auto fmha_args = [&]() {
assert(nhead % nhead_k == 0); assert(nhead % nhead_k == 0);
/// NOTE: we broadcast bias from [1, 1, seqlen_q, seqlen_k] to [batch, nhead, seqlen_q, /// NOTE: we broadcast bias from [1, 1, seqlen_q, seqlen_k] to [batch, nhead, seqlen_q,
......
...@@ -438,6 +438,8 @@ struct fmha_bwd_traits ...@@ -438,6 +438,8 @@ struct fmha_bwd_traits
bool has_dropout; bool has_dropout;
bool is_store_randval; bool is_store_randval;
bool is_deterministic; bool is_deterministic;
bool uses_ext_asm;
bool is_asm_atomic_fp32;
// TODO: padding check is inside this api // TODO: padding check is inside this api
}; };
float fmha_bwd(fmha_bwd_traits, fmha_bwd_args, const ck_tile::stream_config&); float fmha_bwd(fmha_bwd_traits, fmha_bwd_args, const ck_tile::stream_config&);
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment