gaoqiong / composable_kernel_ROCM

Commit 16dc96eb
Authored Nov 14, 2024 by root
Parent: c8e91d41

    remove print runing info
Showing 3 changed files with 5 additions and 90 deletions:

  example/ck_tile/16_fused_moe_general/main.cpp  (+2, -87)
  include/ck_tile/host/reference/reference_fused_moe.hpp  (+1, -1)
  include/ck_tile/ops/flatmm/pipeline/uk/flatmm_uk_gfx9_32x512x128_1x4x1_16x16x16.hpp  (+2, -2)
example/ck_tile/16_fused_moe_general/main.cpp

...
@@ -207,17 +207,6 @@ bool run(const ck_tile::ArgParser& arg_parser)
         {(max_num_tokens_padded + block_m - 1) / block_m});
     ck_tile::HostTensor<IndexDataType> num_sorted_tiles_host({1});
-#if 1
-# if 0
-    ck_tile::FillStepRange<ADataType>{-.5f, .5f, 0.01f}(a_host);
-    ck_tile::FillStepRange<GDataType>{-.5f, .5f, 0.01f}(g_host);
-    ck_tile::FillStepRange<DDataType, false>{.5f, -.5f, -0.01f}(d_host);
-    ck_tile::FillStepRange<AScaleDataType>{0.f, 1.f, 0.01f}(sa_host);
-    ck_tile::FillStepRange<GScaleDataType>{0.f, 1.f, 0.01f}(sg_host);
-    ck_tile::FillStepRange<DScaleDataType>{0.f, 1.f, 0.01f}(sd_host);
-    ck_tile::FillStepRange<YSmoothScaleDataType>{0.f, 1.f, 0.01f}(sy_host);
-    ck_tile::FillStepRange<TopkWeightDataType>{-.5f, .5f, 0.01f}(topk_weight_host);
-# else
     ck_tile::FillUniformDistribution<ADataType>{-.5f, .5f}(a_host);
     ck_tile::FillUniformDistribution<GDataType>{-.5f, .5f}(g_host);
     ck_tile::FillUniformDistribution<DDataType>{-.5f, .5f}(d_host);
...
@@ -226,7 +215,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
     ck_tile::FillUniformDistribution<DScaleDataType>{-.5f, .5f}(sd_host);
     ck_tile::FillUniformDistribution<YSmoothScaleDataType>{-.5f, .5f}(sy_host);
     ck_tile::FillUniformDistribution<TopkWeightDataType>{0.0f, 1.0f}(topk_weight_host);
-# endif
     // permute weight
     ck_tile::HostTensor<GDataType> g_perm_host = shuffle_moe_weight(g_host, prec_w, 1);
...
@@ -248,81 +237,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
     {
         topid_unique_gen<IndexDataType>(topk_ids_host.mData, tokens, topk, experts, 11913);
     }
-#else
-    a_host.loadtxt("../../ater/input_torch.txt");
-    topk_ids_host.loadtxt("../../ater/topk_ids_torch.txt", "int");
-    // topk_ids_host.savetxt("topk_ids_2.txt");
-    topk_weight_host.loadtxt("../../ater/topk_weights_torch.txt", "float");
-    std::cout << "------- @@@ " << __LINE__ << std::flush << std::endl;
-    g_host.loadtxt("../../ater/w1_torch.txt", "float");
-    std::cout << "------- @@@ " << __LINE__ << std::flush << std::endl;
-    d_host.loadtxt("../../ater/w2_torch.txt", "float");
-    std::cout << "------- @@@ " << __LINE__ << std::flush << std::endl;
-    ck_tile::HostTensor<GDataType> g_perm_host = shuffle_moe_weight(g_host, prec_w, 1);
-    std::cout << "------- @@@ " << __LINE__ << std::flush << std::endl;
-    ck_tile::HostTensor<DDataType> d_perm_host = shuffle_moe_weight(d_host, prec_w, 1);
-    std::cout << "------- @@@ " << __LINE__ << std::flush << std::endl;
-# if 0
-    ck_tile::reference_moe_sorting<TopkWeightDataType, IndexDataType>(
-        topk_ids_host, topk_weight_host, sorted_token_ids_host, sorted_weight_host,
-        sorted_expert_ids_host, num_sorted_tiles_host.mData[0], experts, block_m);
-    std::cout << "------- @@@ " << __LINE__ << std::flush << std::endl;
-    std::cout << sorted_token_ids_host << std::endl;
-    std::cout << num_sorted_tiles_host << std::endl;
-    std::cout << sorted_expert_ids_host << std::endl;
-    ck_tile::reference_fused_moe<AccDataType, ck_tile::element_wise::Gelu>(
-        a_host, g_host, d_host, sa_host, sg_host, sd_host, sy_host, o_host,
-        sorted_token_ids_host, sorted_weight_host, sorted_expert_ids_host,
-        num_sorted_tiles_host, topk_ids_host, block_m, tokens, experts, hidden_size,
-        shared_intermediate_size_0, topk, gate_only);
-    std::cout << "------- >" << std::endl;
-    std::cout << o_host << std::endl;
-    (void)balance;
-    {
-        ck_tile::HostTensor<ODataType> o_host_torch({tokens, hidden_size}, {stride, 1});
-        o_host_torch.loadtxt("../../ater/ref2_torch.txt");
-        auto [rtol, atol] = get_elimit<ADataType>();
-        bool pass = ck_tile::check_err(
-            o_host, o_host_torch, std::string("OUT-Torch Error: Incorrect results!"), rtol, atol);
-        std::cout << ", valid:" << (pass ? "y" : "n") << std::flush;
-    }
-    return 1;
-# endif
-#endif
+    (void)balance;
     ck_tile::reference_moe_sorting<TopkWeightDataType, IndexDataType>(topk_ids_host,
                                                                       topk_weight_host,
...
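With the debug branches gone, the only initialization path left in the example is the ck_tile::FillUniformDistribution block kept as context above. As a minimal standalone sketch, assuming FillUniformDistribution<T>{lo, hi}(t) simply fills the tensor with values drawn uniformly from [lo, hi], the same behaviour can be mimicked with the standard library (fill_uniform and the toy buffer below are illustrative names, not ck_tile API):

    #include <cstdio>
    #include <random>
    #include <vector>

    // Hypothetical stand-in for ck_tile::FillUniformDistribution<T>{lo, hi}(tensor):
    // fill a flat buffer with uniform random values in [lo, hi].
    template <typename T>
    void fill_uniform(std::vector<T>& data, T lo, T hi, unsigned seed = 0)
    {
        std::mt19937 gen(seed);
        std::uniform_real_distribution<float> dist(lo, hi);
        for(auto& v : data)
            v = static_cast<T>(dist(gen));
    }

    int main()
    {
        std::vector<float> a_host(8);      // toy stand-in for the A tensor
        fill_uniform(a_host, -0.5f, 0.5f); // mirrors {-.5f, .5f}(a_host) above
        for(float v : a_host)
            std::printf("%f\n", v);
        return 0;
    }

In the real example the ck_tile::HostTensor objects also carry shape and stride information; the sketch only shows the fill semantics that the retained lines rely on.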
include/ck_tile/host/reference/reference_fused_moe.hpp

...
@@ -135,7 +135,7 @@ void reference_fused_moe(
         for(ck_tile::index_t i_n = 0; i_n < intermediate_size_1; i_n++)
         {
             Activation{}(y(0, i_n), acc_0(0, i_n));
-            printf("ie:%2d, it:%3d, in:%d, %f\n", i_expert, i_token, i_n, y(0, i_n));
+            // printf("ie:%2d, it:%3d, in:%d, %f\n", i_expert, i_token, i_n, y(0, i_n));
         }
     }
     else
...
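The only change in this file is that the per-element printf inside the activation loop is commented out rather than deleted, so it can be pasted back by hand when needed. If the trace has to be toggled regularly, one alternative is to guard it behind a compile-time switch; the sketch below uses a hypothetical MOE_DEBUG_TRACE flag that is not part of ck_tile, purely to illustrate the pattern:

    #include <cstdio>

    // Hypothetical compile-time switch; enable with -DMOE_DEBUG_TRACE=1.
    #ifndef MOE_DEBUG_TRACE
    #define MOE_DEBUG_TRACE 0
    #endif

    #if MOE_DEBUG_TRACE
    #define MOE_TRACE(...) std::printf(__VA_ARGS__)
    #else
    #define MOE_TRACE(...) ((void)0)
    #endif

    int main()
    {
        // Stands in for the activation loop above: the trace compiles away
        // entirely when MOE_DEBUG_TRACE is 0, with no edits to the loop body.
        for(int i_n = 0; i_n < 4; ++i_n)
        {
            float y = 0.5f * i_n; // placeholder for y(0, i_n)
            MOE_TRACE("in:%d, %f\n", i_n, y);
            (void)y;              // keep -Wunused-variable quiet when tracing is off
        }
        return 0;
    }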
include/ck_tile/ops/flatmm/pipeline/uk/flatmm_uk_gfx9_32x512x128_1x4x1_16x16x16.hpp

...
@@ -292,8 +292,8 @@ struct FlatmmUK_GFX9_32x512x128_1x4x1_16x16x16_BF16
             number<a_sld.get_num_of_access()>{});
-        printf("----- tid:%d, a_sld:%d\n", static_cast<index_t>(threadIdx.x),
-               static_cast<index_t>(a_sld.cached_coords_[number<0>{}].get_offset()));
+        // printf("----- tid:%d, a_sld:%d\n", static_cast<index_t>(threadIdx.x),
+        //        static_cast<index_t>(a_sld.cached_coords_[number<0>{}].get_offset()));
...
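Here the removed trace was a printf executed by every GPU thread, which makes the output hard to read at full launch size; the commit keeps it as a comment. Another common pattern is to gate such prints on a single thread id. The sketch below is a host-side analogue only (standard C++ threads, not HIP device code, and the worker count is arbitrary), shown purely to illustrate the gating idea:

    #include <cstdio>
    #include <thread>
    #include <vector>

    int main()
    {
        constexpr int num_threads = 8; // toy stand-in for a wavefront / workgroup
        std::vector<std::thread> workers;
        for(int tid = 0; tid < num_threads; ++tid)
            workers.emplace_back([tid] {
                // Analogue of `if(threadIdx.x == 0) printf(...)` in a GPU kernel:
                // only one worker reports, so the trace stays readable.
                if(tid == 0)
                    std::printf("----- tid:%d reporting\n", tid);
            });
        for(auto& w : workers)
            w.join();
        return 0;
    }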