Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
dec32dc6
Commit
dec32dc6
authored
Jan 31, 2025
by
ThomasNing
Browse files
Finish the feature and merge with develop on the computeV2
parents
71352c44
c5fff071
Changes
215
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
267 additions
and
186 deletions
+267
-186
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n4096_instance.cpp
...thquant/instances/moe_smoothquant_bf16_n4096_instance.cpp
+8
-4
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n4096_tp_instance.cpp
...uant/instances/moe_smoothquant_bf16_n4096_tp_instance.cpp
+8
-4
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n512_instance.cpp
...othquant/instances/moe_smoothquant_bf16_n512_instance.cpp
+9
-4
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n64_n128_instance.cpp
...uant/instances/moe_smoothquant_bf16_n64_n128_instance.cpp
+7
-3
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n768_instance.cpp
...othquant/instances/moe_smoothquant_bf16_n768_instance.cpp
+7
-3
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n1024_instance.cpp
...thquant/instances/moe_smoothquant_fp16_n1024_instance.cpp
+9
-4
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n1536_instance.cpp
...thquant/instances/moe_smoothquant_fp16_n1536_instance.cpp
+9
-4
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n2048_instance.cpp
...thquant/instances/moe_smoothquant_fp16_n2048_instance.cpp
+8
-4
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n256_instance.cpp
...othquant/instances/moe_smoothquant_fp16_n256_instance.cpp
+7
-3
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n3072_instance.cpp
...thquant/instances/moe_smoothquant_fp16_n3072_instance.cpp
+8
-4
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n4096_instance.cpp
...thquant/instances/moe_smoothquant_fp16_n4096_instance.cpp
+8
-4
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n4096_tp_instance.cpp
...uant/instances/moe_smoothquant_fp16_n4096_tp_instance.cpp
+8
-4
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n512_instance.cpp
...othquant/instances/moe_smoothquant_fp16_n512_instance.cpp
+9
-4
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n64_n128_instance.cpp
...uant/instances/moe_smoothquant_fp16_n64_n128_instance.cpp
+7
-3
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n768_instance.cpp
...othquant/instances/moe_smoothquant_fp16_n768_instance.cpp
+7
-3
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fwd_api.cpp
.../14_moe_smoothquant/instances/moe_smoothquant_fwd_api.cpp
+55
-45
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_instance_common.hpp
...smoothquant/instances/moe_smoothquant_instance_common.hpp
+12
-9
example/ck_tile/14_moe_smoothquant/moe_smoothquant.cpp
example/ck_tile/14_moe_smoothquant/moe_smoothquant.cpp
+40
-28
example/ck_tile/14_moe_smoothquant/moe_smoothquant.hpp
example/ck_tile/14_moe_smoothquant/moe_smoothquant.hpp
+14
-24
example/ck_tile/14_moe_smoothquant/script/smoke_test.sh
example/ck_tile/14_moe_smoothquant/script/smoke_test.sh
+27
-25
No files found.
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n4096_instance.cpp
View file @
dec32dc6
...
@@ -6,9 +6,13 @@
...
@@ -6,9 +6,13 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
2
,
1
,
256
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
256
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
4
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
256
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n4096_tp_instance.cpp
View file @
dec32dc6
...
@@ -6,9 +6,13 @@
...
@@ -6,9 +6,13 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
2
,
1
,
256
,
8
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
256
,
8
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
4
,
1
,
256
,
4
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
256
,
4
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
256
,
8
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
256
,
4
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
true
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n512_instance.cpp
View file @
dec32dc6
...
@@ -6,8 +6,13 @@
...
@@ -6,8 +6,13 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
1
,
4
,
64
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
1
,
4
,
64
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
2
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
2
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
4
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
8
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
8
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
1
,
4
,
64
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
8
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n64_n128_instance.cpp
View file @
dec32dc6
...
@@ -6,7 +6,11 @@
...
@@ -6,7 +6,11 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
1
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
1
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
1
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
1
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
2
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
2
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
1
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
1
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n768_instance.cpp
View file @
dec32dc6
...
@@ -6,7 +6,11 @@
...
@@ -6,7 +6,11 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
3
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
3
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
6
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
6
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
1
,
12
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
12
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
3
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
6
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
12
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n1024_instance.cpp
View file @
dec32dc6
...
@@ -15,8 +15,13 @@ template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 16, 4, 64, 1, true
...
@@ -15,8 +15,13 @@ template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 16, 4, 64, 1, true
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 1, 1, 256, 4, true ,false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 1, 1, 256, 4, true ,false>>(const S&, A);
#endif
#endif
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
1
,
2
,
128
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
1
,
2
,
128
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
2
,
2
,
128
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
2
,
128
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
4
,
2
,
128
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
2
,
128
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
4
,
1
,
256
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
256
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
1
,
2
,
128
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
2
,
128
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
2
,
128
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
256
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n1536_instance.cpp
View file @
dec32dc6
...
@@ -6,8 +6,13 @@
...
@@ -6,8 +6,13 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
3
,
4
,
64
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
3
,
4
,
64
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
3
,
2
,
128
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
3
,
2
,
128
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
3
,
1
,
256
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
3
,
1
,
256
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
6
,
1
,
256
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
6
,
1
,
256
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
3
,
4
,
64
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
3
,
2
,
128
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
3
,
1
,
256
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
6
,
1
,
256
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n2048_instance.cpp
View file @
dec32dc6
...
@@ -6,9 +6,13 @@
...
@@ -6,9 +6,13 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
1
,
1
,
256
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
1
,
1
,
256
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
2
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
4
,
1
,
256
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
256
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
8
,
1
,
256
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
8
,
1
,
256
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
1
,
1
,
256
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
256
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
8
,
1
,
256
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n256_instance.cpp
View file @
dec32dc6
...
@@ -6,7 +6,11 @@
...
@@ -6,7 +6,11 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
1
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
1
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
2
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
4
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
1
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n3072_instance.cpp
View file @
dec32dc6
...
@@ -6,9 +6,13 @@
...
@@ -6,9 +6,13 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
3
,
1
,
128
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
3
,
1
,
128
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
3
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
3
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
6
,
1
,
256
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
6
,
1
,
256
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
3
,
1
,
1024
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
3
,
1
,
1024
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
3
,
1
,
128
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
3
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
6
,
1
,
256
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
3
,
1
,
1024
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n4096_instance.cpp
View file @
dec32dc6
...
@@ -6,9 +6,13 @@
...
@@ -6,9 +6,13 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
2
,
1
,
256
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
256
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
4
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
256
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
256
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n4096_tp_instance.cpp
View file @
dec32dc6
...
@@ -6,9 +6,13 @@
...
@@ -6,9 +6,13 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
2
,
1
,
256
,
8
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
256
,
8
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
4
,
1
,
256
,
4
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
256
,
4
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
256
,
8
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
256
,
4
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
1024
,
2
,
true
,
true
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
1
,
true
,
true
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n512_instance.cpp
View file @
dec32dc6
...
@@ -6,8 +6,13 @@
...
@@ -6,8 +6,13 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
1
,
4
,
64
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
1
,
4
,
64
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
2
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
4
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
8
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
8
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
1
,
4
,
64
,
8
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
8
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n64_n128_instance.cpp
View file @
dec32dc6
...
@@ -6,7 +6,11 @@
...
@@ -6,7 +6,11 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
1
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
1
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
1
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
1
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
2
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
1
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
1
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n768_instance.cpp
View file @
dec32dc6
...
@@ -6,7 +6,11 @@
...
@@ -6,7 +6,11 @@
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
3
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
3
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
6
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
6
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
1
,
12
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
12
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
3
,
4
,
64
,
4
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
6
,
4
,
64
,
2
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
moe_smoothquant_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
12
,
4
,
64
,
1
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fwd_api.cpp
View file @
dec32dc6
...
@@ -4,7 +4,8 @@
...
@@ -4,7 +4,8 @@
#include <ck_tile/core.hpp>
#include <ck_tile/core.hpp>
#include "moe_smoothquant.hpp"
#include "moe_smoothquant.hpp"
template
<
typename
DataType_
,
template
<
typename
InType
,
typename
OutType
,
ck_tile
::
index_t
Repeat_M_
,
// each thread repeat along M
ck_tile
::
index_t
Repeat_M_
,
// each thread repeat along M
ck_tile
::
index_t
Repeat_N_
,
// each thread repeat along N
ck_tile
::
index_t
Repeat_N_
,
// each thread repeat along N
ck_tile
::
index_t
ThreadPerBlock_M_
,
// num threads along M
ck_tile
::
index_t
ThreadPerBlock_M_
,
// num threads along M
...
@@ -12,7 +13,8 @@ template <typename DataType_,
...
@@ -12,7 +13,8 @@ template <typename DataType_,
ck_tile
::
index_t
Vector_N_
,
// vector size along N
ck_tile
::
index_t
Vector_N_
,
// vector size along N
bool
kPadN_
,
bool
kPadN_
,
bool
kTwoPass_
>
bool
kTwoPass_
>
using
trait_
=
moe_smoothquant_traits_
<
DataType_
,
using
trait_
=
moe_smoothquant_traits_
<
InType
,
OutType
,
Repeat_M_
,
Repeat_M_
,
Repeat_N_
,
Repeat_N_
,
ThreadPerBlock_M_
,
ThreadPerBlock_M_
,
...
@@ -21,7 +23,7 @@ using trait_ = moe_smoothquant_traits_<DataType_,
...
@@ -21,7 +23,7 @@ using trait_ = moe_smoothquant_traits_<DataType_,
kPadN_
,
kPadN_
,
kTwoPass_
>
;
kTwoPass_
>
;
template
<
typename
data
_type
>
template
<
typename
in_type
,
typename
out
_type
>
float
moe_smoothquant_dispatch
(
moe_smoothquant_traits
/*t*/
,
float
moe_smoothquant_dispatch
(
moe_smoothquant_traits
/*t*/
,
moe_smoothquant_args
a
,
moe_smoothquant_args
a
,
const
ck_tile
::
stream_config
&
s
)
const
ck_tile
::
stream_config
&
s
)
...
@@ -30,99 +32,99 @@ float moe_smoothquant_dispatch(moe_smoothquant_traits /*t*/,
...
@@ -30,99 +32,99 @@ float moe_smoothquant_dispatch(moe_smoothquant_traits /*t*/,
// clang-format off
// clang-format off
// rm rn tm tn vn pd 2p
// rm rn tm tn vn pd 2p
if
(
a
.
hidden_size
<=
64
)
{
if
(
a
.
hidden_size
<=
64
)
{
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
1
,
4
,
64
,
1
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
1
,
4
,
64
,
1
,
true
,
false
>>
(
s
,
a
);
}
}
else
if
(
a
.
hidden_size
<=
128
)
{
else
if
(
a
.
hidden_size
<=
128
)
{
if
(
a
.
hidden_size
%
2
==
0
)
if
(
a
.
hidden_size
%
2
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
1
,
4
,
64
,
2
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
1
,
4
,
64
,
2
,
true
,
false
>>
(
s
,
a
);
else
else
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
2
,
4
,
64
,
1
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
2
,
4
,
64
,
1
,
true
,
false
>>
(
s
,
a
);
}
}
else
if
(
a
.
hidden_size
<=
256
)
{
else
if
(
a
.
hidden_size
<=
256
)
{
if
(
a
.
hidden_size
%
4
==
0
)
if
(
a
.
hidden_size
%
4
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
1
,
4
,
64
,
4
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
1
,
4
,
64
,
4
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
2
==
0
)
else
if
(
a
.
hidden_size
%
2
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
2
,
4
,
64
,
2
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
2
,
4
,
64
,
2
,
true
,
false
>>
(
s
,
a
);
else
else
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
4
,
4
,
64
,
1
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
4
,
4
,
64
,
1
,
true
,
false
>>
(
s
,
a
);
}
}
else
if
(
a
.
hidden_size
<=
512
)
{
else
if
(
a
.
hidden_size
<=
512
)
{
if
(
a
.
hidden_size
%
8
==
0
)
if
(
a
.
hidden_size
%
8
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
1
,
4
,
64
,
8
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
1
,
4
,
64
,
8
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
4
==
0
)
else
if
(
a
.
hidden_size
%
4
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
2
,
4
,
64
,
4
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
2
,
4
,
64
,
4
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
2
==
0
)
else
if
(
a
.
hidden_size
%
2
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
4
,
4
,
64
,
2
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
4
,
4
,
64
,
2
,
true
,
false
>>
(
s
,
a
);
else
else
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
8
,
4
,
64
,
1
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
8
,
4
,
64
,
1
,
true
,
false
>>
(
s
,
a
);
}
}
else
if
(
a
.
hidden_size
<=
768
)
{
else
if
(
a
.
hidden_size
<=
768
)
{
if
(
a
.
hidden_size
%
4
==
0
)
if
(
a
.
hidden_size
%
4
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
3
,
4
,
64
,
4
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
3
,
4
,
64
,
4
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
2
==
0
)
else
if
(
a
.
hidden_size
%
2
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
6
,
4
,
64
,
2
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
6
,
4
,
64
,
2
,
true
,
false
>>
(
s
,
a
);
else
else
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
12
,
4
,
64
,
1
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
12
,
4
,
64
,
1
,
true
,
false
>>
(
s
,
a
);
}
}
else
if
(
a
.
hidden_size
<=
1024
)
{
else
if
(
a
.
hidden_size
<=
1024
)
{
if
(
a
.
hidden_size
%
8
==
0
)
if
(
a
.
hidden_size
%
8
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
1
,
2
,
128
,
8
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
1
,
2
,
128
,
8
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
4
==
0
)
else
if
(
a
.
hidden_size
%
4
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
2
,
2
,
128
,
4
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
2
,
2
,
128
,
4
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
2
==
0
)
else
if
(
a
.
hidden_size
%
2
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
4
,
2
,
128
,
2
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
4
,
2
,
128
,
2
,
true
,
false
>>
(
s
,
a
);
else
else
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
4
,
1
,
256
,
1
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
4
,
1
,
256
,
1
,
true
,
false
>>
(
s
,
a
);
}
}
else
if
(
a
.
hidden_size
<=
1536
)
{
else
if
(
a
.
hidden_size
<=
1536
)
{
if
(
a
.
hidden_size
%
8
==
0
)
if
(
a
.
hidden_size
%
8
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
3
,
4
,
64
,
8
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
3
,
4
,
64
,
8
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
4
==
0
)
else
if
(
a
.
hidden_size
%
4
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
3
,
2
,
128
,
4
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
3
,
2
,
128
,
4
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
2
==
0
)
else
if
(
a
.
hidden_size
%
2
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
3
,
1
,
256
,
2
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
3
,
1
,
256
,
2
,
true
,
false
>>
(
s
,
a
);
else
else
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
6
,
1
,
256
,
1
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
6
,
1
,
256
,
1
,
true
,
false
>>
(
s
,
a
);
}
}
else
if
(
a
.
hidden_size
<=
2048
)
{
else
if
(
a
.
hidden_size
<=
2048
)
{
if
(
a
.
hidden_size
%
8
==
0
)
if
(
a
.
hidden_size
%
8
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
1
,
1
,
256
,
8
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
1
,
1
,
256
,
8
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
4
==
0
)
else
if
(
a
.
hidden_size
%
4
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
2
,
1
,
256
,
4
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
2
,
1
,
256
,
4
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
2
==
0
)
else
if
(
a
.
hidden_size
%
2
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
4
,
1
,
256
,
2
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
4
,
1
,
256
,
2
,
true
,
false
>>
(
s
,
a
);
else
else
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
8
,
1
,
256
,
1
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
8
,
1
,
256
,
1
,
true
,
false
>>
(
s
,
a
);
}
}
else
if
(
a
.
hidden_size
<=
3072
)
{
else
if
(
a
.
hidden_size
<=
3072
)
{
if
(
a
.
hidden_size
%
8
==
0
)
if
(
a
.
hidden_size
%
8
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
3
,
1
,
128
,
8
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
3
,
1
,
128
,
8
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
4
==
0
)
else
if
(
a
.
hidden_size
%
4
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
3
,
1
,
256
,
4
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
3
,
1
,
256
,
4
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
2
==
0
)
else
if
(
a
.
hidden_size
%
2
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
6
,
1
,
256
,
2
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
6
,
1
,
256
,
2
,
true
,
false
>>
(
s
,
a
);
else
else
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
3
,
1
,
1024
,
1
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
3
,
1
,
1024
,
1
,
true
,
false
>>
(
s
,
a
);
}
}
else
if
(
a
.
hidden_size
<=
4096
)
{
else
if
(
a
.
hidden_size
<=
4096
)
{
if
(
a
.
hidden_size
%
8
==
0
)
if
(
a
.
hidden_size
%
8
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
2
,
1
,
256
,
8
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
2
,
1
,
256
,
8
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
4
==
0
)
else
if
(
a
.
hidden_size
%
4
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
4
,
1
,
256
,
4
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
4
,
1
,
256
,
4
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
2
==
0
)
else
if
(
a
.
hidden_size
%
2
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
2
,
1
,
1024
,
2
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
2
,
1
,
1024
,
2
,
true
,
false
>>
(
s
,
a
);
else
else
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
4
,
1
,
1024
,
1
,
true
,
false
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
4
,
1
,
1024
,
1
,
true
,
false
>>
(
s
,
a
);
}
}
else
if
(
a
.
hidden_size
>
4096
)
{
else
if
(
a
.
hidden_size
>
4096
)
{
if
(
a
.
hidden_size
%
8
==
0
)
if
(
a
.
hidden_size
%
8
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
2
,
1
,
256
,
8
,
true
,
true
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
2
,
1
,
256
,
8
,
true
,
true
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
4
==
0
)
else
if
(
a
.
hidden_size
%
4
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
4
,
1
,
256
,
4
,
true
,
true
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
4
,
1
,
256
,
4
,
true
,
true
>>
(
s
,
a
);
else
if
(
a
.
hidden_size
%
2
==
0
)
else
if
(
a
.
hidden_size
%
2
==
0
)
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
2
,
1
,
1024
,
2
,
true
,
true
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
2
,
1
,
1024
,
2
,
true
,
true
>>
(
s
,
a
);
else
else
r
=
moe_smoothquant_
<
trait_
<
data
_type
,
1
,
4
,
1
,
1024
,
1
,
true
,
true
>>
(
s
,
a
);
r
=
moe_smoothquant_
<
trait_
<
in_type
,
out
_type
,
1
,
4
,
1
,
1024
,
1
,
true
,
true
>>
(
s
,
a
);
}
}
return
r
;
return
r
;
// clang-format on
// clang-format on
...
@@ -132,13 +134,21 @@ float moe_smoothquant(moe_smoothquant_traits t,
...
@@ -132,13 +134,21 @@ float moe_smoothquant(moe_smoothquant_traits t,
moe_smoothquant_args
a
,
moe_smoothquant_args
a
,
const
ck_tile
::
stream_config
&
s
)
const
ck_tile
::
stream_config
&
s
)
{
{
if
(
t
.
data
_type
.
compare
(
"fp16"
)
==
0
)
if
(
t
.
in
_type
.
compare
(
"fp16"
)
==
0
&&
t
.
out_type
==
"int8"
)
{
{
return
moe_smoothquant_dispatch
<
ck_tile
::
fp16_t
>
(
t
,
a
,
s
);
return
moe_smoothquant_dispatch
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
>
(
t
,
a
,
s
);
}
}
else
if
(
t
.
data
_type
.
compare
(
"
b
f16"
)
==
0
)
else
if
(
t
.
in
_type
.
compare
(
"f
p
16"
)
==
0
&&
t
.
out_type
==
"fp8"
)
{
{
return
moe_smoothquant_dispatch
<
ck_tile
::
bf16_t
>
(
t
,
a
,
s
);
return
moe_smoothquant_dispatch
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
>
(
t
,
a
,
s
);
}
else
if
(
t
.
in_type
.
compare
(
"bf16"
)
==
0
&&
t
.
out_type
==
"int8"
)
{
return
moe_smoothquant_dispatch
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
>
(
t
,
a
,
s
);
}
else
if
(
t
.
in_type
.
compare
(
"bf16"
)
==
0
&&
t
.
out_type
==
"fp8"
)
{
return
moe_smoothquant_dispatch
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
>
(
t
,
a
,
s
);
}
}
else
else
throw
std
::
runtime_error
(
"Without supported instances!"
);
throw
std
::
runtime_error
(
"Without supported instances!"
);
...
...
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_instance_common.hpp
View file @
dec32dc6
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
5
, Advanced Micro Devices, Inc. All rights reserved.
#include <ck_tile/core.hpp>
#include <ck_tile/core.hpp>
#include "moe_smoothquant.hpp"
#include "moe_smoothquant.hpp"
...
@@ -11,7 +11,8 @@
...
@@ -11,7 +11,8 @@
using
S
=
ck_tile
::
stream_config
;
using
S
=
ck_tile
::
stream_config
;
using
A
=
moe_smoothquant_args
;
using
A
=
moe_smoothquant_args
;
template
<
typename
DataType_
,
template
<
typename
InputType_
,
typename
OutputType_
,
ck_tile
::
index_t
Repeat_M_
,
// each thread repeat along M
ck_tile
::
index_t
Repeat_M_
,
// each thread repeat along M
ck_tile
::
index_t
Repeat_N_
,
// each thread repeat along N
ck_tile
::
index_t
Repeat_N_
,
// each thread repeat along N
ck_tile
::
index_t
ThreadPerBlock_M_
,
// num threads along M
ck_tile
::
index_t
ThreadPerBlock_M_
,
// num threads along M
...
@@ -19,7 +20,8 @@ template <typename DataType_,
...
@@ -19,7 +20,8 @@ template <typename DataType_,
ck_tile
::
index_t
Vector_N_
,
// vector size along N
ck_tile
::
index_t
Vector_N_
,
// vector size along N
bool
kPadN_
,
bool
kPadN_
,
bool
kTwoPass_
>
bool
kTwoPass_
>
using
trait_
=
moe_smoothquant_traits_
<
DataType_
,
using
trait_
=
moe_smoothquant_traits_
<
InputType_
,
OutputType_
,
Repeat_M_
,
Repeat_M_
,
Repeat_N_
,
Repeat_N_
,
ThreadPerBlock_M_
,
ThreadPerBlock_M_
,
...
@@ -31,14 +33,15 @@ using trait_ = moe_smoothquant_traits_<DataType_,
...
@@ -31,14 +33,15 @@ using trait_ = moe_smoothquant_traits_<DataType_,
template
<
typename
Traits_
>
template
<
typename
Traits_
>
float
moe_smoothquant_
(
const
S
&
s
,
A
a
)
float
moe_smoothquant_
(
const
S
&
s
,
A
a
)
{
{
using
DataType
=
typename
Traits_
::
DataType
;
using
InputType
=
typename
Traits_
::
InputType
;
using
OutputType
=
typename
Traits_
::
OutputType
;
using
PipelineProblem
=
ck_tile
::
SmoothquantPipelineProblem
<
using
PipelineProblem
=
ck_tile
::
SmoothquantPipelineProblem
<
typename
MoeSmoothquantTypeConfig
<
Data
Type
>::
XDataType
,
typename
MoeSmoothquantTypeConfig
<
InputType
,
Output
Type
>::
XDataType
,
typename
MoeSmoothquantTypeConfig
<
Data
Type
>::
X
ScaleDataType
,
typename
MoeSmoothquantTypeConfig
<
InputType
,
Output
Type
>::
Smooth
ScaleDataType
,
typename
MoeSmoothquantTypeConfig
<
Data
Type
>::
ComputeDataType
,
typename
MoeSmoothquantTypeConfig
<
InputType
,
Output
Type
>::
ComputeDataType
,
typename
MoeSmoothquantTypeConfig
<
Data
Type
>::
YScaleDataType
,
typename
MoeSmoothquantTypeConfig
<
InputType
,
Output
Type
>::
YScaleDataType
,
typename
MoeSmoothquantTypeConfig
<
Data
Type
>::
QYDataType
,
typename
MoeSmoothquantTypeConfig
<
InputType
,
Output
Type
>::
QYDataType
,
typename
Traits_
::
Shape
,
typename
Traits_
::
Shape
,
Traits_
::
kPadN
,
Traits_
::
kPadN
,
Traits_
::
kTwoPass
>
;
Traits_
::
kTwoPass
>
;
...
...
example/ck_tile/14_moe_smoothquant/moe_smoothquant.cpp
View file @
dec32dc6
...
@@ -63,7 +63,8 @@ auto create_args(int argc, char* argv[])
...
@@ -63,7 +63,8 @@ auto create_args(int argc, char* argv[])
.
insert
(
"stride"
,
"-1"
,
"stride per row, if -1 then equal to hidden_size"
)
.
insert
(
"stride"
,
"-1"
,
"stride per row, if -1 then equal to hidden_size"
)
.
insert
(
"v"
,
"1"
,
"cpu validation or not"
)
.
insert
(
"v"
,
"1"
,
"cpu validation or not"
)
.
insert
(
"kname"
,
"1"
,
"print kernel name or not"
)
.
insert
(
"kname"
,
"1"
,
"print kernel name or not"
)
.
insert
(
"prec"
,
"fp16"
,
"precision"
)
.
insert
(
"prec_i"
,
"fp16"
,
"input precision, fp16/bf16"
)
.
insert
(
"prec_o"
,
"int8"
,
"precision, int8/fp8"
)
.
insert
(
"warmup"
,
"5"
,
"cold iter"
)
.
insert
(
"warmup"
,
"5"
,
"cold iter"
)
.
insert
(
"repeat"
,
"20"
,
"hot iter"
);
.
insert
(
"repeat"
,
"20"
,
"hot iter"
);
...
@@ -71,7 +72,7 @@ auto create_args(int argc, char* argv[])
...
@@ -71,7 +72,7 @@ auto create_args(int argc, char* argv[])
return
std
::
make_tuple
(
result
,
arg_parser
);
return
std
::
make_tuple
(
result
,
arg_parser
);
}
}
template
<
typename
Data
Type
>
template
<
typename
InputType
,
typename
Output
Type
>
bool
run
(
const
ck_tile
::
ArgParser
&
arg_parser
)
bool
run
(
const
ck_tile
::
ArgParser
&
arg_parser
)
{
{
ck_tile
::
index_t
tokens
=
arg_parser
.
get_int
(
"t"
);
ck_tile
::
index_t
tokens
=
arg_parser
.
get_int
(
"t"
);
...
@@ -81,7 +82,8 @@ bool run(const ck_tile::ArgParser& arg_parser)
...
@@ -81,7 +82,8 @@ bool run(const ck_tile::ArgParser& arg_parser)
stride
=
hidden_size
;
stride
=
hidden_size
;
ck_tile
::
index_t
experts
=
arg_parser
.
get_int
(
"e"
);
ck_tile
::
index_t
experts
=
arg_parser
.
get_int
(
"e"
);
ck_tile
::
index_t
topk
=
arg_parser
.
get_int
(
"k"
);
ck_tile
::
index_t
topk
=
arg_parser
.
get_int
(
"k"
);
std
::
string
data_type
=
arg_parser
.
get_str
(
"prec"
);
std
::
string
prec_i
=
arg_parser
.
get_str
(
"prec_i"
);
std
::
string
prec_o
=
arg_parser
.
get_str
(
"prec_o"
);
int
kname
=
arg_parser
.
get_int
(
"kname"
);
int
kname
=
arg_parser
.
get_int
(
"kname"
);
int
do_validation
=
arg_parser
.
get_int
(
"v"
);
int
do_validation
=
arg_parser
.
get_int
(
"v"
);
int
warmup
=
arg_parser
.
get_int
(
"warmup"
);
int
warmup
=
arg_parser
.
get_int
(
"warmup"
);
...
@@ -89,17 +91,17 @@ bool run(const ck_tile::ArgParser& arg_parser)
...
@@ -89,17 +91,17 @@ bool run(const ck_tile::ArgParser& arg_parser)
assert
(
stride
>=
hidden_size
);
assert
(
stride
>=
hidden_size
);
using
TypeConfig
=
MoeSmoothquantTypeConfig
<
Data
Type
>
;
using
TypeConfig
=
MoeSmoothquantTypeConfig
<
InputType
,
Output
Type
>
;
using
XDataType
=
typename
TypeConfig
::
XDataType
;
using
XDataType
=
typename
TypeConfig
::
XDataType
;
using
X
ScaleDataType
=
typename
TypeConfig
::
X
ScaleDataType
;
using
Smooth
ScaleDataType
=
typename
TypeConfig
::
Smooth
ScaleDataType
;
using
YScaleDataType
=
typename
TypeConfig
::
YScaleDataType
;
using
YScaleDataType
=
typename
TypeConfig
::
YScaleDataType
;
using
QYDataType
=
typename
TypeConfig
::
QYDataType
;
using
QYDataType
=
typename
TypeConfig
::
QYDataType
;
using
ComputeDataType
=
typename
TypeConfig
::
ComputeDataType
;
using
ComputeDataType
=
typename
TypeConfig
::
ComputeDataType
;
// host verify
// host verify
ck_tile
::
HostTensor
<
XDataType
>
x_host
({
tokens
,
hidden_size
},
{
stride
,
1
});
ck_tile
::
HostTensor
<
XDataType
>
x_host
({
tokens
,
hidden_size
},
{
stride
,
1
});
ck_tile
::
HostTensor
<
X
ScaleDataType
>
x
scale_host
({
experts
*
hidden_size
});
ck_tile
::
HostTensor
<
Smooth
ScaleDataType
>
sm
scale_host
({
experts
*
hidden_size
});
ck_tile
::
HostTensor
<
ck_tile
::
index_t
>
topk_ids_host
({
tokens
,
topk
});
ck_tile
::
HostTensor
<
ck_tile
::
index_t
>
topk_ids_host
({
tokens
,
topk
});
ck_tile
::
HostTensor
<
YScaleDataType
>
yscale_host_ref
({
topk
*
tokens
},
{
1
});
ck_tile
::
HostTensor
<
YScaleDataType
>
yscale_host_ref
({
topk
*
tokens
},
{
1
});
...
@@ -110,26 +112,26 @@ bool run(const ck_tile::ArgParser& arg_parser)
...
@@ -110,26 +112,26 @@ bool run(const ck_tile::ArgParser& arg_parser)
topid_unique_gen
<
ck_tile
::
index_t
>
(
topk_ids_host
.
mData
,
tokens
,
topk
,
experts
,
11937
);
topid_unique_gen
<
ck_tile
::
index_t
>
(
topk_ids_host
.
mData
,
tokens
,
topk
,
experts
,
11937
);
ck_tile
::
FillUniformDistribution
<
XDataType
>
{
-
.5
f
,
.5
f
}(
x_host
);
ck_tile
::
FillUniformDistribution
<
XDataType
>
{
-
.5
f
,
.5
f
}(
x_host
);
ck_tile
::
FillUniformDistribution
<
X
ScaleDataType
>
{
1e-3
,
.5
f
}(
x
scale_host
);
ck_tile
::
FillUniformDistribution
<
Smooth
ScaleDataType
>
{
1e-3
,
.5
f
}(
sm
scale_host
);
ck_tile
::
DeviceMem
x_buf
(
x_host
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
x_buf
(
x_host
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
x
scale_buf
(
x
scale_host
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
sm
scale_buf
(
sm
scale_host
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
topk_ids_buf
(
topk_ids_host
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
topk_ids_buf
(
topk_ids_host
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
yscale_buf
(
yscale_host_dev
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
yscale_buf
(
yscale_host_dev
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
qy_buf
(
qy_host_dev
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
qy_buf
(
qy_host_dev
.
get_element_space_size_in_bytes
());
x_buf
.
ToDevice
(
x_host
.
data
());
x_buf
.
ToDevice
(
x_host
.
data
());
x
scale_buf
.
ToDevice
(
x
scale_host
.
data
());
sm
scale_buf
.
ToDevice
(
sm
scale_host
.
data
());
topk_ids_buf
.
ToDevice
(
topk_ids_host
.
data
());
topk_ids_buf
.
ToDevice
(
topk_ids_host
.
data
());
std
::
cout
<<
"["
<<
data_type
<<
"]"
std
::
cout
<<
"["
<<
prec_i
<<
"-"
<<
prec_o
<<
"]"
<<
" tokens:"
<<
tokens
<<
", hidden_size:"
<<
hidden_size
<<
", stride:"
<<
stride
<<
" tokens:"
<<
tokens
<<
", hidden_size:"
<<
hidden_size
<<
", stride:"
<<
stride
<<
", experts:"
<<
experts
<<
", topk:"
<<
topk
<<
std
::
flush
;
<<
", experts:"
<<
experts
<<
", topk:"
<<
topk
<<
std
::
flush
;
moe_smoothquant_traits
traits
{
data_type
};
moe_smoothquant_traits
traits
{
prec_i
,
prec_o
};
moe_smoothquant_args
args
{
x_buf
.
GetDeviceBuffer
(),
moe_smoothquant_args
args
{
x_buf
.
GetDeviceBuffer
(),
x
scale_buf
.
GetDeviceBuffer
(),
sm
scale_buf
.
GetDeviceBuffer
(),
topk_ids_buf
.
GetDeviceBuffer
(),
topk_ids_buf
.
GetDeviceBuffer
(),
yscale_buf
.
GetDeviceBuffer
(),
yscale_buf
.
GetDeviceBuffer
(),
qy_buf
.
GetDeviceBuffer
(),
qy_buf
.
GetDeviceBuffer
(),
...
@@ -143,9 +145,10 @@ bool run(const ck_tile::ArgParser& arg_parser)
...
@@ -143,9 +145,10 @@ bool run(const ck_tile::ArgParser& arg_parser)
float
ave_time
=
moe_smoothquant
(
float
ave_time
=
moe_smoothquant
(
traits
,
args
,
ck_tile
::
stream_config
{
nullptr
,
true
,
kname
?
1
:
0
,
warmup
,
repeat
});
traits
,
args
,
ck_tile
::
stream_config
{
nullptr
,
true
,
kname
?
1
:
0
,
warmup
,
repeat
});
std
::
size_t
num_byte
=
std
::
size_t
num_byte
=
sizeof
(
XDataType
)
*
tokens
*
hidden_size
+
sizeof
(
XDataType
)
*
tokens
*
hidden_size
+
sizeof
(
XScaleDataType
)
*
topk
*
hidden_size
+
sizeof
(
SmoothScaleDataType
)
*
topk
*
hidden_size
+
sizeof
(
YScaleDataType
)
*
topk
*
tokens
+
sizeof
(
QYDataType
)
*
topk
*
tokens
*
hidden_size
;
sizeof
(
YScaleDataType
)
*
topk
*
tokens
+
sizeof
(
QYDataType
)
*
topk
*
tokens
*
hidden_size
;
float
gb_per_sec
=
num_byte
/
1.E6
/
ave_time
;
float
gb_per_sec
=
num_byte
/
1.E6
/
ave_time
;
std
::
cout
<<
", "
<<
ave_time
*
1.E3
<<
" us, "
<<
gb_per_sec
<<
" GB/s"
<<
std
::
flush
;
std
::
cout
<<
", "
<<
ave_time
*
1.E3
<<
" us, "
<<
gb_per_sec
<<
" GB/s"
<<
std
::
flush
;
...
@@ -165,11 +168,11 @@ bool run(const ck_tile::ArgParser& arg_parser)
...
@@ -165,11 +168,11 @@ bool run(const ck_tile::ArgParser& arg_parser)
for
(
int
i_h
=
0
;
i_h
<
hidden_size
;
++
i_h
)
for
(
int
i_h
=
0
;
i_h
<
hidden_size
;
++
i_h
)
{
{
auto
v_
x
scale
=
ck_tile
::
type_convert
<
ComputeDataType
>
(
auto
v_
sm
scale
=
ck_tile
::
type_convert
<
ComputeDataType
>
(
x
scale_host
(
i_expert
*
hidden_size
+
i_h
));
sm
scale_host
(
i_expert
*
hidden_size
+
i_h
));
auto
v_x
=
ck_tile
::
type_convert
<
ComputeDataType
>
(
x_host
(
i_token
,
i_h
));
auto
v_x
=
ck_tile
::
type_convert
<
ComputeDataType
>
(
x_host
(
i_token
,
i_h
));
// y_host(i_token * topk + i_topk, i_h) = v_x * v_
x
scale;
// y_host(i_token * topk + i_topk, i_h) = v_x * v_
sm
scale;
y_host
(
i_topk
*
tokens
+
i_token
,
i_h
)
=
v_x
*
v_
x
scale
;
y_host
(
i_topk
*
tokens
+
i_token
,
i_h
)
=
v_x
*
v_
sm
scale
;
}
}
}
}
};
};
...
@@ -250,14 +253,23 @@ int main(int argc, char* argv[])
...
@@ -250,14 +253,23 @@ int main(int argc, char* argv[])
if
(
!
result
)
if
(
!
result
)
return
-
1
;
return
-
1
;
const
std
::
string
data_type
=
arg_parser
.
get_str
(
"prec"
);
const
std
::
string
prec_i
=
arg_parser
.
get_str
(
"prec_i"
);
if
(
data_type
==
"fp16"
)
const
std
::
string
prec_o
=
arg_parser
.
get_str
(
"prec_o"
);
if
(
prec_i
==
"fp16"
&&
prec_o
==
"int8"
)
{
return
run
<
ck_tile
::
half_t
,
ck_tile
::
int8_t
>
(
arg_parser
)
?
0
:
-
2
;
}
else
if
(
prec_i
==
"fp16"
&&
prec_o
==
"fp8"
)
{
return
run
<
ck_tile
::
half_t
,
ck_tile
::
fp8_t
>
(
arg_parser
)
?
0
:
-
2
;
}
else
if
(
prec_i
==
"bf16"
&&
prec_o
==
"int8"
)
{
{
return
run
<
ck_tile
::
half
_t
>
(
arg_parser
)
?
0
:
-
2
;
return
run
<
ck_tile
::
bf16_t
,
ck_tile
::
int8
_t
>
(
arg_parser
)
?
0
:
-
2
;
}
}
else
if
(
data_type
==
"bf16
"
)
else
if
(
prec_i
==
"bf16"
&&
prec_o
==
"fp8
"
)
{
{
return
run
<
ck_tile
::
bf16_t
>
(
arg_parser
)
?
0
:
-
2
;
return
run
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
>
(
arg_parser
)
?
0
:
-
2
;
}
}
return
-
3
;
return
-
3
;
...
...
example/ck_tile/14_moe_smoothquant/moe_smoothquant.hpp
View file @
dec32dc6
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
5
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
...
@@ -8,26 +8,13 @@
...
@@ -8,26 +8,13 @@
#include "ck_tile/ops/smoothquant.hpp"
#include "ck_tile/ops/smoothquant.hpp"
#include <string>
#include <string>
template
<
typename
DataType
>
template
<
typename
InputType
,
typename
OutputType
>
struct
MoeSmoothquantTypeConfig
;
struct
MoeSmoothquantTypeConfig
template
<
>
struct
MoeSmoothquantTypeConfig
<
ck_tile
::
half_t
>
{
using
XDataType
=
ck_tile
::
half_t
;
using
XScaleDataType
=
float
;
using
YScaleDataType
=
float
;
using
QYDataType
=
ck_tile
::
int8_t
;
using
ComputeDataType
=
float
;
};
template
<
>
struct
MoeSmoothquantTypeConfig
<
ck_tile
::
bf16_t
>
{
{
using
XDataType
=
ck_tile
::
bf16_t
;
using
XDataType
=
InputType
;
using
X
ScaleDataType
=
float
;
using
Smooth
ScaleDataType
=
float
;
using
YScaleDataType
=
float
;
using
YScaleDataType
=
float
;
using
QYDataType
=
ck_tile
::
int8_t
;
using
QYDataType
=
OutputType
;
using
ComputeDataType
=
float
;
using
ComputeDataType
=
float
;
};
};
...
@@ -37,7 +24,8 @@ struct moe_smoothquant_args : public ck_tile::MoeSmoothquantHostArgs
...
@@ -37,7 +24,8 @@ struct moe_smoothquant_args : public ck_tile::MoeSmoothquantHostArgs
};
};
// this is used to pattern-match internl kernel implementation, not to instantiate kernel
// this is used to pattern-match internl kernel implementation, not to instantiate kernel
template
<
typename
DataType_
,
template
<
typename
InputType_
,
typename
OutputType_
,
ck_tile
::
index_t
Repeat_M_
,
// each thread repeat along M
ck_tile
::
index_t
Repeat_M_
,
// each thread repeat along M
ck_tile
::
index_t
Repeat_N_
,
// each thread repeat along N
ck_tile
::
index_t
Repeat_N_
,
// each thread repeat along N
ck_tile
::
index_t
ThreadPerBlock_M_
,
// num threads along M
ck_tile
::
index_t
ThreadPerBlock_M_
,
// num threads along M
...
@@ -47,7 +35,8 @@ template <typename DataType_,
...
@@ -47,7 +35,8 @@ template <typename DataType_,
bool
kTwoPass_
>
bool
kTwoPass_
>
struct
moe_smoothquant_traits_
struct
moe_smoothquant_traits_
{
{
using
DataType
=
ck_tile
::
remove_cvref_t
<
DataType_
>
;
using
InputType
=
ck_tile
::
remove_cvref_t
<
InputType_
>
;
using
OutputType
=
ck_tile
::
remove_cvref_t
<
OutputType_
>
;
static
constexpr
bool
is_warp_per_row
=
ThreadPerBlock_N_
<=
warpSize
;
static
constexpr
bool
is_warp_per_row
=
ThreadPerBlock_N_
<=
warpSize
;
static_assert
((
ThreadPerBlock_M_
*
ThreadPerBlock_N_
)
%
warpSize
==
0
);
static_assert
((
ThreadPerBlock_M_
*
ThreadPerBlock_N_
)
%
warpSize
==
0
);
...
@@ -108,7 +97,8 @@ float moe_smoothquant_(const ck_tile::stream_config& s, moe_smoothquant_args a);
...
@@ -108,7 +97,8 @@ float moe_smoothquant_(const ck_tile::stream_config& s, moe_smoothquant_args a);
// This is the public API, will be generated by script
// This is the public API, will be generated by script
struct
moe_smoothquant_traits
struct
moe_smoothquant_traits
{
{
std
::
string
data_type
;
std
::
string
in_type
;
// input type
std
::
string
out_type
;
// output type
};
};
float
moe_smoothquant
(
moe_smoothquant_traits
,
moe_smoothquant_args
,
const
ck_tile
::
stream_config
&
);
float
moe_smoothquant
(
moe_smoothquant_traits
,
moe_smoothquant_args
,
const
ck_tile
::
stream_config
&
);
example/ck_tile/14_moe_smoothquant/script/smoke_test.sh
View file @
dec32dc6
...
@@ -2,29 +2,31 @@
...
@@ -2,29 +2,31 @@
EXE
=
build/bin/tile_example_moe_smoothquant
EXE
=
build/bin/tile_example_moe_smoothquant
for
pr_i
in
"fp16"
"bf16"
;
do
for
pr_i
in
"fp16"
"bf16"
;
do
$EXE
-prec
=
$pr_i
-t
=
99
-h
=
13
for
pr_o
in
"int8"
"fp8"
;
do
$EXE
-prec
=
$pr_i
-t
=
17
-h
=
16
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
99
-h
=
13
$EXE
-prec
=
$pr_i
-t
=
1
-h
=
100
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
17
-h
=
16
$EXE
-prec
=
$pr_i
-t
=
4
-h
=
128
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
1
-h
=
100
$EXE
-prec
=
$pr_i
-t
=
80
-h
=
127
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
4
-h
=
128
$EXE
-prec
=
$pr_i
-t
=
22
-h
=
255
-stride
=
256
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
80
-h
=
127
$EXE
-prec
=
$pr_i
-t
=
7
-h
=
599
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
22
-h
=
255
-stride
=
256
$EXE
-prec
=
$pr_i
-t
=
19
-h
=
512
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
7
-h
=
599
$EXE
-prec
=
$pr_i
-t
=
33
-h
=
313
-stride
=
1000
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
19
-h
=
512
$EXE
-prec
=
$pr_i
-t
=
11
-h
=
510
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
33
-h
=
313
-stride
=
1000
$EXE
-prec
=
$pr_i
-t
=
171
-h
=
676
-stride
=
818
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
11
-h
=
510
$EXE
-prec
=
$pr_i
-t
=
91
-h
=
636
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
171
-h
=
676
-stride
=
818
$EXE
-prec
=
$pr_i
-t
=
12
-h
=
768
-stride
=
800
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
91
-h
=
636
$EXE
-prec
=
$pr_i
-t
=
100
-h
=
766
-stride
=
812
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
12
-h
=
768
-stride
=
800
$EXE
-prec
=
$pr_i
-t
=
31
-h
=
1024
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
100
-h
=
766
-stride
=
812
$EXE
-prec
=
$pr_i
-t
=
64
-h
=
1000
-stride
=
1004
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
31
-h
=
1024
$EXE
-prec
=
$pr_i
-t
=
8
-h
=
1501
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
64
-h
=
1000
-stride
=
1004
$EXE
-prec
=
$pr_i
-t
=
3
-h
=
1826
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
8
-h
=
1501
$EXE
-prec
=
$pr_i
-t
=
5
-h
=
2040
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
3
-h
=
1826
$EXE
-prec
=
$pr_i
-t
=
7
-h
=
2734
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
5
-h
=
2040
$EXE
-prec
=
$pr_i
-t
=
1
-h
=
3182
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
7
-h
=
2734
$EXE
-prec
=
$pr_i
-t
=
9
-h
=
4096
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
1
-h
=
3182
$EXE
-prec
=
$pr_i
-t
=
3
-h
=
8192
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
9
-h
=
4096
$EXE
-prec
=
$pr_i
-t
=
1
-h
=
10547
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
3
-h
=
8192
$EXE
-prec
=
$pr_i
-t
=
3
-h
=
17134
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
1
-h
=
10547
$EXE
-prec_i
=
$pr_i
-prec_o
=
$pr_o
-t
=
3
-h
=
17134
done
done
done
Prev
1
2
3
4
5
6
7
8
…
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment