Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
4525c5d7
Commit
4525c5d7
authored
Dec 02, 2024
by
coderfeli
Browse files
merge upstream
parents
a8d88d8d
44828b7c
Changes
308
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
403 additions
and
0 deletions
+403
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n1536_instance.cpp
...thquant/instances/moe_smoothquant_bf16_n1536_instance.cpp
+13
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n2048_instance.cpp
...thquant/instances/moe_smoothquant_bf16_n2048_instance.cpp
+14
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n256_instance.cpp
...othquant/instances/moe_smoothquant_bf16_n256_instance.cpp
+12
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n3072_instance.cpp
...thquant/instances/moe_smoothquant_bf16_n3072_instance.cpp
+14
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n4096_instance.cpp
...thquant/instances/moe_smoothquant_bf16_n4096_instance.cpp
+14
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n4096_tp_instance.cpp
...uant/instances/moe_smoothquant_bf16_n4096_tp_instance.cpp
+14
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n512_instance.cpp
...othquant/instances/moe_smoothquant_bf16_n512_instance.cpp
+13
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n64_n128_instance.cpp
...uant/instances/moe_smoothquant_bf16_n64_n128_instance.cpp
+12
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n768_instance.cpp
...othquant/instances/moe_smoothquant_bf16_n768_instance.cpp
+12
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n1024_instance.cpp
...thquant/instances/moe_smoothquant_fp16_n1024_instance.cpp
+22
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n1536_instance.cpp
...thquant/instances/moe_smoothquant_fp16_n1536_instance.cpp
+13
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n2048_instance.cpp
...thquant/instances/moe_smoothquant_fp16_n2048_instance.cpp
+14
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n256_instance.cpp
...othquant/instances/moe_smoothquant_fp16_n256_instance.cpp
+12
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n3072_instance.cpp
...thquant/instances/moe_smoothquant_fp16_n3072_instance.cpp
+14
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n4096_instance.cpp
...thquant/instances/moe_smoothquant_fp16_n4096_instance.cpp
+14
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n4096_tp_instance.cpp
...uant/instances/moe_smoothquant_fp16_n4096_tp_instance.cpp
+14
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n512_instance.cpp
...othquant/instances/moe_smoothquant_fp16_n512_instance.cpp
+13
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n64_n128_instance.cpp
...uant/instances/moe_smoothquant_fp16_n64_n128_instance.cpp
+12
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n768_instance.cpp
...othquant/instances/moe_smoothquant_fp16_n768_instance.cpp
+12
-0
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fwd_api.cpp
.../14_moe_smoothquant/instances/moe_smoothquant_fwd_api.cpp
+145
-0
No files found.
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n1536_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::bf16_t.
// The argument lists mirror the `hidden_size <= 1536` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 3, 4,  64, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 3, 2, 128, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 3, 1, 256, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 6, 1, 256, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n2048_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::bf16_t.
// The argument lists mirror the `hidden_size <= 2048` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 1, 1, 256, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 2, 1, 256, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 4, 1, 256, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 8, 1, 256, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n256_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::bf16_t.
// The argument lists mirror the `hidden_size <= 256` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 1, 4, 64, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 2, 4, 64, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 4, 4, 64, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n3072_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::bf16_t.
// The argument lists mirror the `hidden_size <= 3072` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 3, 1,  128, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 3, 1,  256, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 6, 1,  256, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 3, 1, 1024, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n4096_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::bf16_t.
// The argument lists mirror the `hidden_size <= 4096` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp); last argument is false.
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 2, 1,  256, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 4, 1,  256, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 2, 1, 1024, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 4, 1, 1024, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n4096_tp_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::bf16_t.
// The argument lists mirror the `hidden_size > 4096` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp); last argument is true.
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 2, 1,  256, 8, true, true>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 4, 1,  256, 4, true, true>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 2, 1, 1024, 2, true, true>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 4, 1, 1024, 1, true, true>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n512_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::bf16_t.
// The argument lists mirror the `hidden_size <= 512` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 1, 4, 64, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 2, 4, 64, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 4, 4, 64, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 8, 4, 64, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n64_n128_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::bf16_t.
// The first argument list mirrors the `hidden_size <= 64` branch and the other
// two the `hidden_size <= 128` branch of moe_smoothquant_dispatch
// (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 1, 4, 64, 1, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 1, 4, 64, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 2, 4, 64, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_bf16_n768_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::bf16_t.
// The argument lists mirror the `hidden_size <= 768` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1,  3, 4, 64, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1,  6, 4, 64, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 12, 4, 64, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n1024_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// The instantiations inside this `#if 0` region are compiled out.
#if 0
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 4, 64, 8, true ,false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 4, 64, 4, true ,false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 8, 4, 64, 2, true ,false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 16, 4, 64, 1, true ,false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 1, 1, 256, 4, true ,false>>(const S&, A);
#endif

// Active explicit instantiations of moe_smoothquant_ for ck_tile::fp16_t.
// The argument lists mirror the `hidden_size <= 1024` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 1, 2, 128, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 2, 128, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 2, 128, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 1, 256, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n1536_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::fp16_t.
// The argument lists mirror the `hidden_size <= 1536` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 3, 4,  64, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 3, 2, 128, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 3, 1, 256, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 6, 1, 256, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n2048_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::fp16_t.
// The argument lists mirror the `hidden_size <= 2048` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 1, 1, 256, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 1, 256, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 1, 256, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 8, 1, 256, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n256_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::fp16_t.
// The argument lists mirror the `hidden_size <= 256` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 1, 4, 64, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 4, 64, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 4, 64, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n3072_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::fp16_t.
// The argument lists mirror the `hidden_size <= 3072` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 3, 1,  128, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 3, 1,  256, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 6, 1,  256, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 3, 1, 1024, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n4096_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::fp16_t.
// The argument lists mirror the `hidden_size <= 4096` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp); last argument is false.
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 1,  256, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 1,  256, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 1, 1024, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 1, 1024, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n4096_tp_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::fp16_t.
// The argument lists mirror the `hidden_size > 4096` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp); last argument is true.
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 1,  256, 8, true, true>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 1,  256, 4, true, true>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 1, 1024, 2, true, true>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 1, 1024, 1, true, true>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n512_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::fp16_t.
// The argument lists mirror the `hidden_size <= 512` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 1, 4, 64, 8, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 4, 64, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 4, 64, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 8, 4, 64, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n64_n128_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::fp16_t.
// The first argument list mirrors the `hidden_size <= 64` branch and the other
// two the `hidden_size <= 128` branch of moe_smoothquant_dispatch
// (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 1, 4, 64, 1, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 1, 4, 64, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 4, 64, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fp16_n768_instance.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "moe_smoothquant_instance_common.hpp"
// clang-format off
// rm rn tm tn vn pd 2p
// Explicit instantiations of moe_smoothquant_ for ck_tile::fp16_t.
// The argument lists mirror the `hidden_size <= 768` branch of
// moe_smoothquant_dispatch (moe_smoothquant_fwd_api.cpp).
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1,  3, 4, 64, 4, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1,  6, 4, 64, 2, true, false>>(const S&, A);
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 12, 4, 64, 1, true, false>>(const S&, A);
// clang-format on
example/ck_tile/14_moe_smoothquant/instances/moe_smoothquant_fwd_api.cpp
0 → 100644
View file @
4525c5d7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <ck_tile/core.hpp>
#include "moe_smoothquant.hpp"
/// Short alias for moe_smoothquant_traits_; every argument is forwarded
/// unchanged and in the same order.
///
/// @tparam DataType_          data type the traits are instantiated with
/// @tparam Repeat_M_          each thread repeat along M
/// @tparam Repeat_N_          each thread repeat along N
/// @tparam ThreadPerBlock_M_  num threads along M
/// @tparam ThreadPerBlock_N_  num threads along N
/// @tparam Vector_N_          vector size along N
/// @tparam kPadN_             forwarded flag (presumably "pad along N" -- confirm
///                            against moe_smoothquant_traits_)
/// @tparam kTwoPass_          forwarded flag (presumably selects a two-pass
///                            variant -- confirm against moe_smoothquant_traits_)
template <typename DataType_,
          ck_tile::index_t Repeat_M_,
          ck_tile::index_t Repeat_N_,
          ck_tile::index_t ThreadPerBlock_M_,
          ck_tile::index_t ThreadPerBlock_N_,
          ck_tile::index_t Vector_N_,
          bool kPadN_,
          bool kTwoPass_>
using trait_ = moe_smoothquant_traits_<DataType_,
                                       Repeat_M_,
                                       Repeat_N_,
                                       ThreadPerBlock_M_,
                                       ThreadPerBlock_N_,
                                       Vector_N_,
                                       kPadN_,
                                       kTwoPass_>;
/// Picks one moe_smoothquant_ instantiation for `data_type` and calls it.
///
/// The choice depends only on `a.hidden_size`:
///   1. a size bucket (<=64, <=128, <=256, <=512, <=768, <=1024, <=1536,
///      <=2048, <=3072, <=4096, and everything larger), then
///   2. within the bucket, the first listed vector width (8, 4, 2, then 1)
///      that divides hidden_size; not every bucket offers every width.
/// The bucket for sizes above 4096 reuses the <=4096 argument lists with the
/// last trait_ argument (kTwoPass_) set to true.
///
/// The first parameter (traits) is unnamed and unused here.
///
/// @param a  arguments; hidden_size is read for selection, the struct is forwarded as-is.
/// @param s  stream configuration, forwarded as-is.
/// @return   whatever the selected moe_smoothquant_ instantiation returns.
template <typename data_type>
float moe_smoothquant_dispatch(moe_smoothquant_traits /*t*/,
                               moe_smoothquant_args a,
                               const ck_tile::stream_config& s)
{
    const auto n = a.hidden_size;

    // clang-format off
    // trait_ arguments after data_type, by position:
    //   rm = Repeat_M_, rn = Repeat_N_, tm = ThreadPerBlock_M_, tn = ThreadPerBlock_N_,
    //   vn = Vector_N_, pd = kPadN_,    2p = kTwoPass_
    //                                                           rm  rn  tm    tn  vn  pd    2p
    if(n <= 64)
    {
        return                moe_smoothquant_<trait_<data_type,  1,  1,  4,   64,  1, true, false>>(s, a);
    }
    if(n <= 128)
    {
        if(n % 2 == 0) return moe_smoothquant_<trait_<data_type,  1,  1,  4,   64,  2, true, false>>(s, a);
        return                moe_smoothquant_<trait_<data_type,  1,  2,  4,   64,  1, true, false>>(s, a);
    }
    if(n <= 256)
    {
        if(n % 4 == 0) return moe_smoothquant_<trait_<data_type,  1,  1,  4,   64,  4, true, false>>(s, a);
        if(n % 2 == 0) return moe_smoothquant_<trait_<data_type,  1,  2,  4,   64,  2, true, false>>(s, a);
        return                moe_smoothquant_<trait_<data_type,  1,  4,  4,   64,  1, true, false>>(s, a);
    }
    if(n <= 512)
    {
        if(n % 8 == 0) return moe_smoothquant_<trait_<data_type,  1,  1,  4,   64,  8, true, false>>(s, a);
        if(n % 4 == 0) return moe_smoothquant_<trait_<data_type,  1,  2,  4,   64,  4, true, false>>(s, a);
        if(n % 2 == 0) return moe_smoothquant_<trait_<data_type,  1,  4,  4,   64,  2, true, false>>(s, a);
        return                moe_smoothquant_<trait_<data_type,  1,  8,  4,   64,  1, true, false>>(s, a);
    }
    if(n <= 768)
    {
        if(n % 4 == 0) return moe_smoothquant_<trait_<data_type,  1,  3,  4,   64,  4, true, false>>(s, a);
        if(n % 2 == 0) return moe_smoothquant_<trait_<data_type,  1,  6,  4,   64,  2, true, false>>(s, a);
        return                moe_smoothquant_<trait_<data_type,  1, 12,  4,   64,  1, true, false>>(s, a);
    }
    if(n <= 1024)
    {
        if(n % 8 == 0) return moe_smoothquant_<trait_<data_type,  1,  1,  2,  128,  8, true, false>>(s, a);
        if(n % 4 == 0) return moe_smoothquant_<trait_<data_type,  1,  2,  2,  128,  4, true, false>>(s, a);
        if(n % 2 == 0) return moe_smoothquant_<trait_<data_type,  1,  4,  2,  128,  2, true, false>>(s, a);
        return                moe_smoothquant_<trait_<data_type,  1,  4,  1,  256,  1, true, false>>(s, a);
    }
    if(n <= 1536)
    {
        if(n % 8 == 0) return moe_smoothquant_<trait_<data_type,  1,  3,  4,   64,  8, true, false>>(s, a);
        if(n % 4 == 0) return moe_smoothquant_<trait_<data_type,  1,  3,  2,  128,  4, true, false>>(s, a);
        if(n % 2 == 0) return moe_smoothquant_<trait_<data_type,  1,  3,  1,  256,  2, true, false>>(s, a);
        return                moe_smoothquant_<trait_<data_type,  1,  6,  1,  256,  1, true, false>>(s, a);
    }
    if(n <= 2048)
    {
        if(n % 8 == 0) return moe_smoothquant_<trait_<data_type,  1,  1,  1,  256,  8, true, false>>(s, a);
        if(n % 4 == 0) return moe_smoothquant_<trait_<data_type,  1,  2,  1,  256,  4, true, false>>(s, a);
        if(n % 2 == 0) return moe_smoothquant_<trait_<data_type,  1,  4,  1,  256,  2, true, false>>(s, a);
        return                moe_smoothquant_<trait_<data_type,  1,  8,  1,  256,  1, true, false>>(s, a);
    }
    if(n <= 3072)
    {
        if(n % 8 == 0) return moe_smoothquant_<trait_<data_type,  1,  3,  1,  128,  8, true, false>>(s, a);
        if(n % 4 == 0) return moe_smoothquant_<trait_<data_type,  1,  3,  1,  256,  4, true, false>>(s, a);
        if(n % 2 == 0) return moe_smoothquant_<trait_<data_type,  1,  6,  1,  256,  2, true, false>>(s, a);
        return                moe_smoothquant_<trait_<data_type,  1,  3,  1, 1024,  1, true, false>>(s, a);
    }
    if(n <= 4096)
    {
        if(n % 8 == 0) return moe_smoothquant_<trait_<data_type,  1,  2,  1,  256,  8, true, false>>(s, a);
        if(n % 4 == 0) return moe_smoothquant_<trait_<data_type,  1,  4,  1,  256,  4, true, false>>(s, a);
        if(n % 2 == 0) return moe_smoothquant_<trait_<data_type,  1,  2,  1, 1024,  2, true, false>>(s, a);
        return                moe_smoothquant_<trait_<data_type,  1,  4,  1, 1024,  1, true, false>>(s, a);
    }

    // Only n > 4096 reaches this point (every smaller value returned above),
    // so no further size test is needed. Same lists as the <= 4096 bucket,
    // but with kTwoPass_ = true.
    if(n % 8 == 0) return     moe_smoothquant_<trait_<data_type,  1,  2,  1,  256,  8, true, true>>(s, a);
    if(n % 4 == 0) return     moe_smoothquant_<trait_<data_type,  1,  4,  1,  256,  4, true, true>>(s, a);
    if(n % 2 == 0) return     moe_smoothquant_<trait_<data_type,  1,  2,  1, 1024,  2, true, true>>(s, a);
    return                    moe_smoothquant_<trait_<data_type,  1,  4,  1, 1024,  1, true, true>>(s, a);
    // clang-format on
}
/// Entry point: forwards to moe_smoothquant_dispatch for the element type
/// named by `t.data_type`.
///
/// @param t  traits; `data_type` must compare equal to "fp16" or "bf16".
/// @param a  arguments, forwarded unchanged.
/// @param s  stream configuration, forwarded unchanged.
/// @return   the value returned by moe_smoothquant_dispatch.
/// @throws std::runtime_error if `t.data_type` is neither "fp16" nor "bf16".
float moe_smoothquant(moe_smoothquant_traits t,
                      moe_smoothquant_args a,
                      const ck_tile::stream_config& s)
{
    const bool wants_fp16 = (t.data_type.compare("fp16") == 0);
    if(wants_fp16)
    {
        return moe_smoothquant_dispatch<ck_tile::fp16_t>(t, a, s);
    }

    const bool wants_bf16 = (t.data_type.compare("bf16") == 0);
    if(wants_bf16)
    {
        return moe_smoothquant_dispatch<ck_tile::bf16_t>(t, a, s);
    }

    // No dispatcher exists for any other data type string.
    throw std::runtime_error("Without supported instances!");
}
Prev
1
2
3
4
5
6
7
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment