Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
b755f375
Commit
b755f375
authored
Jan 13, 2025
by
aska-0096
Browse files
add save_x=true instance
parent
35ba0864
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
57 additions
and
29 deletions
+57
-29
example/ck_tile/11_add_rmsnorm2d_rdquant/instances/add_rmsnorm2d_rdquant_fwd_api.cpp
...orm2d_rdquant/instances/add_rmsnorm2d_rdquant_fwd_api.cpp
+40
-28
example/ck_tile/11_add_rmsnorm2d_rdquant/instances/add_rmsnorm2d_rdquant_fwd_bf16_n8192_instance.cpp
...stances/add_rmsnorm2d_rdquant_fwd_bf16_n8192_instance.cpp
+8
-0
example/ck_tile/11_add_rmsnorm2d_rdquant/instances/add_rmsnorm2d_rdquant_fwd_fp16_n8192_instance.cpp
...stances/add_rmsnorm2d_rdquant_fwd_fp16_n8192_instance.cpp
+9
-1
No files found.
example/ck_tile/11_add_rmsnorm2d_rdquant/instances/add_rmsnorm2d_rdquant_fwd_api.cpp
View file @
b755f375
...
@@ -120,36 +120,48 @@ float add_rmsnorm2d_rdquant_fwd_b16_(add_rmsnorm2d_rdquant_fwd_traits t,
...
@@ -120,36 +120,48 @@ float add_rmsnorm2d_rdquant_fwd_b16_(add_rmsnorm2d_rdquant_fwd_traits t,
}
}
else
if
(
a
.
n
<=
8192
)
{
else
if
(
a
.
n
<=
8192
)
{
if
(
a
.
n
<
8192
){
if
(
a
.
n
<
8192
){
if
(
t
.
save_x
){
if
(
t
.
save_x
){
if
(
a
.
n
%
8
==
0
)
if
(
a
.
n
%
8
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
2
,
1
,
512
,
8
,
true
,
true
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
2
,
1
,
512
,
8
,
true
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
n
%
4
==
0
)
else
if
(
a
.
n
%
4
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
512
,
4
,
true
,
true
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
512
,
4
,
true
,
true
,
false
>>
(
s
,
a
);
else
if
(
a
.
n
%
2
==
0
)
else
if
(
a
.
n
%
2
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
1024
,
2
,
true
,
true
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
1024
,
2
,
true
,
true
,
false
>>
(
s
,
a
);
else
else
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
8
,
1
,
1024
,
1
,
true
,
true
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
8
,
1
,
1024
,
1
,
true
,
true
,
false
>>
(
s
,
a
);
}
else
{
if
(
a
.
n
%
8
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
2
,
1
,
512
,
8
,
true
,
false
,
false
>>
(
s
,
a
);
else
if
(
a
.
n
%
4
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
512
,
4
,
true
,
false
,
false
>>
(
s
,
a
);
else
if
(
a
.
n
%
2
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
1024
,
2
,
true
,
false
,
false
>>
(
s
,
a
);
else
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
8
,
1
,
1024
,
1
,
true
,
false
,
false
>>
(
s
,
a
);
}
}
}
else
{
else
{
if
(
a
.
n
%
8
==
0
)
if
(
t
.
save_x
){
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
2
,
1
,
512
,
8
,
true
,
false
,
false
>>
(
s
,
a
);
if
(
a
.
n
%
8
==
0
)
else
if
(
a
.
n
%
4
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
2
,
1
,
512
,
8
,
false
,
true
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
512
,
4
,
true
,
false
,
false
>>
(
s
,
a
);
else
if
(
a
.
n
%
4
==
0
)
else
if
(
a
.
n
%
2
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
512
,
4
,
false
,
true
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
1024
,
2
,
true
,
false
,
false
>>
(
s
,
a
);
else
if
(
a
.
n
%
2
==
0
)
else
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
1024
,
2
,
false
,
true
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
8
,
1
,
1024
,
1
,
true
,
false
,
false
>>
(
s
,
a
);
else
}
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
8
,
1
,
1024
,
1
,
false
,
true
,
false
>>
(
s
,
a
);
}
}
else
{
else
{
if
(
a
.
n
%
8
==
0
)
if
(
a
.
n
%
8
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
2
,
1
,
512
,
8
,
false
,
false
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
2
,
1
,
512
,
8
,
false
,
false
,
false
>>
(
s
,
a
);
else
if
(
a
.
n
%
4
==
0
)
else
if
(
a
.
n
%
4
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
512
,
4
,
false
,
false
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
512
,
4
,
false
,
false
,
false
>>
(
s
,
a
);
else
if
(
a
.
n
%
2
==
0
)
else
if
(
a
.
n
%
2
==
0
)
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
1024
,
2
,
false
,
false
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
4
,
1
,
1024
,
2
,
false
,
false
,
false
>>
(
s
,
a
);
else
else
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
8
,
1
,
1024
,
1
,
false
,
false
,
false
>>
(
s
,
a
);
r
=
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
input_data_type
,
quantized_data_type
,
1
,
8
,
1
,
1024
,
1
,
false
,
false
,
false
>>
(
s
,
a
);
}
}
}
}
}
else
if
(
a
.
n
>
8192
)
{
else
if
(
a
.
n
>
8192
)
{
...
...
example/ck_tile/11_add_rmsnorm2d_rdquant/instances/add_rmsnorm2d_rdquant_fwd_bf16_n8192_instance.cpp
View file @
b755f375
...
@@ -14,6 +14,10 @@ template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::bf16_t, ck_tile::int8
...
@@ -14,6 +14,10 @@ template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::bf16_t, ck_tile::int8
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
512
,
4
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
512
,
4
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
2
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
2
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
8
,
1
,
1024
,
1
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
8
,
1
,
1024
,
1
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
512
,
8
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
8
,
1
,
1024
,
1
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
512
,
8
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
512
,
8
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
...
@@ -26,6 +30,10 @@ template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::bf16_t, ck_tile::fp8_
...
@@ -26,6 +30,10 @@ template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::bf16_t, ck_tile::fp8_
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
512
,
4
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
512
,
4
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
2
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
2
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
8
,
1
,
1024
,
1
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
8
,
1
,
1024
,
1
,
true
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
512
,
8
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
8
,
1
,
1024
,
1
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
512
,
8
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
512
,
8
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
bf16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
...
...
example/ck_tile/11_add_rmsnorm2d_rdquant/instances/add_rmsnorm2d_rdquant_fwd_fp16_n8192_instance.cpp
View file @
b755f375
...
@@ -5,7 +5,7 @@
...
@@ -5,7 +5,7 @@
#include "add_rmsnorm2d_rdquant_fwd_instance_common.hpp"
#include "add_rmsnorm2d_rdquant_fwd_instance_common.hpp"
// clang-format off
// clang-format off
// rm rn tm tn vn pd x 3p
//
rm rn tm tn vn pd x 3p
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
512
,
8
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
512
,
8
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
512
,
4
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
512
,
4
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
2
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
2
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
...
@@ -18,6 +18,10 @@ template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8
...
@@ -18,6 +18,10 @@ template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::int8
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
8
,
1
,
1024
,
1
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
8
,
1
,
1024
,
1
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
2
,
1
,
512
,
8
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
int8_t
,
1
,
8
,
1
,
1024
,
1
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
512
,
8
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
512
,
8
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
512
,
4
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
512
,
4
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
2
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
2
,
true
,
true
,
false
>
>
(
const
S
&
,
A
);
...
@@ -30,4 +34,8 @@ template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_
...
@@ -30,4 +34,8 @@ template float add_rmsnorm2d_rdquant_fwd_<trait_<ck_tile::fp16_t, ck_tile::fp8_
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
8
,
1
,
1024
,
1
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
8
,
1
,
1024
,
1
,
false
,
false
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
2
,
1
,
512
,
8
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
512
,
4
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
4
,
1
,
1024
,
2
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
template
float
add_rmsnorm2d_rdquant_fwd_
<
trait_
<
ck_tile
::
fp16_t
,
ck_tile
::
fp8_t
,
1
,
8
,
1
,
1024
,
1
,
false
,
true
,
false
>
>
(
const
S
&
,
A
);
// clang-format on
// clang-format on
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment