gaoqiong / composable_kernel · commit 180290ba

Authored Dec 05, 2022 by rocking
Parent: c13776be

    Add gemm layernorm host code
Showing 1 changed file with 96 additions and 26 deletions:
example/21_gemm_layernorm/gemm_add_add_layernorm_xdl_fp16.cpp (+96, -26)
example/21_gemm_layernorm/gemm_add_add_layernorm_xdl_fp16.cpp @ 180290ba
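For orientation: this commit adds a host (CPU) reference for the fused device kernel, which computes a GEMM, a two-tensor add epilogue, and a row-wise layernorm. Read off the host code added below; the scale-shift form of the Normalize step is inferred from the functor's argument list, not quoted from its implementation:

    E_{mn}     = \sum_k A_{mk} B_{kn} + \mathrm{bias}_n + D1_{mn}
    \mu_m      = \tfrac{1}{N} \sum_n E_{mn}
    \sigma_m^2 = \tfrac{1}{N} \sum_n E_{mn}^2 - \mu_m^2
    H_{mn}     = \gamma_n \, (E_{mn} - \mu_m) / \sqrt{\sigma_m^2 + \epsilon} + \beta_n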
@@ -66,14 +66,6 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultipleDLayern
         < ALayout, BLayout, DsLayout, HLayout,
           ADataType, BDataType, AccDataType, CShuffleDataType,
           DsDataType, GammaDataType, BetaDataType, HDataType,
           AElementOp, BElementOp, CDEElementOp, HElementOp,
           GemmDefault, 1, 256, 256, 128, 32, 8, 8, 32, 32, 4, 2,
           S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1,
           S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1,
           1, 1, S<64, 4>, 4, S<8, 32>, S<1, 8>, 1, 8, 8, 8, 8, 1>;
 // clang-format on
 
 using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<
     ADataType, BDataType, AccDataType, AccDataType, AElementOp, BElementOp, PassThrough>;
 
 auto f_host_tensor_descriptor1d = [](std::size_t len, std::size_t stride) {
     return HostTensorDescriptor(std::vector<std::size_t>({len}),
                                 std::vector<std::size_t>({stride}));
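A note on the S<...> arguments in the instance above: in CK examples, S is typically an alias for ck::Sequence, a compile-time integer list used for thread-cluster and transfer shapes (the alias itself is assumed to be declared near the top of this file, which the diff does not show). A minimal self-contained stand-in:

#include <cstddef>
#include <iostream>

// Minimal stand-in for CK's compile-time sequences (illustration only;
// the real type is ck::Sequence and carries much more machinery).
template <std::size_t... Is>
struct Seq
{
    static constexpr std::size_t size = sizeof...(Is);
};

// e.g. S<4, 64, 1> above likely describes a 4 x 64 x 1 thread cluster
// for the block-wise A/B copies (an assumption about this instance).
using ABlockCluster = Seq<4, 64, 1>;

int main()
{
    std::cout << "dims: " << ABlockCluster::size << '\n'; // prints "dims: 3"
    return 0;
}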
@@ -93,6 +85,78 @@ auto f_host_tensor_descriptor2d =
     }
 };
 
+void host_gemm_layernorm(Tensor<HDataType>& e_m_n,
+                         Tensor<HDataType>& h_m_n,
+                         const Tensor<ADataType>& a_m_k,
+                         const Tensor<BDataType>& b_k_n,
+                         const Tensor<D0DataType>& bias_n,
+                         const Tensor<D1DataType>& d1_m_n,
+                         const Tensor<GammaDataType>& gamma_n,
+                         const Tensor<BetaDataType>& beta_n,
+                         AElementOp a_element_op,
+                         BElementOp b_element_op,
+                         CDEElementOp cde_element_op,
+                         int M,
+                         int N,
+                         float epsilon = 1e-5)
+{
+    using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<
+        ADataType, BDataType, AccDataType, AccDataType, AElementOp, BElementOp, PassThrough>;
+    using NormalizeFunctor = ck::tensor_operation::element_wise::Normalize;
+
+    // Reference GEMM: C = a_element_op(A) * b_element_op(B)
+    Tensor<AccDataType> c_m_n(HostTensorDescriptor{M, N});
+
+    auto ref_gemm    = ReferenceGemmInstance{};
+    auto ref_invoker = ref_gemm.MakeInvoker();
+
+    auto ref_argument =
+        ref_gemm.MakeArgument(a_m_k, b_k_n, c_m_n, a_element_op, b_element_op, PassThrough{});
+
+    ref_invoker.Run(ref_argument);
+
+    // Epilogue: E = cde_element_op(C, bias, D1)
+    for(int m = 0; m < M; ++m)
+        for(int n = 0; n < N; ++n)
+        {
+            cde_element_op(e_m_n(m, n), c_m_n(m, n), bias_n(n), d1_m_n(m, n));
+        }
+
+    // LayerNorm
+    Tensor<AccDataType> mean_m(f_host_tensor_descriptor1d(M, 1));
+    Tensor<AccDataType> meanSquare_m(f_host_tensor_descriptor1d(M, 1));
+    auto layerNormInst = NormalizeFunctor{epsilon};
+
+    for(int m = 0; m < M; ++m)
+    {
+        // First pass: accumulate per-row mean and mean of squares
+        AccDataType mean       = 0;
+        AccDataType meanSquare = 0;
+        for(int n = 0; n < N; ++n)
+        {
+            auto e_val = ck::type_convert<AccDataType>(e_m_n(m, n));
+            mean += e_val;
+            meanSquare += e_val * e_val;
+        }
+
+        mean /= N;
+        meanSquare /= N;
+
+        // Second pass: normalize each element, then scale and shift
+        for(int n = 0; n < N; ++n)
+        {
+            AccDataType h_val     = 0;
+            AccDataType e_val     = ck::type_convert<AccDataType>(e_m_n(m, n));
+            AccDataType gamma_val = ck::type_convert<AccDataType>(gamma_n(n));
+            AccDataType beta_val  = ck::type_convert<AccDataType>(beta_n(n));
+            layerNormInst(h_val, e_val, mean, meanSquare, gamma_val, beta_val);
+            h_m_n(m, n) = ck::type_convert<HDataType>(h_val);
+        }
+    }
+}
+
 int main()
 {
     bool do_verification = true;
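The host reference above computes the layernorm in two passes per row: accumulate the mean and mean-of-squares, then hand each element to the Normalize functor. A self-contained sketch of the same arithmetic on plain floats, assuming Normalize evaluates gamma * (x - mean) / sqrt(E[x^2] - mean^2 + epsilon) + beta (an inference from its argument list, not a quote of its implementation):

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Row-wise layernorm matching the host loop above, on plain floats.
void layernorm_row(std::vector<float>& row, const std::vector<float>& gamma,
                   const std::vector<float>& beta, float eps = 1e-5f)
{
    const float n = static_cast<float>(row.size());
    float mean = 0.f, mean_sq = 0.f;
    for(float v : row)
    {
        mean += v;        // sum of elements
        mean_sq += v * v; // sum of squares
    }
    mean /= n;
    mean_sq /= n;
    // variance via E[x^2] - E[x]^2, as in the mean/meanSquare accumulation above
    const float inv_std = 1.f / std::sqrt(mean_sq - mean * mean + eps);
    for(std::size_t i = 0; i < row.size(); ++i)
        row[i] = gamma[i] * (row[i] - mean) * inv_std + beta[i];
}

int main()
{
    std::vector<float> row{1.f, 2.f, 3.f, 4.f};
    std::vector<float> gamma(4, 1.f), beta(4, 0.f);
    layernorm_row(row, gamma, beta);
    for(float v : row)
        std::printf("%f ", v); // roughly -1.342 -0.447 0.447 1.342
    return 0;
}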
@@ -181,28 +245,34 @@ int main()
     invoker.Run(argument, StreamConfig{nullptr, false});
 
+    bool pass = true;
+
     if(do_verification)
     {
         Tensor<AccDataType> c_m_n_host(HostTensorDescriptor{M, N});
         Tensor<HDataType> e_m_n_host(HostTensorDescriptor{M, N});
+        Tensor<HDataType> h_m_n_host(HostTensorDescriptor{M, N});
 
-        auto ref_gemm    = ReferenceGemmInstance{};
-        auto ref_invoker = ref_gemm.MakeInvoker();
-
-        auto ref_argument = ref_gemm.MakeArgument(
-            a_m_k, b_k_n, c_m_n_host, a_element_op, b_element_op, PassThrough{});
-
-        ref_invoker.Run(ref_argument);
-
-        for(int m = 0; m < M; ++m)
-        {
-            for(int n = 0; n < N; ++n)
-            {
-                cde_element_op(e_m_n_host(m, n), c_m_n_host(m, n), d0_n(n), d1_m_n(m, n));
-            }
-        }
+        host_gemm_layernorm(e_m_n_host,
+                            h_m_n_host,
+                            a_m_k,
+                            b_k_n,
+                            d0_n,
+                            d1_m_n,
+                            gamma_n,
+                            beta_n,
+                            a_element_op,
+                            b_element_op,
+                            cde_element_op,
+                            M,
+                            N,
+                            epsilon);
+
+        e_device_buf.FromDevice(e_m_n.mData.data());
+        h_device_buf.FromDevice(h_m_n.mData.data());
+
+        pass &= ck::utils::check_err(e_m_n, e_m_n_host);
+        pass &= ck::utils::check_err(h_m_n, h_m_n_host);
     }
 
-    e_device_buf.FromDevice(e_m_n.mData.data());
-
-    return ck::utils::check_err(e_m_n, e_m_n_host) ? 0 : 1;
+    return pass ? 0 : 1;
 }
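The verification flow reads both device outputs back with FromDevice and folds each comparison into pass, so the exit code reflects both the E and H checks. A minimal stand-in for a check_err-style tolerance comparison (ck::utils::check_err's actual signature and default tolerances live in the CK headers; this is an illustrative sketch, not its implementation):

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Mixed absolute/relative tolerance check in the spirit of ck::utils::check_err.
bool almost_equal(const std::vector<float>& out, const std::vector<float>& ref,
                  float rtol = 1e-3f, float atol = 1e-3f)
{
    if(out.size() != ref.size())
        return false;
    for(std::size_t i = 0; i < out.size(); ++i)
    {
        const float err = std::fabs(out[i] - ref[i]);
        if(err > atol + rtol * std::fabs(ref[i]))
            return false;
    }
    return true;
}

int main()
{
    std::vector<float> device_out{1.0f, 2.001f};
    std::vector<float> host_ref{1.0f, 2.0f};
    std::printf("%s\n", almost_equal(device_out, host_ref) ? "pass" : "fail"); // prints "pass"
    return 0;
}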