Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
be14ab57
Commit
be14ab57
authored
Dec 07, 2022
by
rocking
Browse files
padding for GemmMeanVarCountGridDescriptor_M_NBlock
parent
70e7069c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
9 deletions
+6
-9
include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp
.../device/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp
+6
-9
No files found.
include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp
View file @
be14ab57
...
@@ -318,9 +318,8 @@ struct DeviceGemmMultipleDLayernorm_Xdl_CShuffle : public BaseOperator
...
@@ -318,9 +318,8 @@ struct DeviceGemmMultipleDLayernorm_Xdl_CShuffle : public BaseOperator
{
{
const
auto
grid_desc_m_n
=
make_naive_tensor_descriptor_packed
(
make_tuple
(
M
,
NBlock
));
const
auto
grid_desc_m_n
=
make_naive_tensor_descriptor_packed
(
make_tuple
(
M
,
NBlock
));
// TODO - padding according to MNperBlock of Gemm
return
PadTensorDescriptor
(
// CAUTION - GetWorkSpaceSize
grid_desc_m_n
,
make_tuple
(
MPerBlock
,
NPerBlock
),
Sequence
<
true
,
true
>
{});
return
grid_desc_m_n
;
}
}
static
auto
MakeLayernormMeanVarCountGridDescriptor_M_NBlock
(
index_t
M
,
index_t
NBlock
)
static
auto
MakeLayernormMeanVarCountGridDescriptor_M_NBlock
(
index_t
M
,
index_t
NBlock
)
...
@@ -521,6 +520,7 @@ struct DeviceGemmMultipleDLayernorm_Xdl_CShuffle : public BaseOperator
...
@@ -521,6 +520,7 @@ struct DeviceGemmMultipleDLayernorm_Xdl_CShuffle : public BaseOperator
b_element_op_
{
b_element_op
},
b_element_op_
{
b_element_op
},
cde_element_op_
{
cde_element_op
},
cde_element_op_
{
cde_element_op
},
h_element_op_
{
h_element_op
},
h_element_op_
{
h_element_op
},
MRaw_
(
MRaw
),
gemm_nblock_
{
math
::
integer_divide_ceil
(
NRaw
,
NPerBlock
)},
gemm_nblock_
{
math
::
integer_divide_ceil
(
NRaw
,
NPerBlock
)},
epsilon_
{
epsilon
}
epsilon_
{
epsilon
}
{
{
...
@@ -617,6 +617,7 @@ struct DeviceGemmMultipleDLayernorm_Xdl_CShuffle : public BaseOperator
...
@@ -617,6 +617,7 @@ struct DeviceGemmMultipleDLayernorm_Xdl_CShuffle : public BaseOperator
CDEElementwiseOperation
cde_element_op_
;
CDEElementwiseOperation
cde_element_op_
;
HElementwiseOperation
h_element_op_
;
HElementwiseOperation
h_element_op_
;
int
MRaw_
;
int
gemm_nblock_
;
int
gemm_nblock_
;
AccDataType
epsilon_
;
AccDataType
epsilon_
;
};
};
...
@@ -766,9 +767,7 @@ struct DeviceGemmMultipleDLayernorm_Xdl_CShuffle : public BaseOperator
...
@@ -766,9 +767,7 @@ struct DeviceGemmMultipleDLayernorm_Xdl_CShuffle : public BaseOperator
size_t
workspace_size
=
0
;
size_t
workspace_size
=
0
;
// FIXME - padding
int
gemm_welford_size
=
pArg_
->
MRaw_
*
pArg_
->
gemm_nblock_
;
int
gemm_welford_size
=
pArg_
->
gemm_mean_var_count_grid_desc_m_nblock_
.
GetElementSpaceSize
();
// workspace for welford intermediate mean
// workspace for welford intermediate mean
workspace_size
+=
gemm_welford_size
*
sizeof
(
MeanDataType
)
+
64
;
workspace_size
+=
gemm_welford_size
*
sizeof
(
MeanDataType
)
+
64
;
...
@@ -788,9 +787,7 @@ struct DeviceGemmMultipleDLayernorm_Xdl_CShuffle : public BaseOperator
...
@@ -788,9 +787,7 @@ struct DeviceGemmMultipleDLayernorm_Xdl_CShuffle : public BaseOperator
pArg_
->
p_workspace_
=
p_workspace
;
pArg_
->
p_workspace_
=
p_workspace
;
int
gemm_welford_size
=
int
gemm_welford_size
=
pArg_
->
MRaw_
*
pArg_
->
gemm_nblock_
;
pArg_
->
gemm_mean_var_count_grid_desc_m_nblock_
.
GetElementSpaceSize
();
// int gemm_welford_size = MRaw * pArg->gemm_nblock_;
// setup buffer used for intermediate welford mean
// setup buffer used for intermediate welford mean
pArg_
->
p_workspace_mean_
=
static_cast
<
char
*>
(
pArg_
->
p_workspace_
);
pArg_
->
p_workspace_mean_
=
static_cast
<
char
*>
(
pArg_
->
p_workspace_
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment