Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
7e8e54de
Commit
7e8e54de
authored
Apr 24, 2022
by
qinletao
Browse files
change rc matrix register layout
parent
ef77a1ca
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
3 deletions
+3
-3
example/01_gemm/gemm_xdl_fp64.cpp
example/01_gemm/gemm_xdl_fp64.cpp
+1
-1
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
+2
-2
No files found.
example/01_gemm/gemm_xdl_fp64.cpp
View file @
7e8e54de
...
@@ -65,7 +65,7 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
...
@@ -65,7 +65,7 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
template
<
typename
DataType
>
template
<
typename
DataType
>
std
::
ostream
&
void
show_2d_matrix
(
std
::
ostream
&
os
,
Tensor
<
DataType
>&
matrix
)
std
::
ostream
&
show_2d_matrix
(
std
::
ostream
&
os
,
Tensor
<
DataType
>&
matrix
)
{
{
os
<<
"["
<<
std
::
endl
;
os
<<
"["
<<
std
::
endl
;
for
(
int
x
=
0
;
x
<
matrix
.
mDesc
.
GetLengths
()[
0
];
x
++
)
for
(
int
x
=
0
;
x
<
matrix
.
mDesc
.
GetLengths
()[
0
];
x
++
)
...
...
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
View file @
7e8e54de
...
@@ -387,8 +387,8 @@ struct mfma_type<MfmaInstr::mfma_i32_16x16x16i8>
...
@@ -387,8 +387,8 @@ struct mfma_type<MfmaInstr::mfma_i32_16x16x16i8>
template
<
>
template
<
>
struct
mfma_type
<
MfmaInstr
::
mfma_f64_16x16x4f64
>
struct
mfma_type
<
MfmaInstr
::
mfma_f64_16x16x4f64
>
{
{
static
constexpr
index_t
group_size
=
4
;
static
constexpr
index_t
group_size
=
1
;
static
constexpr
index_t
num_groups_per_blk
=
1
;
static
constexpr
index_t
num_groups_per_blk
=
4
;
static
constexpr
index_t
num_regs_per_blk
=
4
;
// group_size * num_groups_per_blk;
static
constexpr
index_t
num_regs_per_blk
=
4
;
// group_size * num_groups_per_blk;
static
constexpr
index_t
num_threads_per_blk
=
16
;
static
constexpr
index_t
num_threads_per_blk
=
16
;
static
constexpr
index_t
wave_size
=
64
;
static
constexpr
index_t
wave_size
=
64
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment