gaoqiong / composable_kernel_ROCM / Commits / f1f36a61

Commit f1f36a61, authored Feb 04, 2025 by Andriy Roshchenko

WIP: Debug some test cases

parent 9f8e26f6
Changes: 2 changed files with 119 additions and 19 deletions

    test/mx_mfma_op/mx_mfma_op.cpp   +4    -4
    test/mx_mfma_op/mx_mfma_op.hpp   +115  -15
test/mx_mfma_op/mx_mfma_op.cpp (view file @ f1f36a61)
...
@@ -112,14 +112,14 @@ bool run_mxmfma_test(ck::index_t init)
 TEST(MXMFMA, MXFP8MFMA16x16x128)
 {
-    auto AB_init = 1;
-    auto pass = run_mxmfma_test<f8_t, f8_t, float, ck::MFMA_F8F6F4::SCALE_F32_16x16x128>(AB_init);
+    auto AB_init = 2;
+    auto pass = run_mxmfma_test<f8_t, f8_t, half_t, ck::MFMA_F8F6F4::SCALE_F32_16x16x128>(AB_init);
     EXPECT_TRUE(pass);
 }
 
 TEST(MXMFMA, MXFP8MFMA32x32x64)
 {
-    auto AB_init = 1;
-    auto pass = run_mxmfma_test<f8_t, f8_t, half_t, ck::MFMA_F8F6F4::SCALE_F32_32x32x64>(AB_init);
+    auto AB_init = 2;
+    auto pass = run_mxmfma_test<f8_t, f8_t, float, ck::MFMA_F8F6F4::SCALE_F32_32x32x64>(AB_init);
     EXPECT_TRUE(pass);
 }
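
Note: AB_init selects the operand-initialization case in TestMXMFMA's switch (see the mx_mfma_op.hpp hunks below); both tests move from case 1 (constant operands with scales that cancel, so C = {K}) to case 2 (small random values with random power-of-two scales). For reference, the following is a plain-C++ restatement of the MX-scaled GEMM these tests check, not the CK API; `mx_gemm_reference` and its row-major layout are illustrative, and `block_x = 32` (the MXFP8 scale-block size) is an assumption.

```cpp
#include <cstddef>
#include <vector>

// Each block_x-wide slice of A's rows and of B's columns shares one scale;
// the descaled products are accumulated in float, mirroring RunHostGEMM.
void mx_gemm_reference(std::size_t M, std::size_t N, std::size_t K, std::size_t block_x,
                       const std::vector<float>& a,        // M x K elements, decoded from f8_t
                       const std::vector<float>& a_scales, // M x (K / block_x)
                       const std::vector<float>& b,        // K x N
                       const std::vector<float>& b_scales, // (K / block_x) x N
                       std::vector<float>& c)              // M x N
{
    for(std::size_t m = 0; m < M; ++m)
        for(std::size_t n = 0; n < N; ++n)
        {
            float acc = 0.0f;
            for(std::size_t k = 0; k < K; ++k)
            {
                const float av = a[m * K + k] * a_scales[m * (K / block_x) + k / block_x];
                const float bv = b[k * N + n] * b_scales[(k / block_x) * N + n];
                acc += av * bv;
            }
            c[m * N + n] = acc;
        }
}
```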
test/mx_mfma_op/mx_mfma_op.hpp (view file @ f1f36a61)
...
@@ -738,8 +738,15 @@ void RunHostGEMM(const Tensor<ADataType>& A,
                  Tensor<CDataType>& C)
 {
     using PassThrough = ck::tensor_operation::element_wise::PassThrough;
-    using GemmInstance = ck::tensor_operation::host::
-        ReferenceGemm<float, float, CDataType, float, PassThrough, PassThrough, PassThrough>;
+    using GemmInstance = ck::tensor_operation::host::ReferenceGemm<float,
+                                                                   float,
+                                                                   CDataType,
+                                                                   float,
+                                                                   PassThrough,
+                                                                   PassThrough,
+                                                                   PassThrough,
+                                                                   float,
+                                                                   float>;
 
     Tensor<float> a_m_k(A.mDesc);
     Tensor<float> b_k_n(B.mDesc);
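
The two added trailing `float` arguments appear to pin the reference GEMM's compute types: in CK's host `ReferenceGemm` the last two template parameters are compute types that default to `CDataType`, and the .cpp change above now instantiates tests with `CDataType = half_t`. A minimal sketch of the defaulted-parameter pitfall, using an illustrative `ReferenceGemmLike` rather than the real CK signature:

```cpp
#include <type_traits>

struct half_t {}; // stand-in for ck::half_t, to keep the sketch self-contained

template <typename CDataType,
          typename ComputeTypeA = CDataType, // defaults chain off the output type
          typename ComputeTypeB = ComputeTypeA>
struct ReferenceGemmLike
{
    using compute_a = ComputeTypeA;
};

// Without the explicit trailing floats, a half_t output type would silently
// make the host reference compute in half precision as well.
static_assert(std::is_same<ReferenceGemmLike<half_t>::compute_a, half_t>::value, "");
static_assert(std::is_same<ReferenceGemmLike<half_t, float, float>::compute_a, float>::value, "");

int main() {}
```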
...
@@ -753,8 +760,8 @@ void RunHostGEMM(const Tensor<ADataType>& A,
         {
             for(size_t k = 0; k < K; k++)
             {
-                a_m_k(m, k) =
-                    type_convert<float>(A(m, k)) * type_convert<float>(a_scales(m, k / BLOCK_X));
+                a_m_k(m, k) = type_convert<float>(type_convert<ADataType>(
+                    type_convert<float>(A(m, k)) * type_convert<float>(a_scales(m, k / BLOCK_X))));
             }
         }
...
@@ -762,8 +769,8 @@ void RunHostGEMM(const Tensor<ADataType>& A,
         {
             for(size_t k = 0; k < K; k++)
             {
-                b_k_n(k, n) =
-                    type_convert<float>(B(k, n)) * type_convert<float>(b_scales(k / BLOCK_X, n));
+                b_k_n(k, n) = type_convert<float>(type_convert<BDataType>(
+                    type_convert<float>(B(k, n)) * type_convert<float>(b_scales(k / BLOCK_X, n))));
             }
         }
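
Both RunHostGEMM hunks apply the same fix: the scaled operand is routed back through its storage type (`type_convert<ADataType>` / `type_convert<BDataType>`) before entering the float GEMM, so the host reference re-quantizes `element * scale` to the FP8 grid instead of keeping the exact float product. A self-contained sketch of where that changes the result, assuming `f8_t` is OCP FP8 E4M3; `quantize_e4m3` is an illustrative round-to-nearest-even emulation, not a CK helper, and whether the device MFMA saturates the same way is exactly the kind of question a WIP debug commit probes:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// Simplified E4M3 emulation: clamp to the finite max (448), then round to
// the nearest representable value (3 mantissa bits, smallest subnormal 2^-9).
float quantize_e4m3(float x)
{
    if(x == 0.0f || std::isnan(x))
        return x;
    float mag = std::min(std::fabs(x), 448.0f); // saturate at the E4M3 max
    int e;
    std::frexp(mag, &e);                        // mag = m * 2^e, m in [0.5, 1)
    e = std::max(e, -5);                        // below the smallest normal 2^-6, spacing stays 2^-9
    const float ulp = std::ldexp(1.0f, e - 4);  // grid spacing at this exponent
    const float q   = std::nearbyint(mag / ulp) * ulp;
    return std::copysign(q, x);
}

int main()
{
    // Case 1 below scales all-ones A by 512: the exact product lies outside
    // E4M3's finite range, so the re-quantized host operand saturates.
    std::printf("%g vs %g\n", 1.0f * 512.0f, quantize_e4m3(1.0f * 512.0f)); // 512 vs 448
    // In-range power-of-two scaling of an FP8 value round-trips exactly.
    std::printf("%g vs %g\n", 1.875f * 2.0f, quantize_e4m3(1.875f * 2.0f)); // 3.75 vs 3.75
}
```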
...
@@ -862,18 +869,23 @@ struct TestMXMFMA
         case 1:
             // results in C = {K}
             a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1.0f});
-            a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
+            a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{512.0f}});
             b_n_k.GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0f});
-            b_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
+            b_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f / 512}});
             break;
         case 2:
             // expect small round off errors
-            a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{-5, 5});
+            a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{-0.5, 0.5});
+            // a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1.9f});
             a_scales.GenerateTensorValue(
-                GeneratorTensor_2<ScaleType>{126, 129}); // scales: {0.5, 1, 2}
-            b_n_k.GenerateTensorValue(GeneratorTensor_3<BDataType>{-5, 5});
-            b_scales.GenerateTensorValue(
-                GeneratorTensor_2<ScaleType>{126, 129}); // scales: {0.5, 1, 2}
+                GeneratorTensor_2<ScaleType>{127, 129}); // scales: {0.5, 1, 2}
+            // a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
+            // b_n_k.GenerateTensorValue(GeneratorTensor_3<BDataType>{-5, 5});
+            // b_scales.GenerateTensorValue(
+            //     GeneratorTensor_2<ScaleType>{125, 128}); // scales: {0.5, 1, 2}
+            b_n_k.GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0f});
+            b_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
             break;
         case 3:
             // expect small round off errors
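
The "// scales: {0.5, 1, 2}" comments decode as follows, assuming `ScaleType` is an E8M0 biased-exponent scale (bias 127, value 2^(n - 127)); the same encoding explains why case 1 can use 512 and 1/512 and still expect C = {K}. A small decoding sketch (`decode_e8m0` is illustrative, not a CK function):

```cpp
#include <cmath>
#include <cstdio>

float decode_e8m0(int n) { return std::ldexp(1.0f, n - 127); } // 2^(n - 127)

int main()
{
    // GeneratorTensor_2<ScaleType>{126, 129} draws integers in [126, 129),
    // i.e. 126, 127, 128 -> the {0.5, 1, 2} in the diff's comments.
    for(int n = 126; n < 129; ++n)
        std::printf("%d -> %g\n", n, decode_e8m0(n));

    // Case 1: with all elements equal to 1, a_scale = 512 = 2^9 and
    // b_scale = 1/512 = 2^-9 cancel per MAC, so every C entry equals K.
    std::printf("512 * (1/512) = %g\n", decode_e8m0(136) * decode_e8m0(118));
}
```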
...
@@ -884,12 +896,33 @@ struct TestMXMFMA
             b_scales.GenerateTensorValue(
                 GeneratorTensor_2<ScaleType>{126, 129}); // scales: {0.5, 1, 2}
             break;
+        case 4:
+            a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1.0f});
+            a_scales.GenerateTensorValue(GeneratorTensor_Sequential<ScaleType, 0>{-9});
+            b_n_k.GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0f});
+            b_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
+            break;
+        case 5:
+            a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1.0f});
+            a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
+            b_n_k.GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0f});
+            b_scales.GenerateTensorValue(GeneratorTensor_Sequential<ScaleType, 1>{-9});
+            break;
+        case 6:
+            a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{0.00195312f});
+            a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f / 16}});
+            b_n_k.GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0f});
+            b_scales.GenerateTensorValue(GeneratorTensor_Sequential<ScaleType, 1>{-9});
+            break;
         default:
             // all initial values are representable in FP8, BF8
             a_m_k.GenerateTensorValue(GeneratorTensor_2<ADataType>{-5, 6});
-            a_scales.GenerateTensorValue(GeneratorTensor_3<ScaleType>{1.0f / 32.0f, 1.0f});
+            // a_scales.GenerateTensorValue(GeneratorTensor_3<ScaleType>{1.0f / 32.0f, 1.0f});
+            a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
             b_n_k.GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 6});
-            b_scales.GenerateTensorValue(GeneratorTensor_3<ScaleType>{1.0f / 32.0f, 1.0f});
+            // b_scales.GenerateTensorValue(GeneratorTensor_3<ScaleType>{1.0f / 32.0f, 1.0f});
+            b_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
             break;
         }
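
On the new case 6: 0.00195312f is (up to float rounding) 2^-9, the smallest E4M3 subnormal if `f8_t` is OCP E4M3, and the 1/16 scale pushes the effective operand to about 2^-13, below anything the element format can hold on its own. That this remains representable is the point of MX: the scale lives outside the FP8 element. A short magnitude check:

```cpp
#include <cmath>
#include <cstdio>

int main()
{
    const float element = 0.00195312f; // ~= std::ldexp(1.0f, -9), E4M3's smallest subnormal
    const float scale   = 1.0f / 16;   // e8m0 scale 2^-4
    std::printf("effective A value: %g (~ 2^-13)\n", element * scale);
}
```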
...
@@ -946,12 +979,79 @@ struct TestMXMFMA
         RunDeviceGEMM(mfma_kernel, a, a_scales, b, b_scales, c_device);
 
+#if 1
+        std::cout << "a:" << std::endl;
+        for(size_t i = 0; i < BLOCK_M; i++)
+        {
+            for(size_t j = 0; j < BLOCK_K; j++)
+            {
+                std::cout << type_convert<float>(a(i, j)) << " ";
+            }
+            std::cout << std::endl;
+        }
+        std::cout << "b:" << std::endl;
+        for(size_t i = 0; i < BLOCK_K; i++)
+        {
+            for(size_t j = 0; j < BLOCK_N; j++)
+            {
+                std::cout << type_convert<float>(b(i, j)) << " ";
+            }
+            std::cout << std::endl;
+        }
+#endif
+#if 1
+        std::cout << "a_scale:" << std::endl;
+        for(size_t i = 0; i < BLOCK_M; i++)
+        {
+            for(size_t j = 0; j < BLOCK_K / BLOCK_X; j++)
+            {
+                std::cout << type_convert<float>(a_scales(i, j)) << " ";
+            }
+            std::cout << std::endl;
+        }
+        std::cout << "b_scale:" << std::endl;
+        for(size_t i = 0; i < BLOCK_K / BLOCK_X; i++)
+        {
+            for(size_t j = 0; j < BLOCK_N; j++)
+            {
+                std::cout << type_convert<float>(b_scales(i, j)) << " ";
+            }
+            std::cout << std::endl;
+        }
+#endif
+#if 1
+        std::cout << "c_device:" << std::endl;
+        for(size_t i = 0; i < BLOCK_M; i++)
+        {
+            for(size_t j = 0; j < BLOCK_N; j++)
+            {
+                std::cout << type_convert<float>(c_device(i, j)) << " ";
+            }
+            std::cout << std::endl;
+        }
+#endif
+
         bool res = false;
         if constexpr(std::is_same<CDataType, float>::value ||
                      std::is_same<CDataType, half_t>::value)
         {
             res = ck::utils::check_err(c_device.mData, c_host.mData);
             std::cout << (res ? "SUCCESS" : "FAILURE") << std::endl;
+            if(!res)
+            {
+                std::cout << "c_host:" << std::endl;
+                for(size_t i = 0; i < BLOCK_M; i++)
+                {
+                    for(size_t j = 0; j < BLOCK_N; j++)
+                    {
+                        std::cout << type_convert<float>(c_host(i, j)) << " ";
+                    }
+                    std::cout << std::endl;
+                }
+            }
         }
         else
         {
...