Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
61b20afa
Commit
61b20afa
authored
Nov 08, 2024
by
Andriy Roshchenko
Browse files
Fix data types and improve testing verbosity.
parent
51b9abb9
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
33 additions
and
25 deletions
+33
-25
example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_ab_scale.cpp
...iply_multiply/gemm_multiply_multiply_xdl_fp8_ab_scale.cpp
+33
-25
No files found.
example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_ab_scale.cpp
View file @
61b20afa
...
...
@@ -188,15 +188,15 @@ int main(int argc, char* argv[])
b1_k_n
.
GenerateTensorValue
(
GeneratorTensor_3
<
B1DataType
>
{
0
,
1.0
});
break
;
case
6
:
a0_m_k
.
GenerateTensorValue
(
GeneratorTensor_PI
<
ADataType
>
{});
b0_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
BDataType
>
{
0.5
f
});
a0_m_k
.
GenerateTensorValue
(
GeneratorTensor_PI
<
A
0
DataType
>
{});
b0_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
B
0
DataType
>
{
0.5
f
});
a1_m_k
.
GenerateTensorValue
(
GeneratorTensor_1
<
A1DataType
>
{
0.5
});
b1_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
B1DataType
>
{
4
});
break
;
case
7
:
a0_m_k
.
GenerateTensorValue
(
GeneratorTensor_PI_A
<
ADataType
>
{});
b0_k_n
.
GenerateTensorValue
(
GeneratorTensor_PI_B
<
BDataType
>
{});
a0_m_k
.
GenerateTensorValue
(
GeneratorTensor_PI_A
<
A
0
DataType
>
{});
b0_k_n
.
GenerateTensorValue
(
GeneratorTensor_PI_B
<
B
0
DataType
>
{});
a1_m_k
.
GenerateTensorValue
(
GeneratorTensor_1
<
A1DataType
>
{
2
});
b1_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
B1DataType
>
{
0.5
});
...
...
@@ -254,8 +254,11 @@ int main(int argc, char* argv[])
"not support this GEMM problem"
);
}
std
::
cout
<<
"Compute GEMM on device...
\n
"
;
float
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
,
20
,
50
});
std
::
cout
<<
"DONE!"
<<
std
::
endl
;
if
(
time_kernel
)
{
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
std
::
size_t
num_btype
=
sizeof
(
A0DataType
)
*
M
*
K
+
sizeof
(
B0DataType
)
*
K
*
N
+
sizeof
(
EDataType
)
*
M
*
N
;
...
...
@@ -264,13 +267,15 @@ int main(int argc, char* argv[])
float
gb_per_sec
=
num_btype
/
1.E6
/
ave_time
;
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s"
<<
std
::
endl
;
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s"
<<
std
::
endl
;
}
e_device_buf
.
FromDevice
(
e_m_n_device_result
.
mData
.
data
());
if
(
do_verification
)
{
std
::
cout
<<
"Running verification on CPU."
<<
std
::
endl
;
Tensor
<
AccDataType
>
c_m_n
({
M
,
N
});
Tensor
<
float
>
a_m_k
({
M
,
K
});
Tensor
<
float
>
b_k_n
({
K
,
N
});
...
...
@@ -324,16 +329,16 @@ int main(int argc, char* argv[])
{
std
::
cout
<<
std
::
fixed
<<
std
::
setprecision
(
16
);
float
a
=
ck
::
type_convert
<
float
>
(
a0_
device_buf
(
0
,
10
));
float
b
=
ck
::
type_convert
<
float
>
(
b0_
device_buf
(
0
,
10
));
float
a
=
ck
::
type_convert
<
float
>
(
a0_
m_k
(
0
,
10
));
float
b
=
ck
::
type_convert
<
float
>
(
b0_
k_n
(
0
,
10
));
std
::
cout
<<
"a(0,10): "
<<
a
<<
std
::
endl
;
std
::
cout
<<
"b(0,10): "
<<
b
<<
std
::
endl
;
std
::
cout
<<
"a: "
<<
ck
::
type_convert
<
float
>
(
a0_
device_buf
(
0
,
0
))
<<
std
::
endl
;
std
::
cout
<<
"a: "
<<
ck
::
type_convert
<
float
>
(
a0_
device_buf
(
0
,
1
))
<<
std
::
endl
;
std
::
cout
<<
"a: "
<<
ck
::
type_convert
<
float
>
(
a0_
device_buf
(
0
,
2
))
<<
std
::
endl
;
std
::
cout
<<
"b: "
<<
ck
::
type_convert
<
float
>
(
b0_
device_buf
(
0
,
0
))
<<
std
::
endl
;
std
::
cout
<<
"b: "
<<
ck
::
type_convert
<
float
>
(
b0_
device_buf
(
1
,
0
))
<<
std
::
endl
;
std
::
cout
<<
"b: "
<<
ck
::
type_convert
<
float
>
(
b0_
device_buf
(
2
,
0
))
<<
std
::
endl
;
std
::
cout
<<
"a: "
<<
ck
::
type_convert
<
float
>
(
a0_
m_k
(
0
,
0
))
<<
std
::
endl
;
std
::
cout
<<
"a: "
<<
ck
::
type_convert
<
float
>
(
a0_
m_k
(
0
,
1
))
<<
std
::
endl
;
std
::
cout
<<
"a: "
<<
ck
::
type_convert
<
float
>
(
a0_
m_k
(
0
,
2
))
<<
std
::
endl
;
std
::
cout
<<
"b: "
<<
ck
::
type_convert
<
float
>
(
b0_
k_n
(
0
,
0
))
<<
std
::
endl
;
std
::
cout
<<
"b: "
<<
ck
::
type_convert
<
float
>
(
b0_
k_n
(
1
,
0
))
<<
std
::
endl
;
std
::
cout
<<
"b: "
<<
ck
::
type_convert
<
float
>
(
b0_
k_n
(
2
,
0
))
<<
std
::
endl
;
float
d
=
ck
::
type_convert
<
float
>
(
e_m_n_device_result
(
0
,
10
));
float
h
=
ck
::
type_convert
<
float
>
(
e_m_n_host_result
(
10
,
0
));
...
...
@@ -343,13 +348,16 @@ int main(int argc, char* argv[])
std
::
cout
<<
"device - host: "
<<
std
::
abs
(
d
-
h
)
<<
std
::
endl
;
std
::
cout
<<
"device - expected: "
<<
std
::
abs
(
d
-
M_PI
)
<<
std
::
endl
;
std
::
cout
<<
"atol: "
<<
5e-2
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
<<
std
::
endl
;
}
return
ck
::
utils
::
check_err
(
e_m_n_device_result
,
e_m_n_host_result
,
"Error: Incorrect results!"
,
5e-2
,
5e-2
)
?
0
:
1
;
if
(
ck
::
utils
::
check_err
(
e_m_n_device_result
,
e_m_n_host_result
,
"Error: Incorrect results!"
,
5e-2
,
5e-2
))
{
std
::
cout
<<
"Verification on CPU: PASS"
<<
std
::
endl
;
return
0
;
}
else
return
1
;
}
return
0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment