Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
b1a7d2a7
Commit
b1a7d2a7
authored
Oct 21, 2024
by
Andriy Roshchenko
Browse files
Improve GEMM example verbosity.
parent
10b0d214
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
48 additions
and
17 deletions
+48
-17
CMakePresets.json
CMakePresets.json
+20
-0
example/01_gemm/run_gemm_example.inc
example/01_gemm/run_gemm_example.inc
+28
-17
No files found.
CMakePresets.json
View file @
b1a7d2a7
...
...
@@ -65,6 +65,16 @@
"CMAKE_BUILD_TYPE"
:
"Release"
,
"CMAKE_CXX_FLAGS"
:
"-O3"
}
},
{
"name"
:
"MI250-debug"
,
"displayName"
:
"MI250 Debug"
,
"inherits"
:
"linux-debug"
,
"cacheVariables"
:
{
"GPU_TARGETS"
:
"gfx90a"
,
"CMAKE_BUILD_TYPE"
:
"Debug"
,
"CMAKE_CXX_FLAGS"
:
"-O0 -ggdb"
}
}
],
"buildPresets"
:
[
...
...
@@ -117,6 +127,16 @@
"Release"
],
"jobs"
:
128
},
{
"name"
:
"MI250-debug"
,
"displayName"
:
"MI250"
,
"configurePreset"
:
"MI250-debug"
,
"description"
:
"Build Environment for MI250 Debug."
,
"inherits"
:
[
"Debug"
],
"jobs"
:
128
}
]
}
example/01_gemm/run_gemm_example.inc
View file @
b1a7d2a7
...
...
@@ -317,16 +317,23 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
return
true
;
}
std
::
size_t
flop
=
2_
uz
*
M
*
N
*
K
;
std
::
size_t
num_btype
=
sizeof
(
ADataType
)
*
M
*
K
+
sizeof
(
BDataType
)
*
K
*
N
+
sizeof
(
CDataType
)
*
M
*
N
;
if
(
config
.
time_kernel
)
{
std
::
size_t
flop
=
2_
uz
*
M
*
N
*
K
;
std
::
size_t
num_btype
=
sizeof
(
ADataType
)
*
M
*
K
+
sizeof
(
BDataType
)
*
K
*
N
+
sizeof
(
CDataType
)
*
M
*
N
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
ave_time
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
ave_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
ave_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
ave_time
;
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
gemm
.
GetTypeString
()
<<
std
::
endl
;
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
gemm
.
GetTypeString
()
<<
std
::
endl
;
}
else
{
std
::
cout
<<
"FINISHED: "
<<
gemm
.
GetTypeString
()
<<
std
::
endl
;
}
bool
pass
=
true
;
...
...
@@ -353,12 +360,14 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#else
c_m_n_device_buf
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
pass
&
=
!
ck
::
utils
::
check_err
(
c_m_n_device_result
,
c_m_n_host_result
,
"Error: Incorrect results!"
,
get_rtol
<
CDataType
>
(),
get_atol
<
CDataType
>
());
pass
=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
c_m_n_host_result
,
"Error: Incorrect results!"
,
get_rtol
<
CDataType
>
(),
get_atol
<
CDataType
>
());
#endif
if
(
pass
)
std
::
cout
<<
"Verification on CPU: PASS"
<<
std
::
endl
;
// GPU verification
auto
ref_gemm_gpu
=
ReferenceGemmInstanceGPU
{};
...
...
@@ -381,11 +390,13 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
c_m_n_device_ref_buf
.
FromDevice
(
c_m_n_device_ref_result
.
mData
.
data
());
c_m_n_device_buf
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
pass
&=
!
ck
::
utils
::
check_err
(
c_m_n_device_result
,
c_m_n_device_ref_result
,
"Error: Incorrect results!"
,
get_rtol
<
CDataType
>
(),
get_atol
<
CDataType
>
());
pass
=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
c_m_n_device_ref_result
,
"Error: Incorrect results!"
,
get_rtol
<
CDataType
>
(),
get_atol
<
CDataType
>
());
if
(
pass
)
std
::
cout
<<
"Verification on GPU: PASS"
<<
std
::
endl
;
}
return
!
pass
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment