Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
a74b2263
Commit
a74b2263
authored
Feb 07, 2024
by
Adam Osewski
Browse files
Clean up.
parent
522b7aee
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
7 additions
and
8 deletions
+7
-8
include/ck/utility/work_scheduling.hpp
include/ck/utility/work_scheduling.hpp
+2
-3
profiler/include/profiler/profile_grouped_gemm_multiple_d_splitk_impl.hpp
.../profiler/profile_grouped_gemm_multiple_d_splitk_impl.hpp
+2
-2
profiler/src/profile_grouped_gemm_multiple_d_splitk.cpp
profiler/src/profile_grouped_gemm_multiple_d_splitk.cpp
+3
-3
No files found.
include/ck/utility/work_scheduling.hpp
View file @
a74b2263
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 202
4
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
...
@@ -75,12 +75,11 @@ class StridedReductionTileLoop
...
@@ -75,12 +75,11 @@ class StridedReductionTileLoop
///
///
/// @return The workgroup flag index.
/// @return The workgroup flag index.
///
///
__device__
uint32_t
GetWorkgroupFlagIdx
(
[[
maybe_unused
]]
index_t
k_tiles
,
__device__
uint32_t
GetWorkgroupFlagIdx
(
index_t
k_tiles
,
index_t
output_tile_idx
,
index_t
output_tile_idx
,
index_t
output_tile_idx_offset
)
const
index_t
output_tile_idx_offset
)
const
{
{
return
(
output_tile_idx
+
output_tile_idx_offset
)
%
GetFlagCount
(
k_tiles
);
return
(
output_tile_idx
+
output_tile_idx_offset
)
%
GetFlagCount
(
k_tiles
);
// return output_tile_idx + output_tile_idx_offset;
}
}
///
///
...
...
profiler/include/profiler/profile_grouped_gemm_multiple_d_splitk_impl.hpp
View file @
a74b2263
...
@@ -307,12 +307,12 @@ bool profile_ggemm_multid_splitk(int do_verification,
...
@@ -307,12 +307,12 @@ bool profile_ggemm_multid_splitk(int do_verification,
<<
(
instance_pass
?
"SUCCEED"
:
"FAILED"
)
<<
std
::
endl
;
<<
(
instance_pass
?
"SUCCEED"
:
"FAILED"
)
<<
std
::
endl
;
pass
=
pass
&&
instance_pass
;
pass
=
pass
&&
instance_pass
;
std
::
cout
<<
">>>>>CPU verification end!"
<<
std
::
endl
;
//
std::cout << ">>>>>CPU verification end!" << std::endl;
}
}
if
(
time_kernel
)
if
(
time_kernel
)
{
{
std
::
cout
<<
">>>>>GPU time profiling start!"
<<
std
::
endl
;
//
std::cout << ">>>>>GPU time profiling start!" << std::endl;
float
avg_time
=
invoker_ptr
->
Run
(
float
avg_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
,
0
,
warmup_iter
,
kernel_iter
});
StreamConfig
{
nullptr
,
time_kernel
,
0
,
warmup_iter
,
kernel_iter
});
...
...
profiler/src/profile_grouped_gemm_multiple_d_splitk.cpp
View file @
a74b2263
...
@@ -91,9 +91,9 @@ int profile_grouped_gemm_multiple_d_splitk(int argc, char* argv[])
...
@@ -91,9 +91,9 @@ int profile_grouped_gemm_multiple_d_splitk(int argc, char* argv[])
const
auto
StrideAs
=
argToIntArray
(
argv
[
11
]);
const
auto
StrideAs
=
argToIntArray
(
argv
[
11
]);
const
auto
StrideBs
=
argToIntArray
(
argv
[
12
]);
const
auto
StrideBs
=
argToIntArray
(
argv
[
12
]);
const
auto
StrideCs
=
argToIntArray
(
argv
[
13
]);
const
auto
StrideCs
=
argToIntArray
(
argv
[
13
]);
const
int
kbatch
=
argc
=
=
15
?
std
::
stoi
(
argv
[
14
])
:
1
;
const
int
kbatch
=
argc
>
=
15
?
std
::
stoi
(
argv
[
14
])
:
1
;
const
int
warmup_iter
=
argc
=
=
16
?
std
::
stoi
(
argv
[
15
])
:
1
;
const
int
warmup_iter
=
argc
>
=
16
?
std
::
stoi
(
argv
[
15
])
:
1
;
const
int
kernel_iter
=
argc
=
=
17
?
std
::
stoi
(
argv
[
16
])
:
10
;
const
int
kernel_iter
=
argc
>
=
17
?
std
::
stoi
(
argv
[
16
])
:
10
;
#ifdef CK_ENABLE_FP16
#ifdef CK_ENABLE_FP16
if
(
data_type
==
GemmDataType
::
F16_F16_F16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
if
(
data_type
==
GemmDataType
::
F16_F16_F16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment