Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
6433eede
Commit
6433eede
authored
Dec 12, 2024
by
Jing Zhang
Browse files
fixed
parent
afeccb5f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
5 deletions
+10
-5
example/01_gemm/gemm_xdl_fp16_pk_i4_v3.cpp
example/01_gemm/gemm_xdl_fp16_pk_i4_v3.cpp
+2
-2
include/ck/utility/static_buffer.hpp
include/ck/utility/static_buffer.hpp
+8
-2
profiler/src/CMakeLists.txt
profiler/src/CMakeLists.txt
+0
-1
No files found.
example/01_gemm/gemm_xdl_fp16_pk_i4_v3.cpp
View file @
6433eede
...
@@ -21,8 +21,8 @@ using CElementOp = PassThrough;
...
@@ -21,8 +21,8 @@ using CElementOp = PassThrough;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
Default
;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
Default
;
static
constexpr
bool
PermuteA
=
false
;
static
constexpr
bool
PermuteA
=
false
;
static
constexpr
bool
PermuteB
=
true
;
static
constexpr
bool
PermuteB
=
true
;
static
constexpr
ck
::
index_t
KPerBlock
=
128
;
static
constexpr
ck
::
index_t
KPerBlock
=
128
;
// clang-format off
// clang-format off
...
...
include/ck/utility/static_buffer.hpp
View file @
6433eede
...
@@ -114,7 +114,10 @@ struct StaticBufferTupleOfVector
...
@@ -114,7 +114,10 @@ struct StaticBufferTupleOfVector
// Get X
// Get X
// i is offset of S, not X. i should be aligned to X
// i is offset of S, not X. i should be aligned to X
template
<
typename
X
,
index_t
I
>
template
<
typename
X
,
index_t
I
,
typename
enable_if
<
has_same_scalar_type
<
S
,
X
>
::
value
||
!
is_native_type
<
S
>
(),
bool
>::
type
=
false
>
__host__
__device__
constexpr
auto
GetAsType
(
Number
<
I
>
i
)
const
__host__
__device__
constexpr
auto
GetAsType
(
Number
<
I
>
i
)
const
{
{
constexpr
auto
s_per_x
=
Number
<
scalar_type
<
remove_cvref_t
<
X
>>::
vector_size
>
{};
constexpr
auto
s_per_x
=
Number
<
scalar_type
<
remove_cvref_t
<
X
>>::
vector_size
>
{};
...
@@ -130,7 +133,10 @@ struct StaticBufferTupleOfVector
...
@@ -130,7 +133,10 @@ struct StaticBufferTupleOfVector
// Set X
// Set X
// i is offset of S, not X. i should be aligned to X
// i is offset of S, not X. i should be aligned to X
template
<
typename
X
,
index_t
I
>
template
<
typename
X
,
index_t
I
,
typename
enable_if
<
has_same_scalar_type
<
S
,
X
>
::
value
||
!
is_native_type
<
S
>
(),
bool
>::
type
=
false
>
__host__
__device__
constexpr
void
SetAsType
(
Number
<
I
>
i
,
X
x
)
__host__
__device__
constexpr
void
SetAsType
(
Number
<
I
>
i
,
X
x
)
{
{
constexpr
auto
s_per_x
=
Number
<
scalar_type
<
remove_cvref_t
<
X
>>::
vector_size
>
{};
constexpr
auto
s_per_x
=
Number
<
scalar_type
<
remove_cvref_t
<
X
>>::
vector_size
>
{};
...
...
profiler/src/CMakeLists.txt
View file @
6433eede
...
@@ -43,7 +43,6 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
...
@@ -43,7 +43,6 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
list
(
APPEND PROFILER_SOURCES profile_gemm_add_silu.cpp
)
list
(
APPEND PROFILER_SOURCES profile_gemm_add_silu.cpp
)
list
(
APPEND PROFILER_SOURCES profile_gemm_add_relu_add_layernorm.cpp
)
list
(
APPEND PROFILER_SOURCES profile_gemm_add_relu_add_layernorm.cpp
)
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_fixed_nk.cpp
)
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_fixed_nk.cpp
)
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_two_stage.cpp
)
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_fastgelu.cpp
)
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_fastgelu.cpp
)
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_tile_loop.cpp
)
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_tile_loop.cpp
)
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_multiply_tile_loop.cpp
)
list
(
APPEND PROFILER_SOURCES profile_grouped_gemm_multiply_tile_loop.cpp
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment