Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
9e15aa34
Commit
9e15aa34
authored
Oct 24, 2024
by
Jing Zhang
Browse files
fixed
parent
f16f55af
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
5 deletions
+5
-5
CMakeLists.txt
CMakeLists.txt
+2
-2
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp
...nsor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp
+1
-1
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
+2
-2
No files found.
CMakeLists.txt
View file @
9e15aa34
...
...
@@ -543,7 +543,7 @@ ENDIF()
ENDFOREACH
()
add_custom_target
(
instances DEPENDS utility;
${
CK_DEVICE_INSTANCES
}
SOURCES
${
INSTANCE_FILES
}
)
#
add_subdirectory(library)
add_subdirectory
(
library
)
if
(
NOT GPU_ARCHS
)
rocm_package_setup_component
(
tests
...
...
@@ -565,7 +565,7 @@ rocm_package_setup_component(profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler
)
#
add_subdirectory(profiler)
add_subdirectory
(
profiler
)
#if(CK_USE_CODEGEN AND (GPU_TARGETS MATCHES "gfx9" OR GPU_ARCHS))
#add_subdirectory(codegen)
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp
View file @
9e15aa34
...
...
@@ -1756,7 +1756,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
static_cast
<
ADataType
*>
(
p_shared_0
),
a_block_desc_ak0_m_ak1
.
GetElementSpaceSize
());
auto
b_block_buf_ping
=
make_dynamic_buffer
<
AddressSpaceEnum
::
Lds
>
(
static
_cast
<
BDataType
*>
(
static_cast
<
char
*>
(
p_shared_0
)
+
bit
_cast
<
BDataType
*>
(
static_cast
<
char
*>
(
p_shared_0
)
+
a_block_space_size_aligned
*
sizeof
(
ADataType
)),
b_block_desc_bk0_n_bk1
.
GetElementSpaceSize
());
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
View file @
9e15aa34
...
...
@@ -381,8 +381,8 @@ struct ThreadwiseTensorSliceTransfer_v3r1
(
is_same
<
f8_t
,
remove_cvref_t
<
DstData
>>::
value
&&
SrcScalarPerVector
%
4
==
0
&&
DstScalarPerVector
%
4
==
0
)))
{
static_assert
(
is_same_v
<
remove_cvref_t
<
SrcData
>
,
pk_i4_t
>
,
"transpose is not allowed for pk_i4_t"
);
//
static_assert(is_same_v<remove_cvref_t<SrcData>, pk_i4_t>,
//
"transpose is not allowed for pk_i4_t");
#if 1
// each transpose does
// DstScalarPerVector # of src vectors in src_thread_scratch_
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment