Commit 9e15aa34 authored by Jing Zhang
Browse files

fixed

parent f16f55af
...@@ -543,7 +543,7 @@ ENDIF() ...@@ -543,7 +543,7 @@ ENDIF()
ENDFOREACH() ENDFOREACH()
add_custom_target(instances DEPENDS utility;${CK_DEVICE_INSTANCES} SOURCES ${INSTANCE_FILES}) add_custom_target(instances DEPENDS utility;${CK_DEVICE_INSTANCES} SOURCES ${INSTANCE_FILES})
#add_subdirectory(library) add_subdirectory(library)
if(NOT GPU_ARCHS) if(NOT GPU_ARCHS)
rocm_package_setup_component(tests rocm_package_setup_component(tests
...@@ -565,7 +565,7 @@ rocm_package_setup_component(profiler ...@@ -565,7 +565,7 @@ rocm_package_setup_component(profiler
LIBRARY_NAME composablekernel LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler PACKAGE_NAME ckprofiler
) )
#add_subdirectory(profiler) add_subdirectory(profiler)
#if(CK_USE_CODEGEN AND (GPU_TARGETS MATCHES "gfx9" OR GPU_ARCHS)) #if(CK_USE_CODEGEN AND (GPU_TARGETS MATCHES "gfx9" OR GPU_ARCHS))
#add_subdirectory(codegen) #add_subdirectory(codegen)
......
...@@ -1756,7 +1756,7 @@ struct GridwiseGemm_xdl_cshuffle_v3 ...@@ -1756,7 +1756,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
static_cast<ADataType*>(p_shared_0), a_block_desc_ak0_m_ak1.GetElementSpaceSize()); static_cast<ADataType*>(p_shared_0), a_block_desc_ak0_m_ak1.GetElementSpaceSize());
auto b_block_buf_ping = make_dynamic_buffer<AddressSpaceEnum::Lds>( auto b_block_buf_ping = make_dynamic_buffer<AddressSpaceEnum::Lds>(
static_cast<BDataType*>(static_cast<char*>(p_shared_0) + bit_cast<BDataType*>(static_cast<char*>(p_shared_0) +
a_block_space_size_aligned * sizeof(ADataType)), a_block_space_size_aligned * sizeof(ADataType)),
b_block_desc_bk0_n_bk1.GetElementSpaceSize()); b_block_desc_bk0_n_bk1.GetElementSpaceSize());
......
...@@ -381,8 +381,8 @@ struct ThreadwiseTensorSliceTransfer_v3r1 ...@@ -381,8 +381,8 @@ struct ThreadwiseTensorSliceTransfer_v3r1
(is_same<f8_t, remove_cvref_t<DstData>>::value && (is_same<f8_t, remove_cvref_t<DstData>>::value &&
SrcScalarPerVector % 4 == 0 && DstScalarPerVector % 4 == 0))) SrcScalarPerVector % 4 == 0 && DstScalarPerVector % 4 == 0)))
{ {
static_assert(is_same_v<remove_cvref_t<SrcData>, pk_i4_t>, //static_assert(is_same_v<remove_cvref_t<SrcData>, pk_i4_t>,
"transpose is not allowed for pk_i4_t"); //"transpose is not allowed for pk_i4_t");
#if 1 #if 1
// each transpose does // each transpose does
// DstScalarPerVector # of src vectors in src_thread_scratch_ // DstScalarPerVector # of src vectors in src_thread_scratch_
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment