Commit 9e15aa34 authored by Jing Zhang
Browse files

fixed

parent f16f55af
......@@ -543,7 +543,7 @@ ENDIF()
ENDFOREACH()
add_custom_target(instances DEPENDS utility;${CK_DEVICE_INSTANCES} SOURCES ${INSTANCE_FILES})
#add_subdirectory(library)
add_subdirectory(library)
if(NOT GPU_ARCHS)
rocm_package_setup_component(tests
......@@ -565,7 +565,7 @@ rocm_package_setup_component(profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler
)
#add_subdirectory(profiler)
add_subdirectory(profiler)
#if(CK_USE_CODEGEN AND (GPU_TARGETS MATCHES "gfx9" OR GPU_ARCHS))
#add_subdirectory(codegen)
......
......@@ -1756,7 +1756,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
static_cast<ADataType*>(p_shared_0), a_block_desc_ak0_m_ak1.GetElementSpaceSize());
auto b_block_buf_ping = make_dynamic_buffer<AddressSpaceEnum::Lds>(
static_cast<BDataType*>(static_cast<char*>(p_shared_0) +
bit_cast<BDataType*>(static_cast<char*>(p_shared_0) +
a_block_space_size_aligned * sizeof(ADataType)),
b_block_desc_bk0_n_bk1.GetElementSpaceSize());
......
......@@ -381,8 +381,8 @@ struct ThreadwiseTensorSliceTransfer_v3r1
(is_same<f8_t, remove_cvref_t<DstData>>::value &&
SrcScalarPerVector % 4 == 0 && DstScalarPerVector % 4 == 0)))
{
static_assert(is_same_v<remove_cvref_t<SrcData>, pk_i4_t>,
"transpose is not allowed for pk_i4_t");
//static_assert(is_same_v<remove_cvref_t<SrcData>, pk_i4_t>,
//"transpose is not allowed for pk_i4_t");
#if 1
// each transpose does
// DstScalarPerVector # of src vectors in src_thread_scratch_
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment