"...composable_kernel.git" did not exist on "c5138aa1304916ba7bb7e8d05ce2f3518e9b3bd0"
Commit bfcf33e1 authored by Adam Osewski
Browse files

Uncomment code that was commented out for debugging.

parent 7546eb7a
...@@ -16,83 +16,83 @@ namespace tensor_operation { ...@@ -16,83 +16,83 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
// void add_device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instances( void add_device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(
// std::vector<std::unique_ptr<DeviceGroupedGemm<Row, std::vector<std::unique_ptr<DeviceGroupedGemm<Row,
// Row, Row,
// Empty_Tuple, Empty_Tuple,
// Row, Row,
// F16, F16,
// F16, F16,
// Empty_Tuple, Empty_Tuple,
// F16, F16,
// PassThrough, PassThrough,
// PassThrough, PassThrough,
// PassThrough>>>& instances); PassThrough>>>& instances);
// void add_device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instances( void add_device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(
// std::vector<std::unique_ptr<DeviceGroupedGemm<Row, std::vector<std::unique_ptr<DeviceGroupedGemm<Row,
// Col, Col,
// Empty_Tuple, Empty_Tuple,
// Row, Row,
// F16, F16,
// F16, F16,
// Empty_Tuple, Empty_Tuple,
// F16, F16,
// PassThrough, PassThrough,
// PassThrough, PassThrough,
// PassThrough>>>& instances); PassThrough>>>& instances);
// void add_device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instances( void add_device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instances(
// std::vector<std::unique_ptr<DeviceGroupedGemm<Col, std::vector<std::unique_ptr<DeviceGroupedGemm<Col,
// Row, Row,
// Empty_Tuple, Empty_Tuple,
// Row, Row,
// F16, F16,
// F16, F16,
// Empty_Tuple, Empty_Tuple,
// F16, F16,
// PassThrough, PassThrough,
// PassThrough, PassThrough,
// PassThrough>>>& instances); PassThrough>>>& instances);
// void add_device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instances( void add_device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instances(
// std::vector<std::unique_ptr<DeviceGroupedGemm<Col, std::vector<std::unique_ptr<DeviceGroupedGemm<Col,
// Col, Col,
// Empty_Tuple, Empty_Tuple,
// Row, Row,
// F16, F16,
// F16, F16,
// Empty_Tuple, Empty_Tuple,
// F16, F16,
// PassThrough, PassThrough,
// PassThrough, PassThrough,
// PassThrough>>>& instances); PassThrough>>>& instances);
// void add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances( void add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(
// std::vector<std::unique_ptr<DeviceGroupedGemm<Row, std::vector<std::unique_ptr<DeviceGroupedGemm<Row,
// Col, Col,
// Empty_Tuple, Empty_Tuple,
// Row, Row,
// F16, F16,
// F16, F16,
// Empty_Tuple, Empty_Tuple,
// F16, F16,
// PassThrough, PassThrough,
// PassThrough, PassThrough,
// PassThrough>>>& instances); PassThrough>>>& instances);
// void add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances( void add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(
// std::vector<std::unique_ptr<DeviceGroupedGemm<Row, std::vector<std::unique_ptr<DeviceGroupedGemm<Row,
// Row, Row,
// Empty_Tuple, Empty_Tuple,
// Row, Row,
// F16, F16,
// F16, F16,
// Empty_Tuple, Empty_Tuple,
// F16, F16,
// PassThrough, PassThrough,
// PassThrough, PassThrough,
// PassThrough>>>& instances); PassThrough>>>& instances);
void add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instances( void add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instances(
std::vector<std::unique_ptr<DeviceGroupedGemm<Row, std::vector<std::unique_ptr<DeviceGroupedGemm<Row,
...@@ -160,28 +160,28 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe ...@@ -160,28 +160,28 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
if constexpr(is_same_v<ALayout, Row> && is_same_v<BLayout, Row> && if constexpr(is_same_v<ALayout, Row> && is_same_v<BLayout, Row> &&
is_same_v<ELayout, Row>) is_same_v<ELayout, Row>)
{ {
// add_device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(op_ptrs); add_device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(op_ptrs);
// add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(op_ptrs); add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(op_ptrs);
add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instances( add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instances(
op_ptrs); op_ptrs);
} }
else if constexpr(is_same_v<ALayout, Row> && is_same_v<BLayout, Col> && else if constexpr(is_same_v<ALayout, Row> && is_same_v<BLayout, Col> &&
is_same_v<ELayout, Row>) is_same_v<ELayout, Row>)
{ {
// add_device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(op_ptrs); add_device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(op_ptrs);
// add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(op_ptrs); add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(op_ptrs);
add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instances( add_device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instances(
op_ptrs); op_ptrs);
} }
else if constexpr(is_same_v<ALayout, Col> && is_same_v<BLayout, Row> && else if constexpr(is_same_v<ALayout, Col> && is_same_v<BLayout, Row> &&
is_same_v<ELayout, Row>) is_same_v<ELayout, Row>)
{ {
// add_device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instances(op_ptrs); add_device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instances(op_ptrs);
} }
else if constexpr(is_same_v<ALayout, Col> && is_same_v<BLayout, Col> && else if constexpr(is_same_v<ALayout, Col> && is_same_v<BLayout, Col> &&
is_same_v<ELayout, Row>) is_same_v<ELayout, Row>)
{ {
// add_device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instances(op_ptrs); add_device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instances(op_ptrs);
} }
} }
return op_ptrs; return op_ptrs;
......
...@@ -9,18 +9,14 @@ endfunction(add_instance_library INSTANCE_NAME) ...@@ -9,18 +9,14 @@ endfunction(add_instance_library INSTANCE_NAME)
file(GLOB dir_list LIST_DIRECTORIES true *) file(GLOB dir_list LIST_DIRECTORIES true *)
set(CK_DEVICE_INSTANCES) set(CK_DEVICE_INSTANCES)
# FOREACH(subdir_path ${dir_list}) FOREACH(subdir_path ${dir_list})
# set(target_dir) set(target_dir)
# IF(IS_DIRECTORY "${subdir_path}") IF(IS_DIRECTORY "${subdir_path}")
# get_filename_component(target_dir ${subdir_path} NAME) get_filename_component(target_dir ${subdir_path} NAME)
# add_subdirectory(${target_dir}) add_subdirectory(${target_dir})
# list(APPEND CK_DEVICE_INSTANCES $<TARGET_OBJECTS:device_${target_dir}_instance>) list(APPEND CK_DEVICE_INSTANCES $<TARGET_OBJECTS:device_${target_dir}_instance>)
# ENDIF() ENDIF()
# ENDFOREACH() ENDFOREACH()
add_subdirectory(grouped_gemm)
add_subdirectory(gemm_splitk)
list(APPEND CK_DEVICE_INSTANCES $<TARGET_OBJECTS:device_grouped_gemm_instance>)
add_library(device_operations STATIC ${CK_DEVICE_INSTANCES}) add_library(device_operations STATIC ${CK_DEVICE_INSTANCES})
add_library(composablekernels::device_operations ALIAS device_operations) add_library(composablekernels::device_operations ALIAS device_operations)
......
add_instance_library(device_gemm_splitk_instance add_instance_library(device_gemm_splitk_instance
# device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp
# device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp
# device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp
# device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp
# device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp
device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp
# device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp
# device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp
) )
add_instance_library(device_grouped_gemm_instance add_instance_library(device_grouped_gemm_instance
# device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp
# device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp
# device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp
# device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp
# device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp
# device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp
device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp
device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp
) )
...@@ -28,7 +28,7 @@ using Empty_Tuple = ck::Tuple<>; ...@@ -28,7 +28,7 @@ using Empty_Tuple = ck::Tuple<>;
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
// static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default; static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
// a[m, k] * b[n, k] = e[m, n] // a[m, k] * b[n, k] = e[m, n]
using device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instances = std::tuple< using device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instances = std::tuple<
......
...@@ -89,10 +89,7 @@ bool profile_grouped_gemm_impl(int do_verification, ...@@ -89,10 +89,7 @@ bool profile_grouped_gemm_impl(int do_verification,
std::size_t num_thread = 1; std::size_t num_thread = 1;
switch(init_method) switch(init_method)
{ {
case 0: case 0: break;
utils::FillConstant<ADataType>{1.0}(a_m_k[i]);
utils::FillConstant<BDataType>{1.0}(b_k_n[i]);
break;
case 1: case 1:
a_m_k[i].GenerateTensorValue(GeneratorTensor_2<ADataType>{-5, 5}, num_thread); a_m_k[i].GenerateTensorValue(GeneratorTensor_2<ADataType>{-5, 5}, num_thread);
b_k_n[i].GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5}, num_thread); b_k_n[i].GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5}, num_thread);
......
# ckProfiler # ckProfiler
set(PROFILER_SOURCES set(PROFILER_SOURCES
profiler.cpp profiler.cpp
# profile_gemm.cpp profile_gemm.cpp
profile_gemm_splitk.cpp profile_gemm_splitk.cpp
# profile_gemm_bilinear.cpp profile_gemm_bilinear.cpp
# profile_gemm_bias_add_reduce.cpp profile_gemm_bias_add_reduce.cpp
# profile_gemm_add_add_fastgelu.cpp profile_gemm_add_add_fastgelu.cpp
# profile_gemm_add_multiply.cpp profile_gemm_add_multiply.cpp
# profile_gemm_add_fastgelu.cpp profile_gemm_add_fastgelu.cpp
# profile_gemm_add_relu_add_layernorm.cpp profile_gemm_add_relu_add_layernorm.cpp
# profile_gemm_fastgelu.cpp profile_gemm_fastgelu.cpp
# profile_gemm_reduce.cpp profile_gemm_reduce.cpp
# profile_batched_gemm.cpp profile_batched_gemm.cpp
# profile_batched_gemm_gemm.cpp profile_batched_gemm_gemm.cpp
# profile_batched_gemm_add_relu_gemm_add.cpp profile_batched_gemm_add_relu_gemm_add.cpp
# profile_batched_gemm_reduce.cpp profile_batched_gemm_reduce.cpp
profile_grouped_gemm.cpp profile_grouped_gemm.cpp
# profile_conv_fwd.cpp profile_conv_fwd.cpp
# profile_conv_fwd_bias_relu.cpp profile_conv_fwd_bias_relu.cpp
# profile_conv_fwd_bias_relu_add.cpp profile_conv_fwd_bias_relu_add.cpp
# profile_conv_bwd_data.cpp profile_conv_bwd_data.cpp
# profile_grouped_conv_fwd.cpp profile_grouped_conv_fwd.cpp
# profile_grouped_conv_bwd_weight.cpp profile_grouped_conv_bwd_weight.cpp
# profile_reduce.cpp profile_reduce.cpp
# profile_groupnorm.cpp profile_groupnorm.cpp
# profile_layernorm.cpp profile_layernorm.cpp
# profile_softmax.cpp profile_softmax.cpp
# profile_batchnorm_fwd.cpp profile_batchnorm_fwd.cpp
# profile_batchnorm_bwd.cpp profile_batchnorm_bwd.cpp
# profile_batchnorm_infer.cpp profile_batchnorm_infer.cpp
# profile_grouped_gemm_fastgelu.cpp profile_grouped_gemm_fastgelu.cpp
) )
set(PROFILER_EXECUTABLE ckProfiler) set(PROFILER_EXECUTABLE ckProfiler)
...@@ -38,36 +38,36 @@ add_executable(${PROFILER_EXECUTABLE} ${PROFILER_SOURCES}) ...@@ -38,36 +38,36 @@ add_executable(${PROFILER_EXECUTABLE} ${PROFILER_SOURCES})
target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors) target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_splitk_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_splitk_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bilinear_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bilinear_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_add_fastgelu_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_add_fastgelu_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_multiply_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_multiply_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_fastgelu_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_fastgelu_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_fastgelu_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_fastgelu_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_relu_add_layernorm_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_relu_add_layernorm_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_reduce_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_reduce_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bias_add_reduce_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bias_add_reduce_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_gemm_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_gemm_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_add_relu_gemm_add_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_add_relu_gemm_add_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_reduce_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_reduce_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_gemm_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_gemm_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv1d_fwd_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv1d_fwd_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_fwd_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_fwd_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_fwd_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_fwd_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv1d_bwd_data_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv1d_bwd_data_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_bwd_data_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_bwd_data_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv3d_bwd_data_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv3d_bwd_data_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv1d_bwd_weight_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv1d_bwd_weight_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_weight_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_weight_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_weight_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_weight_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_add_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_add_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_normalization_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_normalization_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_softmax_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_softmax_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_reduce_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_reduce_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batchnorm_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batchnorm_instance)
# target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_gemm_fastgelu_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_gemm_fastgelu_instance)
rocm_install(TARGETS ${PROFILER_EXECUTABLE} COMPONENT profiler) rocm_install(TARGETS ${PROFILER_EXECUTABLE} COMPONENT profiler)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment