Unverified Commit fb1cbf02 authored by cloudhan's avatar cloudhan Committed by GitHub
Browse files

Change all device operations to use add_instance_library (#338)



* Change all device operations to use add_instance_library to avoid duplicated cmake configuration.

* update DeviceMem
Co-authored-by: default avatarChao Liu <chao.liu2@amd.com>
parent 0bd6b842
set(DEVICE_GEMM_BIAS_ADD_REDUCE_INSTANCE_SOURCE
add_instance_library(device_gemm_bias_add_reduce_instance
device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp
device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp
device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp
device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp
)
add_library(device_gemm_bias_add_reduce_instance OBJECT ${DEVICE_GEMM_BIAS_ADD_REDUCE_INSTANCE_SOURCE})
target_compile_features(device_gemm_bias_add_reduce_instance PUBLIC)
set_target_properties(device_gemm_bias_add_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(device_gemm_bias_add_reduce_instance)
# device_gemm_bilinear_instance
set(DEVICE_GEMM_BILINEAR_INSTANCE_SOURCE
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp;
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp;
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp;
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp;
add_instance_library(device_gemm_bilinear_instance
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp
)
add_library(device_gemm_bilinear_instance OBJECT ${DEVICE_GEMM_BILINEAR_INSTANCE_SOURCE})
set_target_properties(device_gemm_bilinear_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(device_gemm_bilinear_instance)
set(DEVICE_GEMM_REDUCE_INSTANCE_SOURCE
add_instance_library(device_gemm_reduce_instance
device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp
device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp
device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp
device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp
)
add_instance_library(device_gemm_reduce_instance ${DEVICE_GEMM_REDUCE_INSTANCE_SOURCE})
rocm_install(TARGETS device_gemm_reduce_instance)
clang_tidy_check(device_gemm_reduce_instance)
set(DEVICE_GEMM_SPLITK_INSTANCE_SOURCE
device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp;
device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp;
device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp;
device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp;
device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp;
device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp;
device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp;
device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp;
add_instance_library(device_gemm_splitk_instance
device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp
device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp
device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp
device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp
device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp
device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp
device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp
device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp
)
add_library(device_gemm_splitk_instance OBJECT ${DEVICE_GEMM_SPLITK_INSTANCE_SOURCE})
target_compile_features(device_gemm_splitk_instance PUBLIC)
set_target_properties(device_gemm_splitk_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
# device_grouped_conv1d_fwd_instance
set(DEVICE_GROUPED_CONV1D_FWD_INSTANCE_SOURCE
device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp;
device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp;
device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp;
device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp;
add_instance_library(device_grouped_conv1d_fwd_instance
device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp
device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp
device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp
device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp
)
add_library(device_grouped_conv1d_fwd_instance OBJECT ${DEVICE_GROUPED_CONV1D_FWD_INSTANCE_SOURCE})
set_target_properties(device_grouped_conv1d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(device_grouped_conv1d_fwd_instance)
# device_grouped_conv2d_fwd_instance
set(DEVICE_GROUPED_CONV2D_FWD_INSTANCE_SOURCE
add_instance_library(device_grouped_conv2d_fwd_instance
# GNHWC, GKYXC, GNHWK
device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp;
device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp;
device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp;
device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_int8_instance.cpp;
device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp
device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp
device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp
device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_int8_instance.cpp
# NHWGC, GKYXC, NHWGK
device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp;
device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp
)
add_library(device_grouped_conv2d_fwd_instance OBJECT ${DEVICE_GROUPED_CONV2D_FWD_INSTANCE_SOURCE})
set_target_properties(device_grouped_conv2d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(device_grouped_conv2d_fwd_instance)
# device_grouped_conv3d_fwd_instance
set(DEVICE_GROUPED_CONV3D_FWD_INSTANCE_SOURCE
device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp;
device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp;
device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp;
device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp;
add_library(device_grouped_conv3d_fwd_instance
device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp
device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp
device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp
device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp
)
add_library(device_grouped_conv3d_fwd_instance OBJECT ${DEVICE_GROUPED_CONV3D_FWD_INSTANCE_SOURCE})
set_target_properties(device_grouped_conv3d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(device_grouped_conv3d_fwd_instance)
# device_grouped_gemm_instance
set(DEVICE_GROUPED_GEMM_INSTANCE_SOURCE
device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp;
device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp;
device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp;
device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp;
add_instance_library(device_grouped_gemm_instance
device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp
device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp
device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp
device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp
)
add_library(device_grouped_gemm_instance OBJECT ${DEVICE_GROUPED_GEMM_INSTANCE_SOURCE})
target_compile_features(device_grouped_gemm_instance PUBLIC)
set_target_properties(device_grouped_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
rocm_install(TARGETS device_grouped_gemm_instance)
clang_tidy_check(device_grouped_gemm_instance)
# device_normalization_instance
set(DEVICE_NORMALIZATION_INSTANCE_SOURCE
add_instance_library(device_normalization_instance
device_layernorm_f16_instance.cpp
device_layernorm_f32_instance.cpp
device_softmax_f32_f32_instance.cpp
device_softmax_f16_f16_instance.cpp
)
add_library(device_normalization_instance OBJECT ${DEVICE_NORMALIZATION_INSTANCE_SOURCE})
set_target_properties(device_normalization_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(device_normalization_instance)
# device_reduce_instance
set(DEVICE_REDUCE_INSTANCE_SOURCE
device_reduce_instance_blockwise_f16_f16_f16.cpp;
device_reduce_instance_blockwise_f16_f32_f16.cpp;
device_reduce_instance_blockwise_f32_f32_f32.cpp;
device_reduce_instance_blockwise_f32_f64_f32.cpp;
device_reduce_instance_blockwise_f64_f64_f64.cpp;
device_reduce_instance_blockwise_i8_i32_i8.cpp;
device_reduce_instance_blockwise_i8_i8_i8.cpp;
device_reduce_instance_blockwise_b16_f32_b16.cpp;
device_reduce_instance_threadwise_f16_f16_f16.cpp;
device_reduce_instance_threadwise_f16_f32_f16.cpp;
device_reduce_instance_threadwise_f32_f32_f32.cpp;
device_reduce_instance_threadwise_f32_f64_f32.cpp;
device_reduce_instance_threadwise_f64_f64_f64.cpp;
device_reduce_instance_threadwise_i8_i32_i8.cpp;
device_reduce_instance_threadwise_i8_i8_i8.cpp;
device_reduce_instance_threadwise_b16_f32_b16.cpp;
device_reduce_instance_multiblock_atomic_add_f16_f32_f32.cpp;
device_reduce_instance_multiblock_atomic_add_f32_f32_f32.cpp;
device_reduce_instance_multiblock_atomic_add_f32_f64_f32.cpp;
device_reduce_instance_multiblock_atomic_add_f64_f64_f64.cpp;
device_reduce_instance_multiblock_atomic_add_b16_f32_f32.cpp;
add_instance_library(device_reduce_instance
device_reduce_instance_blockwise_f16_f16_f16.cpp
device_reduce_instance_blockwise_f16_f32_f16.cpp
device_reduce_instance_blockwise_f32_f32_f32.cpp
device_reduce_instance_blockwise_f32_f64_f32.cpp
device_reduce_instance_blockwise_f64_f64_f64.cpp
device_reduce_instance_blockwise_i8_i32_i8.cpp
device_reduce_instance_blockwise_i8_i8_i8.cpp
device_reduce_instance_blockwise_b16_f32_b16.cpp
device_reduce_instance_threadwise_f16_f16_f16.cpp
device_reduce_instance_threadwise_f16_f32_f16.cpp
device_reduce_instance_threadwise_f32_f32_f32.cpp
device_reduce_instance_threadwise_f32_f64_f32.cpp
device_reduce_instance_threadwise_f64_f64_f64.cpp
device_reduce_instance_threadwise_i8_i32_i8.cpp
device_reduce_instance_threadwise_i8_i8_i8.cpp
device_reduce_instance_threadwise_b16_f32_b16.cpp
device_reduce_instance_multiblock_atomic_add_f16_f32_f32.cpp
device_reduce_instance_multiblock_atomic_add_f32_f32_f32.cpp
device_reduce_instance_multiblock_atomic_add_f32_f64_f32.cpp
device_reduce_instance_multiblock_atomic_add_f64_f64_f64.cpp
device_reduce_instance_multiblock_atomic_add_b16_f32_f32.cpp
)
add_library(device_reduce_instance OBJECT ${DEVICE_REDUCE_INSTANCE_SOURCE})
set_target_properties(device_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(device_reduce_instance)
......@@ -10,20 +10,20 @@ DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
hip_check_error(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
}
void* DeviceMem::GetDeviceBuffer() { return mpDeviceBuf; }
void* DeviceMem::GetDeviceBuffer() const { return mpDeviceBuf; }
std::size_t DeviceMem::GetBufferSize() { return mMemSize; }
std::size_t DeviceMem::GetBufferSize() const { return mMemSize; }
void DeviceMem::ToDevice(const void* p)
void DeviceMem::ToDevice(const void* p) const
{
hip_check_error(hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
}
void DeviceMem::FromDevice(void* p)
void DeviceMem::FromDevice(void* p) const
{
hip_check_error(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
}
void DeviceMem::SetZero() { hip_check_error(hipMemset(mpDeviceBuf, 0, mMemSize)); }
void DeviceMem::SetZero() const { hip_check_error(hipMemset(mpDeviceBuf, 0, mMemSize)); }
DeviceMem::~DeviceMem() { hip_check_error(hipFree(mpDeviceBuf)); }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment