Commit f785032d authored by Jehandad Khan

Create host_api interface

parent 6b1b28eb
@@ -7,7 +7,6 @@
 #include <vector>
 #include "client_app_impl.hpp"
 enum ConvDataType
 {
     F32_F32_F32, // 0
@@ -84,13 +83,7 @@ int main(int argc, char* argv[])
     const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
     const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
-    ck::app::profile_conv_fwd_impl<2,
-                                   float,
-                                   float,
-                                   float,
-                                   ck::tensor_layout::convolution::NHWC,
-                                   ck::tensor_layout::convolution::KYXC,
-                                   ck::tensor_layout::convolution::NHWK>(
+    ck::app::profile_conv_fwd_impl(
         do_verification,
         init_method,
         do_log,
...
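
For reference, the output-extent arithmetic used just above can be written as a small helper. This is an illustrative sketch, not part of the commit; it assumes ck::index_t comes from "config.hpp" as elsewhere in the code base, and YEff/XEff are the dilated filter extents (e.g. YEff = (Y - 1) * conv_dilation_h + 1).

#include "config.hpp" // assumed to provide ck::index_t

// (input extent + pads - effective filter extent) / stride + 1, as for Ho/Wo above
ck::index_t conv_out_extent(ck::index_t in_extent,
                            ck::index_t left_pad,
                            ck::index_t right_pad,
                            ck::index_t filter_eff,
                            ck::index_t stride)
{
    return (in_extent + left_pad + right_pad - filter_eff) / stride + 1;
}
// example: in_extent = 71, pads = 1/1, filter_eff = 3, stride = 2  ->  36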
 #pragma once
-#include "config.hpp"
-#include "device.hpp"
-#include "tensor_layout.hpp"
-#include "device_conv_fwd.hpp"
-#include "element_wise_operation.hpp"
-namespace ck {
-namespace tensor_operation {
-namespace device {
-namespace device_conv2d_fwd_instance {
-using DeviceConvFwdNoOpPtr = DeviceConvFwdPtr<ck::tensor_operation::element_wise::PassThrough,
-                                              ck::tensor_operation::element_wise::PassThrough,
-                                              ck::tensor_operation::element_wise::PassThrough>;
-void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(std::vector<DeviceConvFwdNoOpPtr>&);
-void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
-void add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances(
-    std::vector<DeviceConvFwdNoOpPtr>&);
-void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
-void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(std::vector<DeviceConvFwdNoOpPtr>&);
-} // namespace device_conv2d_fwd_instance
-} // namespace device
-} // namespace tensor_operation
-} // namespace ck
+#include "host_interface.hpp"
+struct DeviceMem
+{
+    float* ptr_mem=nullptr;
+    int size;
+    DeviceMem(int _size): size(_size){}
+    float* GetDeviceBuffer()
+    {
+        return ptr_mem;
+    }
+};
 namespace ck {
 namespace app {
-template <int NDimSpatial,
-          typename InDataType,
-          typename WeiDataType,
-          typename OutDataType,
-          typename InLayout,
-          typename WeiLayout,
-          typename OutLayout>
 void profile_conv_fwd_impl(int do_verification,
                            int init_method,
                            bool do_log,
@@ -62,36 +40,26 @@ void profile_conv_fwd_impl(int do_verification,
     const ck::index_t Ho = output_spatial_lengths[0];
     const ck::index_t Wo = output_spatial_lengths[1];
-    using InElementOp  = ck::tensor_operation::element_wise::PassThrough;
-    using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
-    using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
-    const auto in_element_op  = InElementOp{};
-    const auto wei_element_op = WeiElementOp{};
-    const auto out_element_op = OutElementOp{};
     const auto in_sz  = 1000;
     const auto wei_sz = 1000;
     const auto out_sz = 1000;
+    using WeiDataType = float;
+    using InDataType  = float;
+    using OutDataType = float;
     DeviceMem in_device_buf(sizeof(InDataType) * in_sz);
     DeviceMem wei_device_buf(sizeof(WeiDataType) * wei_sz);
     DeviceMem out_device_buf(sizeof(OutDataType) * out_sz);
     // data is already on device!
-    // in_device_buf.ToDevice(in_n_c_hi_wi.mData.data());
-    // wei_device_buf.ToDevice(wei_k_c_y_x.mData.data());
-    using PassThrough = ck::tensor_operation::element_wise::PassThrough;
-    using DeviceConvFwdNoOpPtr =
-        ck::tensor_operation::device::DeviceConvFwdPtr<PassThrough, PassThrough, PassThrough>;
     // add device Conv instances
-    std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
-    ck::tensor_operation::device::device_conv2d_fwd_instance::
-        add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs);
-    if(conv_ptrs.size() <= 0)
+    std::vector<DeviceConvFwdPtr_t> conv_ptrs;
+    add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances_t(conv_ptrs);
+    if(conv_ptrs.empty())
     {
         throw std::runtime_error("wrong! no device Conv instance found");
     }
@@ -104,10 +72,10 @@ void profile_conv_fwd_impl(int do_verification,
     // profile device Conv instances
     for(auto& conv_ptr : conv_ptrs)
     {
-        auto argument_ptr = conv_ptr->MakeArgumentPointer(
-            static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
-            static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
-            static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
+        auto argument_ptr = conv_ptr.MakeArgumentPointer(
+            static_cast<void*>(in_device_buf.GetDeviceBuffer()),
+            static_cast<void*>(wei_device_buf.GetDeviceBuffer()),
+            static_cast<void*>(out_device_buf.GetDeviceBuffer()),
             N,
            K,
             C,
@@ -117,16 +85,13 @@ void profile_conv_fwd_impl(int do_verification,
             conv_filter_strides,
             conv_filter_dilations,
             input_left_pads,
-            input_right_pads,
-            in_element_op,
-            wei_element_op,
-            out_element_op);
+            input_right_pads);
-        auto invoker_ptr = conv_ptr->MakeInvokerPointer();
+        auto invoker_ptr = conv_ptr.MakeInvokerPointer();
-        if(conv_ptr->IsSupportedArgument(argument_ptr.get()))
+        //if(conv_ptr.IsSupportedArgument(argument_ptr.get()))
         {
-            std::string conv_name = conv_ptr->GetTypeString();
+            std::string conv_name = conv_ptr.GetTypeString();
             float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat);
...
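
The DeviceMem introduced in this header is only a handle: it stores a pointer and a byte count, and GetDeviceBuffer() hands the pointer back, so the caller is expected to have placed the data on the device already (hence the "data is already on device!" comment). Below is a hedged sketch of how a caller might pair the stub with an existing HIP allocation; the helper name and the use of hipMalloc are illustrative assumptions, not part of the commit.

#include <stdexcept>
#include <hip/hip_runtime.h>

// DeviceMem stub as declared above: carries a pointer and a size, never allocates.
struct DeviceMem
{
    float* ptr_mem=nullptr;
    int size;
    DeviceMem(int _size): size(_size){}
    float* GetDeviceBuffer() { return ptr_mem; }
};

// Illustrative helper: allocate on the device and hand the pointer to the stub.
// Ownership (and the eventual hipFree) stays with the caller.
DeviceMem make_device_buffer(int bytes)
{
    DeviceMem buf(bytes);
    void* p = nullptr;
    if(hipMalloc(&p, static_cast<size_t>(bytes)) != hipSuccess)
        throw std::runtime_error("hipMalloc failed");
    buf.ptr_mem = static_cast<float*>(p);
    return buf;
}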
@@ -22,7 +22,7 @@ struct BaseInvoker
     BaseInvoker(const BaseInvoker&) = default;
     BaseInvoker& operator=(const BaseInvoker&) = default;
-    virtual float Run(const BaseArgument*, int = 1) = 0;
+    virtual float Run(const BaseArgument*, int = 1){return -1;}
     virtual ~BaseInvoker() {}
 };
@@ -33,8 +33,8 @@ struct BaseOperator
     BaseOperator(const BaseOperator&) = default;
     BaseOperator& operator=(const BaseOperator&) = default;
-    virtual bool IsSupportedArgument(const BaseArgument*) = 0;
-    virtual std::string GetTypeString() const = 0;
+    virtual bool IsSupportedArgument(const BaseArgument*){return false;}
+    virtual std::string GetTypeString() const {return "";}
     virtual ~BaseOperator() {}
 };
...
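
Because Run, IsSupportedArgument, and GetTypeString now have default bodies instead of being pure virtual, BaseInvoker and BaseOperator are no longer abstract, so a host-side adapter can derive from them and override only what it needs. A minimal sketch of that consequence follows; HostSideOperator is a hypothetical name, not part of the commit.

#include <string>
#include "device_base.hpp"

// Before this change, omitting the IsSupportedArgument override would have
// left the class abstract; now it simply inherits the default (returns false).
struct HostSideOperator : ck::tensor_operation::device::BaseOperator
{
    std::string GetTypeString() const override { return "HostSideOperator"; }
};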
#pragma once
#include <memory>
#include <string>
#include <vector> // needed for the std::vector<ck::index_t> parameters below
#include "config.hpp"
#include "device_base.hpp"
struct DeviceConvFwdPtr_t
{
using BaseArgument = ck::tensor_operation::device::BaseArgument;
using BaseInvoker = ck::tensor_operation::device::BaseInvoker;
struct DeviceConvFwdPtrImpl;
std::unique_ptr<DeviceConvFwdPtrImpl> pImpl;
DeviceConvFwdPtr_t();
~DeviceConvFwdPtr_t();
DeviceConvFwdPtr_t(DeviceConvFwdPtr_t&&);
DeviceConvFwdPtr_t(DeviceConvFwdPtrImpl&);
DeviceConvFwdPtr_t& operator=(DeviceConvFwdPtr_t&) = delete;
DeviceConvFwdPtr_t& operator=(const DeviceConvFwdPtr_t&)=delete;
std::unique_ptr<BaseArgument> MakeArgumentPointer(void* in_ptr, void* wei_ptr, void* out_ptr,
size_t N, size_t K, size_t C,
std::vector<ck::index_t> input_spatial_lengths,
std::vector<ck::index_t> filter_spatial_lengths,
std::vector<ck::index_t> output_spatial_lengths,
std::vector<ck::index_t> conv_filter_strides,
std::vector<ck::index_t> conv_filter_dilations,
std::vector<ck::index_t> input_left_pads,
std::vector<ck::index_t> input_right_pads); // in,wei and out element ops are ignored for now since even if we change them, they cant be linked
std::unique_ptr<BaseInvoker> MakeInvokerPointer(); // requires including BaseInvoker headers
std::string GetTypeString();
};
void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances_t(std::vector<DeviceConvFwdPtr_t>& instances);
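
Taken together, the header above is the whole host-facing surface: populate a vector of DeviceConvFwdPtr_t, build an argument, build an invoker, run. The sketch below drives exactly these declarations; the problem sizes and the run_all_f32_instances name are illustrative, and the in/wei/out pointers are assumed to already point at device memory.

#include <vector>
#include <iostream>
#include "host_interface.hpp"

void run_all_f32_instances(void* in, void* wei, void* out)
{
    std::vector<DeviceConvFwdPtr_t> conv_ptrs;
    add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances_t(conv_ptrs);

    // illustrative 2D problem: 71x71 input, 3x3 filter, stride 2, pad 1 -> 36x36 output
    std::vector<ck::index_t> in_lengths{71, 71}, filter_lengths{3, 3}, out_lengths{36, 36};
    std::vector<ck::index_t> strides{2, 2}, dilations{1, 1}, left_pads{1, 1}, right_pads{1, 1};

    for(auto& conv : conv_ptrs)
    {
        auto arg = conv.MakeArgumentPointer(in, wei, out,
                                            128 /*N*/, 256 /*K*/, 192 /*C*/,
                                            in_lengths, filter_lengths, out_lengths,
                                            strides, dilations, left_pads, right_pads);
        auto invoker = conv.MakeInvokerPointer();
        std::cout << conv.GetTypeString() << " : avg time "
                  << invoker->Run(arg.get(), 1) << std::endl;
    }
}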
@@ -31,13 +31,6 @@ install(TARGETS host_tensor
         INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
 )
-install(DIRECTORY
-    ${PROJECT_SOURCE_DIR}/include/ck
-    ${PROJECT_SOURCE_DIR}/include/ck/utility
-    ${PROJECT_SOURCE_DIR}/library/include/ck/library/host_tensor
-    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
-)
 install(EXPORT host_tensorTargets
     FILE composable_kernelhost_tensorTargets.cmake
     NAMESPACE composable_kernel::
...
@@ -11,6 +11,7 @@ include_directories(BEFORE
     ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/thread
     ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/element
     ${PROJECT_SOURCE_DIR}/library/include/ck/library/host_tensor
+    ${PROJECT_SOURCE_DIR}/library/include/ck/library/host
     ${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance
     ${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance/gpu/reduce
     ${PROJECT_SOURCE_DIR}/external/include/half
@@ -44,27 +45,13 @@ add_library(device_operations STATIC
     $<TARGET_OBJECTS:device_reduce_instance>
 )
 add_library(composablekernels::device_operations ALIAS device_operations)
 set(DEV_OPS_INC_DIRS
-    ${PROJECT_SOURCE_DIR}/include/ck
-    ${PROJECT_SOURCE_DIR}/include/ck/utility
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_description
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor
-    ${PROJECT_SOURCE_DIR}/include/ck/problem_transform
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/device
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/grid
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/block
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/warp
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/thread
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/element
-    ${PROJECT_SOURCE_DIR}/library/include/ck/library/host_tensor
-    ${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance
-    ${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance/gpu/reduce
-    ${PROJECT_SOURCE_DIR}/external/include/half
+    ${PROJECT_SOURCE_DIR}/include/ck/
+    ${PROJECT_SOURCE_DIR}/library/include/ck/
+    ${PROJECT_SOURCE_DIR}/external/include/
 )
-# target_include_directories(device_operations PUBLIC
-#     $<INSTALL_INTERFACE:${DEV_OPS_INC_DIRS}>
-# )
 target_compile_features(device_operations PUBLIC)
 set_target_properties(device_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
 target_include_directories(device_operations PUBLIC
@@ -80,9 +67,10 @@ target_include_directories(device_operations PUBLIC
     $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/thread>
     $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/element>
     $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/host_tensor>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/host>
     $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance>
     $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/reduce>
-    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/half>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/half>
 )
 #once new arches are enabled make this an option on the main cmake file
@@ -101,7 +89,7 @@ install(TARGETS device_operations
         RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
         INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
 )
-install(DIRECTORY ${DEV_OPS_INC_DIRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+install(DIRECTORY ${DEV_OPS_INC_DIRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck)
 install(EXPORT device_operationsTargets
     FILE composable_kerneldevice_operationsTargets.cmake
     NAMESPACE composable_kernel::
...
 # device_conv2d_fwd_instance
 set(DEVICE_CONV2D_FWD_INSTANCE_SOURCE
-    device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
+    device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance2.cpp;
     device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
     device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp;
     device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp;
...