Commit f785032d authored by Jehandad Khan's avatar Jehandad Khan
Browse files

Create host_api interface

parent 6b1b28eb
......@@ -7,7 +7,6 @@
#include <vector>
#include "client_app_impl.hpp"
enum ConvDataType
{
F32_F32_F32, // 0
......@@ -84,13 +83,7 @@ int main(int argc, char* argv[])
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
ck::app::profile_conv_fwd_impl<2,
float,
float,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
ck::app::profile_conv_fwd_impl(
do_verification,
init_method,
do_log,
......
#pragma once
#include "config.hpp"
#include "device.hpp"
#include "tensor_layout.hpp"
#include "device_conv_fwd.hpp"
#include "element_wise_operation.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace device_conv2d_fwd_instance {
using DeviceConvFwdNoOpPtr = DeviceConvFwdPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>;
void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(std::vector<DeviceConvFwdNoOpPtr>&);
#include "host_interface.hpp"
void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances(
std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(std::vector<DeviceConvFwdNoOpPtr>&);
} // namespace device_conv2d_fwd_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
struct DeviceMem
{
float* ptr_mem=nullptr;
int size;
DeviceMem(int _size): size(_size){}
float* GetDeviceBuffer()
{
return ptr_mem;
}
};
namespace ck {
namespace app {
template <int NDimSpatial,
typename InDataType,
typename WeiDataType,
typename OutDataType,
typename InLayout,
typename WeiLayout,
typename OutLayout>
void profile_conv_fwd_impl(int do_verification,
int init_method,
bool do_log,
......@@ -62,36 +40,26 @@ void profile_conv_fwd_impl(int do_verification,
const ck::index_t Ho = output_spatial_lengths[0];
const ck::index_t Wo = output_spatial_lengths[1];
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
const auto in_element_op = InElementOp{};
const auto wei_element_op = WeiElementOp{};
const auto out_element_op = OutElementOp{};
const auto in_sz = 1000;
const auto wei_sz = 1000;
const auto out_sz = 1000;
using WeiDataType = float;
using InDataType = float;
using OutDataType = float;
DeviceMem in_device_buf(sizeof(InDataType) * in_sz);
DeviceMem wei_device_buf(sizeof(WeiDataType) * wei_sz);
DeviceMem out_device_buf(sizeof(OutDataType) * out_sz);
// data is already on device!
// in_device_buf.ToDevice(in_n_c_hi_wi.mData.data());
// wei_device_buf.ToDevice(wei_k_c_y_x.mData.data());
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using DeviceConvFwdNoOpPtr =
ck::tensor_operation::device::DeviceConvFwdPtr<PassThrough, PassThrough, PassThrough>;
// add device Conv instances
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
std::vector<DeviceConvFwdPtr_t> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs);
if(conv_ptrs.size() <= 0)
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances_t(conv_ptrs);
if(conv_ptrs.empty())
{
throw std::runtime_error("wrong! no device Conv instance found");
}
......@@ -104,10 +72,10 @@ void profile_conv_fwd_impl(int do_verification,
// profile device Conv instances
for(auto& conv_ptr : conv_ptrs)
{
auto argument_ptr = conv_ptr->MakeArgumentPointer(
static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
auto argument_ptr = conv_ptr.MakeArgumentPointer(
static_cast<void*>(in_device_buf.GetDeviceBuffer()),
static_cast<void*>(wei_device_buf.GetDeviceBuffer()),
static_cast<void*>(out_device_buf.GetDeviceBuffer()),
N,
K,
C,
......@@ -117,16 +85,13 @@ void profile_conv_fwd_impl(int do_verification,
conv_filter_strides,
conv_filter_dilations,
input_left_pads,
input_right_pads,
in_element_op,
wei_element_op,
out_element_op);
input_right_pads);
auto invoker_ptr = conv_ptr->MakeInvokerPointer();
auto invoker_ptr = conv_ptr.MakeInvokerPointer();
if(conv_ptr->IsSupportedArgument(argument_ptr.get()))
//if(conv_ptr.IsSupportedArgument(argument_ptr.get()))
{
std::string conv_name = conv_ptr->GetTypeString();
std::string conv_name = conv_ptr.GetTypeString();
float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat);
......
......@@ -22,7 +22,7 @@ struct BaseInvoker
BaseInvoker(const BaseInvoker&) = default;
BaseInvoker& operator=(const BaseInvoker&) = default;
virtual float Run(const BaseArgument*, int = 1) = 0;
virtual float Run(const BaseArgument*, int = 1){return -1;}
virtual ~BaseInvoker() {}
};
......@@ -33,8 +33,8 @@ struct BaseOperator
BaseOperator(const BaseOperator&) = default;
BaseOperator& operator=(const BaseOperator&) = default;
virtual bool IsSupportedArgument(const BaseArgument*) = 0;
virtual std::string GetTypeString() const = 0;
virtual bool IsSupportedArgument(const BaseArgument*){return false;}
virtual std::string GetTypeString() const {return "";}
virtual ~BaseOperator() {}
};
......
#pragma once
#include <memory>
#include <string>
#include "config.hpp"
#include "device_base.hpp"
struct DeviceConvFwdPtr_t
{
using BaseArgument = ck::tensor_operation::device::BaseArgument;
using BaseInvoker = ck::tensor_operation::device::BaseInvoker;
struct DeviceConvFwdPtrImpl;
std::unique_ptr<DeviceConvFwdPtrImpl> pImpl;
DeviceConvFwdPtr_t();
~DeviceConvFwdPtr_t();
DeviceConvFwdPtr_t(DeviceConvFwdPtr_t&&);
DeviceConvFwdPtr_t(DeviceConvFwdPtrImpl&);
DeviceConvFwdPtr_t& operator=(DeviceConvFwdPtr_t&) = delete;
DeviceConvFwdPtr_t& operator=(const DeviceConvFwdPtr_t&)=delete;
std::unique_ptr<BaseArgument> MakeArgumentPointer(void* in_ptr, void* wei_ptr, void* out_ptr,
size_t N, size_t K, size_t C,
std::vector<ck::index_t> input_spatial_lengths,
std::vector<ck::index_t> filter_spatial_lengths,
std::vector<ck::index_t> output_spatial_lengths,
std::vector<ck::index_t> conv_filter_strides,
std::vector<ck::index_t> conv_filter_dilations,
std::vector<ck::index_t> input_left_pads,
std::vector<ck::index_t> input_right_pads); // in,wei and out element ops are ignored for now since even if we change them, they cant be linked
std::unique_ptr<BaseInvoker> MakeInvokerPointer(); // requires including BaseInvoker headers
std::string GetTypeString();
};
void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances_t(std::vector<DeviceConvFwdPtr_t>& instances);
......@@ -31,13 +31,6 @@ install(TARGETS host_tensor
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
install(DIRECTORY
${PROJECT_SOURCE_DIR}/include/ck
${PROJECT_SOURCE_DIR}/include/ck/utility
${PROJECT_SOURCE_DIR}/library/include/ck/library/host_tensor
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
install(EXPORT host_tensorTargets
FILE composable_kernelhost_tensorTargets.cmake
NAMESPACE composable_kernel::
......
......@@ -11,6 +11,7 @@ include_directories(BEFORE
${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/thread
${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/element
${PROJECT_SOURCE_DIR}/library/include/ck/library/host_tensor
${PROJECT_SOURCE_DIR}/library/include/ck/library/host
${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance
${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance/gpu/reduce
${PROJECT_SOURCE_DIR}/external/include/half
......@@ -44,27 +45,13 @@ add_library(device_operations STATIC
$<TARGET_OBJECTS:device_reduce_instance>
)
add_library(composablekernels::device_operations ALIAS device_operations)
set(DEV_OPS_INC_DIRS
${PROJECT_SOURCE_DIR}/include/ck
${PROJECT_SOURCE_DIR}/include/ck/utility
${PROJECT_SOURCE_DIR}/include/ck/tensor_description
${PROJECT_SOURCE_DIR}/include/ck/tensor
${PROJECT_SOURCE_DIR}/include/ck/problem_transform
${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/device
${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/grid
${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/block
${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/warp
${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/thread
${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/element
${PROJECT_SOURCE_DIR}/library/include/ck/library/host_tensor
${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance
${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance/gpu/reduce
${PROJECT_SOURCE_DIR}/external/include/half
${PROJECT_SOURCE_DIR}/include/ck/
${PROJECT_SOURCE_DIR}/library/include/ck/
${PROJECT_SOURCE_DIR}/external/include/
)
# target_include_directories(device_operations PUBLIC
# $<INSTALL_INTERFACE:${DEV_OPS_INC_DIRS}>
# )
target_compile_features(device_operations PUBLIC)
set_target_properties(device_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(device_operations PUBLIC
......@@ -80,9 +67,10 @@ target_include_directories(device_operations PUBLIC
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/thread>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/element>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/host_tensor>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/host>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/reduce>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/half>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/half>
)
#once new arches are enabled make this an option on the main cmake file
......@@ -101,7 +89,7 @@ install(TARGETS device_operations
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
install(DIRECTORY ${DEV_OPS_INC_DIRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(DIRECTORY ${DEV_OPS_INC_DIRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck)
install(EXPORT device_operationsTargets
FILE composable_kerneldevice_operationsTargets.cmake
NAMESPACE composable_kernel::
......
# device_conv2d_fwd_instance
set(DEVICE_CONV2D_FWD_INSTANCE_SOURCE
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance2.cpp;
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp;
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment