Commit 6fc49f91 authored by Chao Liu

remove deprecated tensor descriptor

parent 506a823a
@@ -2,7 +2,6 @@
 #define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
 
 #include "common_header.hpp"
-#include "ConstantTensorDescriptor_deprecated.hpp"
 #include "tensor_descriptor.hpp"
 
 namespace ck {
@@ -58,18 +57,6 @@ __host__ __device__ constexpr auto
     return ConstantMatrixDescriptor<NRow, NCol, RowStride>{};
 }
 
-template <typename... Ts>
-__host__ __device__ constexpr auto
-make_ConstantMatrixDescriptor(ConstantTensorDescriptor_deprecated<Ts...>)
-{
-    using TDesc = ConstantTensorDescriptor_deprecated<Ts...>;
-    static_assert(TDesc::GetNumOfDimension() == 2, "wrong");
-    static_assert(TDesc::GetStrides()[1] == 1, "wrong");
-    return ConstantMatrixDescriptor<TDesc::GetLengths()[0],
-                                    TDesc::GetLengths()[1],
-                                    TDesc::GetStrides()[0]>{};
-}
-
 template <typename... Ts>
 __host__ __device__ constexpr auto make_ConstantMatrixDescriptor(NativeTensorDescriptor<Ts...>)
 {
...
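
The overload removed here viewed a 2-D compile-time tensor descriptor with unit inner stride as a matrix, which is what the GetStrides()[1] == 1 assertion enforced. A minimal standalone sketch of that mapping, with hypothetical stand-in types (MatrixDesc is not the library's ConstantMatrixDescriptor):

#include <cstddef>
#include <cstdio>

// Hypothetical stand-in for a matrix descriptor: NRow x NCol with a row stride.
template <std::size_t NRow, std::size_t NCol, std::size_t RowStride>
struct MatrixDesc
{
    static constexpr std::size_t Offset(std::size_t row, std::size_t col)
    {
        return row * RowStride + col; // valid only because the column stride is 1
    }
};

// Mirror of the removed overload's checks: 2-D, contiguous inner dimension.
template <std::size_t L0, std::size_t L1, std::size_t S0, std::size_t S1>
constexpr auto make_matrix_desc()
{
    static_assert(S1 == 1, "inner dimension must be contiguous");
    return MatrixDesc<L0, L1, S0>{};
}

int main()
{
    // a 4x8 view over rows of stride 10: element (2, 3) sits at 2 * 10 + 3 = 23
    constexpr auto desc = make_matrix_desc<4, 8, 10, 1>();
    std::printf("%zu\n", desc.Offset(2, 3));
    return 0;
}
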
-#ifndef CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_DEPRECATED_HPP
-#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_DEPRECATED_HPP
-
-#include "common_header.hpp"
-#include "ConstantTensorDescriptor_deprecated.hpp"
-
-namespace ck {
-
-// OriginalTensorDesc : ConstantTensorDescriptor_deprecated<...>
-//     it's the tensor whose dimensions are to be merged
-// OriginalDimMergeSeqs : Sequence<...>...
-//     each is a sequence of original dimensions (of OriginalTensorDesc) to be merged
-template <class OriginalTensorDesc, class... OriginalDimMergeSeqs>
-struct ConstantMergedTensorDescriptor_deprecated
-{
-    using Type = ConstantMergedTensorDescriptor_deprecated;
-
-    static constexpr auto mOriginalDimMergeSeqs = std::tuple<OriginalDimMergeSeqs...>{};
-
-    static constexpr index_t nDim         = sizeof...(OriginalDimMergeSeqs);
-    static constexpr index_t nOriginalDim = OriginalTensorDesc::GetNumOfDimension();
-
-    __host__ __device__ constexpr ConstantMergedTensorDescriptor_deprecated()
-    {
-        static_assert(nDim <= nOriginalDim, "wrong!");
-
-        // TODO: check that each of OriginalDimMergeSeqs contains at least 1 and at most
-        //   OriginalTensorDesc::nDim dimensions
-        // TODO: check that OriginalDimMergeSeqs contains all original dimensions
-        // TODO: check that there is no duplication in OriginalDimMergeSeqs
-    }
-
-    __host__ __device__ static constexpr auto GetOriginalTensorDescriptor()
-    {
-        return OriginalTensorDesc{};
-    }
-
-    __host__ __device__ static constexpr auto GetNumOfDimension() { return Number<nDim>{}; }
-
-    template <index_t IDim>
-    __host__ __device__ static constexpr auto GetContainedOriginalDimensions(Number<IDim>)
-    {
-        return std::get<IDim>(mOriginalDimMergeSeqs);
-    }
-
-    template <index_t IDim>
-    __host__ __device__ static constexpr bool ContainMultipleOriginalDimensions(Number<IDim>)
-    {
-        return (std::get<IDim>(mOriginalDimMergeSeqs).GetSize() > 1);
-    }
-
-    template <index_t IDim>
-    __host__ __device__ static constexpr auto GetLength(Number<IDim>)
-    {
-        constexpr auto original_dims_partial = std::get<IDim>(mOriginalDimMergeSeqs);
-
-        return OriginalTensorDesc::Extract(original_dims_partial).GetElementSize();
-    }
-
-    template <index_t IDim>
-    __host__ __device__ static constexpr auto GetStride(Number<IDim>)
-    {
-        static_assert(!ContainMultipleOriginalDimensions(Number<IDim>{}),
-                      "wrong! stride of a merged dimension is undefined");
-
-        constexpr auto idim_original = std::get<IDim>(mOriginalDimMergeSeqs).Back();
-
-        return OriginalTensorDesc::GetStride(Number<idim_original>{});
-    }
-
-    // This is a hack to return the stride of the last original dimension of a merged dimension.
-    // TODO: refactor this once the concept of "dimension" is used
-    template <index_t IDim>
-    __host__ __device__ static constexpr auto GetLastOriginalDimensionStride(Number<IDim>)
-    {
-        constexpr auto idim_last_original = std::get<IDim>(mOriginalDimMergeSeqs).Back();
-
-        return OriginalTensorDesc::GetStride(Number<idim_last_original>{});
-    }
-
-    __host__ __device__ static constexpr auto GetLengths()
-    {
-        return Sequence<OriginalTensorDesc::Extract(OriginalDimMergeSeqs{}).GetElementSize()...>{};
-    }
-
-    __host__ __device__ static constexpr auto GetElementSize()
-    {
-        return OriginalTensorDesc::GetElementSize();
-    }
-
-    template <class OriginalDimsPartial>
-    struct lambda_1_GetOriginalMultiIndexFromMultiIndex
-    {
-        const Array<index_t, OriginalDimsPartial::GetSize()>& original_multi_id_partial;
-        Array<index_t, nOriginalDim>& original_multi_id;
-
-        __host__ __device__ constexpr lambda_1_GetOriginalMultiIndexFromMultiIndex(
-            const Array<index_t, OriginalDimsPartial::GetSize()>& original_multi_id_partial_,
-            Array<index_t, nOriginalDim>& original_multi_id_)
-            : original_multi_id_partial(original_multi_id_partial_),
-              original_multi_id(original_multi_id_)
-        {
-        }
-
-        template <index_t I>
-        __host__ __device__ constexpr void operator()(Number<I>) const
-        {
-            constexpr index_t idim_original = OriginalDimsPartial::Get(Number<I>{});
-
-            index_t itmp = original_multi_id_partial[I];
-
-            original_multi_id(idim_original) = itmp;
-        }
-    };
-
-    struct lambda_0_GetOriginalMultiIndexFromMultiIndex
-    {
-        const Array<index_t, nDim>& multi_id;
-        Array<index_t, nOriginalDim>& original_multi_id;
-
-        __host__ __device__ constexpr lambda_0_GetOriginalMultiIndexFromMultiIndex(
-            const Array<index_t, nDim>& multi_id_, Array<index_t, nOriginalDim>& original_multi_id_)
-            : multi_id(multi_id_), original_multi_id(original_multi_id_)
-        {
-        }
-
-        template <index_t IDim>
-        __host__ __device__ constexpr void operator()(Number<IDim>) const
-        {
-            constexpr auto original_dims_partial = std::get<IDim>(Type::mOriginalDimMergeSeqs);
-
-            // get the partial original multi-id corresponding to this merged dimension
-            const auto original_multi_id_partial =
-                OriginalTensorDesc::Extract(original_dims_partial)
-                    .GetMultiIndexFrom1dIndex(multi_id[IDim]);
-
-            static_for<0, original_dims_partial.GetSize(), 1>{}(
-                lambda_1_GetOriginalMultiIndexFromMultiIndex<decltype(original_dims_partial)>(
-                    original_multi_id_partial, original_multi_id));
-        }
-    };
-
-    // return type is Array<...>
-    __host__ __device__ static constexpr auto
-    GetOriginalMultiIndexFromMultiIndex(Array<index_t, nDim> multi_id)
-    {
-        Array<index_t, nOriginalDim> original_multi_id;
-
-        static_for<0, nDim, 1>{}(
-            lambda_0_GetOriginalMultiIndexFromMultiIndex(multi_id, original_multi_id));
-
-        return original_multi_id;
-    }
-
-    template <index_t... Is>
-    __host__ __device__ static constexpr index_t GetOffsetFromMultiIndex(Sequence<Is...>)
-    {
-        constexpr auto multi_id          = sequence2array(Sequence<Is...>{});
-        constexpr auto original_multi_id = GetOriginalMultiIndexFromMultiIndex(multi_id);
-
-        return OriginalTensorDesc::GetOffsetFromMultiIndex(original_multi_id);
-    }
-
-    __host__ __device__ static constexpr index_t
-    GetOffsetFromMultiIndex(Array<index_t, nDim> multi_id)
-    {
-        auto original_multi_id = GetOriginalMultiIndexFromMultiIndex(multi_id);
-
-        return OriginalTensorDesc::GetOffsetFromMultiIndex(original_multi_id);
-    }
-
-    template <class... Is>
-    __host__ __device__ static constexpr index_t GetOffsetFromMultiIndex(Is... is)
-    {
-        return GetOffsetFromMultiIndex(Array<index_t, nDim>{is...});
-    }
-
-    __host__ __device__ static constexpr Array<index_t, nDim> GetMultiIndexFrom1dIndex(index_t id)
-    {
-        constexpr auto packed_desc = make_ConstantTensorDescriptor_packed(GetLengths());
-
-        return packed_desc.GetMultiIndexFrom1dIndex(id);
-    }
-
-    __host__ __device__ static constexpr auto Pack()
-    {
-        constexpr auto lengths = GetLengths();
-        constexpr auto strides = calculate_tensor_strides_packed(lengths);
-        return ConstantTensorDescriptor_deprecated<decltype(lengths), decltype(strides)>{};
-    }
-};
-
-template <class OriginalTensorDesc, class... OriginalDimMergeSeqs>
-__host__ __device__ constexpr auto make_ConstantMergedTensorDescriptor(OriginalTensorDesc,
-                                                                       OriginalDimMergeSeqs...)
-{
-    return ConstantMergedTensorDescriptor_deprecated<OriginalTensorDesc, OriginalDimMergeSeqs...>{};
-}
-
-template <class TDesc>
-__host__ __device__ void print_ConstantMergedTensorDescriptor(const char* s, TDesc)
-{
-    print_ConstantTensorDescriptor(s, TDesc::GetOriginalTensorDescriptor());
-}
-
-} // namespace ck
-#endif
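
As an aside for readers of this deleted file: the index math above is ordinary row-major unflattening. Each merged dimension's 1-d index is decomposed over the lengths of the original dimensions it covers, which is what Extract(...).GetMultiIndexFrom1dIndex(...) computed. A self-contained sketch of that decomposition, using std::array in place of the library's Array and Sequence (the helper name unflatten is hypothetical):

#include <array>
#include <cstddef>
#include <iostream>

// Unflatten a 1-d index into a multi-index over `lens`, row-major: the same
// decomposition the merged descriptor applied to each merged dimension.
template <std::size_t N>
std::array<std::size_t, N> unflatten(std::size_t id, const std::array<std::size_t, N>& lens)
{
    std::array<std::size_t, N> idx{};
    for(std::size_t i = N; i-- > 0;)
    {
        idx[i] = id % lens[i]; // digit for dimension i
        id /= lens[i];
    }
    return idx;
}

int main()
{
    // merging dims {C = 3, H = 5} of an NCHW tensor yields one dimension of length 15;
    // merged index 13 maps back to (c, h) = (2, 3), since 13 == 2 * 5 + 3
    const auto idx = unflatten(13, std::array<std::size_t, 2>{{3, 5}});
    std::cout << "c = " << idx[0] << ", h = " << idx[1] << "\n";
    return 0;
}
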
-#ifndef CK_TENSOR_COORDINATE_DEPRECATED_HPP
-#define CK_TENSOR_COORDINATE_DEPRECATED_HPP
-
-#include "common_header.hpp"
-#include "ConstantTensorDescriptor_deprecated.hpp"
-#include "ConstantMergedTensorDescriptor_deprecated.hpp"
-
-namespace ck {
-
-// TensorDesc is ConstantTensorDescriptor_deprecated
-template <class TensorDesc>
-struct NormalTensorCoordinate_deprecated
-{
-    using type             = NormalTensorCoordinate_deprecated;
-    using tensor_desc_type = TensorDesc;
-
-    static constexpr index_t nDim = tensor_desc_type::GetNumOfDimension();
-
-    __host__
-        __device__ constexpr NormalTensorCoordinate_deprecated(Array<index_t, nDim> tensor_index)
-        : mOffset{tensor_desc_type::GetOffsetFromMultiIndex(tensor_index)}
-    {
-    }
-
-    template <class... Xs>
-    __host__ __device__ constexpr NormalTensorCoordinate_deprecated(Xs... xs)
-        : NormalTensorCoordinate_deprecated(Array<index_t, nDim>{xs...})
-    {
-    }
-
-    template <index_t... Xs>
-    __host__ __device__ constexpr NormalTensorCoordinate_deprecated(Sequence<Xs...>)
-        : NormalTensorCoordinate_deprecated(Array<index_t, nDim>{Xs...})
-    {
-    }
-
-    __host__ __device__ constexpr index_t GetOffset() const { return mOffset; }
-
-    // T is Array or Sequence
-    template <class T>
-    __host__ __device__ type operator+=(T step_sizes)
-    {
-        static_assert(is_same<typename T::data_type, index_t>{} && T::GetSize() == nDim, "wrong!");
-
-        mOffset += tensor_desc_type::GetOffsetFromMultiIndex(step_sizes);
-
-        return *this;
-    }
-
-    template <class T>
-    __host__ __device__ type operator-=(T step_sizes)
-    {
-        static_assert(is_same<typename T::data_type, index_t>{} && T::GetSize() == nDim, "wrong!");
-
-        mOffset -= tensor_desc_type::GetOffsetFromMultiIndex(step_sizes);
-
-        return *this;
-    }
-
-    template <class T>
-    __host__ __device__ constexpr type operator+(T step_sizes) const
-    {
-        type coord = *this;
-        coord += step_sizes;
-        return coord;
-    }
-
-    template <class T>
-    __host__ __device__ constexpr type operator-(T step_sizes) const
-    {
-        type coord = *this;
-        coord -= step_sizes;
-        return coord;
-    }
-
-    // Reposition the point of origin and return the compensated offset.
-    // This is a hack to reduce index calculation while looping over a tensor whose
-    // origin is this TensorCoordinate. It does so by spitting out the run-time offset
-    // held by this TensorCoordinate, so the caller can fold the offset into the
-    // run-time pointer to the tensor data; then only 1 run-time variable (the updated
-    // pointer) is needed, instead of 2 (the old pointer plus this offset).
-    // TODO: after introducing the concept of a "run-time tensor view", which contains
-    //   the run-time pointer to the data, always keep track of the pointer instead of
-    //   both the offset and the pointer. This also brings the additional benefit that
-    //   we don't need to worry that the offset (an unsigned integer) might underflow
-    //   when updating it.
-    __host__ __device__ constexpr index_t RepositionOrigin()
-    {
-        index_t offset_diff = mOffset;
-        mOffset             = 0;
-        return offset_diff;
-    }
-
-    private:
-    index_t mOffset;
-};
-
-// TensorDesc is ConstantMergedTensorDescriptor_deprecated
-template <class TensorDesc>
-struct MergedTensorCoordinate_deprecated
-{
-    using type             = MergedTensorCoordinate_deprecated;
-    using tensor_desc_type = TensorDesc;
-
-    static constexpr index_t nDim = tensor_desc_type::GetNumOfDimension();
-    static constexpr index_t nOriginalDim =
-        tensor_desc_type::GetOriginalTensorDescriptor().GetNumOfDimension();
-
-    __host__
-        __device__ constexpr MergedTensorCoordinate_deprecated(Array<index_t, nDim> tensor_index)
-        : mOriginalIndex{tensor_desc_type::GetOriginalMultiIndexFromMultiIndex(tensor_index)}
-    {
-        // partial offset on each dimension
-        static_for<0, nDim, 1>{}([&](auto idim) {
-            constexpr auto partial_original_dims =
-                tensor_desc_type::GetContainedOriginalDimensions(idim);
-
-            constexpr auto partial_original_desc =
-                tensor_desc_type::GetOriginalTensorDescriptor().Extract(partial_original_dims);
-
-            mPartialOffsets(idim) = partial_original_desc.GetOffsetFromMultiIndex(
-                extract_array(mOriginalIndex, partial_original_dims));
-        });
-
-        // complete offset
-        mOffset =
-            accumulate_on_array(mPartialOffsets, math::plus<index_t>{}, static_cast<index_t>(0));
-    }
-
-    template <class... Xs>
-    __host__ __device__ constexpr MergedTensorCoordinate_deprecated(Xs... xs)
-        : MergedTensorCoordinate_deprecated(Array<index_t, nDim>{xs...})
-    {
-    }
-
-    __host__ __device__ constexpr index_t GetOffset() const { return mOffset; }
-
-    template <class IDim, class T, bool PositiveDirection>
-    __host__ __device__ void
-    MoveOnDimension(IDim idim_, T step_size, integral_constant<bool, PositiveDirection>)
-    {
-        constexpr auto idim = idim_;
-
-        // if step_size is known at compile time
-        static_if<is_static<T>::value>{}(
-            [&](auto) { static_if<T{} == 0>{}([&](auto) { return; }); });
-
-        // update original index
-        static_if<tensor_desc_type::ContainMultipleOriginalDimensions(idim)>{}([&](auto) {
-            constexpr auto partial_original_dims =
-                tensor_desc_type::GetContainedOriginalDimensions(idim);
-
-            constexpr index_t ndim_partial_original = partial_original_dims.GetSize();
-
-            constexpr auto partial_original_desc =
-                tensor_desc_type::GetOriginalTensorDescriptor().Extract(partial_original_dims);
-
-            const auto partial_original_step_sizes =
-                partial_original_desc.GetMultiIndexFrom1dIndex(step_size);
-
-            // update the partial original multi-id
-            auto partial_original_id = extract_array(mOriginalIndex, partial_original_dims);
-
-            static_if<PositiveDirection>{}([&](auto) {
-                partial_original_id += partial_original_step_sizes;
-
-                bool carry = false;
-
-                // do the carry check in reversed order, starting from the lowest dimension;
-                // don't check the highest dimension
-                static_for<0, ndim_partial_original - 1, 1>{}([&](auto IReverse) {
-                    constexpr index_t i = ndim_partial_original - 1 - IReverse;
-
-                    if(carry)
-                    {
-                        ++partial_original_id(i);
-                    }
-
-                    carry = false;
-
-                    if(partial_original_id[i] >= partial_original_desc.GetLength(i))
-                    {
-                        partial_original_id(i) -= partial_original_desc.GetLength(i);
-                        carry = true;
-                    }
-                });
-
-                // highest dimension
-                if(carry)
-                {
-                    ++partial_original_id(0);
-                }
-            }).Else([&](auto) {
-                // shift the multi-id up to avoid unsigned-integer underflow during
-                // intermediate calculations; after the shift, new_multi_id[...] >= 1
-                partial_original_id +=
-                    partial_original_desc.GetLengths() - partial_original_step_sizes;
-
-                bool borrow = false;
-
-                // do the borrow check in reversed order, starting from the lowest dimension;
-                // don't check the highest dimension
-                static_for<0, ndim_partial_original - 1, 1>{}([&](auto IReverse) {
-                    constexpr index_t i = ndim_partial_original - 1 - IReverse;
-
-                    if(borrow)
-                    {
-                        --partial_original_id(i);
-                    }
-
-                    borrow = false;
-
-                    if(partial_original_id[i] < partial_original_desc.GetLength(i))
-                    {
-                        partial_original_id(i) += partial_original_desc.GetLength(i);
-                        borrow = true;
-                    }
-                });
-
-                // highest dimension
-                if(borrow)
-                {
-                    --partial_original_id(0);
-                }
-
-                // shift the multi-id back down; here new_multi_id[...] >= GetLengths()
-                partial_original_id = partial_original_id - partial_original_desc.GetLengths();
-            });
-
-            // update "mOriginalIndex"
-            static_for<0, ndim_partial_original, 1>{}([&](auto I) {
-                constexpr auto idim_original  = partial_original_dims[I];
-                mOriginalIndex(idim_original) = partial_original_id[I];
-            });
-
-            // calculate the new partial offset on this merged dimension
-            const index_t old_partial_offset = mPartialOffsets[idim];
-
-            mPartialOffsets(idim) =
-                partial_original_desc.GetOffsetFromMultiIndex(partial_original_id);
-
-            // update "mOffset"; do "+" before "-" to avoid underflow
-            mOffset = (mOffset + mPartialOffsets[idim]) - old_partial_offset;
-        }).Else([&](auto fwd) {
-            static_if<PositiveDirection>{}([&](auto) {
-                mOffset += step_size * fwd(tensor_desc_type{}).GetStride(idim);
-            }).Else([&](auto) { mOffset -= step_size * fwd(tensor_desc_type{}).GetStride(idim); });
-        });
-    }
-
-    // T is Array or Sequence
-    template <class T>
-    __host__ __device__ type operator+=(T step_sizes)
-    {
-        static_assert(is_same<typename T::data_type, index_t>{} && T::GetSize() == nDim, "wrong!");
-
-        static_for<0, nDim, 1>{}([&](auto idim) {
-            // the compiler should remove the dead code path, because step_sizes is
-            // known at compile time
-            if(step_sizes[idim] != 0)
-            {
-                this->MoveOnDimension(idim, step_sizes[idim], integral_constant<bool, true>{});
-            }
-        });
-
-        return *this;
-    }
-
-    template <class T>
-    __host__ __device__ type operator-=(T step_sizes)
-    {
-        static_assert(is_same<typename T::data_type, index_t>{} && T::GetSize() == nDim, "wrong!");
-
-        static_for<0, nDim, 1>{}([&](auto idim) {
-            // the compiler should remove the dead code path, because step_sizes is
-            // known at compile time
-            if(step_sizes[idim] != 0)
-            {
-                this->MoveOnDimension(idim, step_sizes[idim], integral_constant<bool, false>{});
-            }
-        });
-
-        return *this;
-    }
-
-    template <class T>
-    __host__ __device__ constexpr type operator+(T step_sizes) const
-    {
-        type coord = *this;
-        coord += step_sizes;
-        return coord;
-    }
-
-    template <class T>
-    __host__ __device__ constexpr type operator-(T step_sizes) const
-    {
-        type coord = *this;
-        coord -= step_sizes;
-        return coord;
-    }
-
-    __host__ __device__ static constexpr index_t RepositionOrigin() { return 0; }
-
-    private:
-    // Allocate registers for all merged and normal dimensions. However, only merged
-    // dimensions whose index is involved in arithmetic after the construction of this
-    // TensorCoordinate (e.g. when the user moves a slicing window along the merged
-    // dimension) actually need them. Hopefully the compiler will optimize away the
-    // registers allocated for normal dimensions, and for merged dimensions that are
-    // never involved in index arithmetic after construction.
-    // TODO: refactor TensorCoordinate after introducing the concept of "dimension",
-    //   and simplify the implementation of ConstantMergedTensorDescriptor_deprecated,
-    //   so we don't need to count on the compiler to optimize these registers away for us.
-    Array<index_t, nOriginalDim> mOriginalIndex;
-    Array<index_t, nDim> mPartialOffsets;
-
-    // complete offset
-    index_t mOffset;
-};
-
-template <class TensorDesc>
-struct TensorCoordinate_deprecated
-{
-    private:
-    template <class... Ts>
-    __host__ __device__ static constexpr auto
-    MakeDummyTensorCoordinate(ConstantTensorDescriptor_deprecated<Ts...>)
-    {
-        return NormalTensorCoordinate_deprecated<ConstantTensorDescriptor_deprecated<Ts...>>();
-    }
-
-    template <class... Ts>
-    __host__ __device__ static constexpr auto
-    MakeDummyTensorCoordinate(ConstantMergedTensorDescriptor_deprecated<Ts...>)
-    {
-        return MergedTensorCoordinate_deprecated<
-            ConstantMergedTensorDescriptor_deprecated<Ts...>>();
-    }
-
-    public:
-    using type = decltype(MakeDummyTensorCoordinate(TensorDesc{}));
-};
-
-} // namespace ck
-#endif
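
A note on the MoveOnDimension logic above: the carry and borrow loops are plain multi-radix arithmetic on the partial original multi-index. A minimal standalone sketch of the positive-direction (carry) case, keeping the same convention that the highest dimension is never wrapped:

#include <array>
#include <cstddef>
#include <iostream>

// Add `step` to `idx` (both multi-indices over `lens`) and propagate carries
// from the lowest dimension upward; the highest dimension may overflow freely.
template <std::size_t N>
void step_with_carry(std::array<std::size_t, N>& idx,
                     const std::array<std::size_t, N>& step,
                     const std::array<std::size_t, N>& lens)
{
    for(std::size_t i = 0; i < N; ++i)
        idx[i] += step[i];

    bool carry = false;
    for(std::size_t i = N; i-- > 1;) // stop before the highest dimension
    {
        if(carry)
            ++idx[i];
        carry = false;
        if(idx[i] >= lens[i])
        {
            idx[i] -= lens[i];
            carry = true;
        }
    }
    if(carry) // highest dimension
        ++idx[0];
}

int main()
{
    std::array<std::size_t, 3> idx{{0, 2, 4}};
    step_with_carry(idx, {0, 0, 3}, {4, 3, 5}); // 4 + 3 = 7 wraps, carry ripples up
    std::cout << idx[0] << " " << idx[1] << " " << idx[2] << "\n"; // prints: 1 0 2
    return 0;
}
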
@@ -3,6 +3,7 @@
 #include "config.hpp"
 #include "integral_constant.hpp"
+#include "number.hpp"
 #include "type.hpp"
 
 namespace ck {
 
...
 set(TENSOR_SOURCE
-    src/tensor.cpp;
+    src/host_tensor.cpp;
     src/device.cpp;
 )
@@ -25,8 +25,6 @@ elseif(DEVICE_BACKEND STREQUAL "NVIDIA")
 endif()
 add_executable(conv_driver ${CONV_SOURCE})
-add_executable(col2im_driver ${COL2IM_SOURCE})
 add_executable(conv_bwd_data_driver ${CONV_BWD_DATA_SOURCE})
 
 target_link_libraries(conv_driver PRIVATE host)
-target_link_libraries(col2im_driver PRIVATE host)
 target_link_libraries(conv_bwd_data_driver PRIVATE host)
 #ifndef CONV_COMMON_HPP
 #define CONV_COMMON_HPP
 
-#include "ConstantTensorDescriptor_deprecated.hpp"
 #include "tensor_descriptor.hpp"
 
-template <class InDesc,
-          class WeiDesc,
-          class ConvStrides,
-          class ConvDilations,
-          class LowerPads,
-          class UpperPads>
-constexpr auto get_convolution_output_default_4d_tensor_descriptor_deprecated(
-    InDesc, WeiDesc, ConvStrides, ConvDilations, LowerPads, UpperPads)
-{
-    using namespace ck;
-
-    constexpr auto in_desc  = InDesc{};
-    constexpr auto wei_desc = WeiDesc{};
-
-    constexpr auto I0 = Number<0>{};
-    constexpr auto I1 = Number<1>{};
-    constexpr auto I2 = Number<2>{};
-    constexpr auto I3 = Number<3>{};
-
-    static_assert(in_desc.GetNumOfDimension() == 4, "input nDim is not 4");
-    static_assert(wei_desc.GetNumOfDimension() == 4, "weight nDim is not 4");
-    static_assert(in_desc.GetLength(I1) == wei_desc.GetLength(I1),
-                  "input & weight dimension not consistent");
-
-    constexpr index_t N  = in_desc.GetLength(I0);
-    constexpr index_t Hi = in_desc.GetLength(I2);
-    constexpr index_t Wi = in_desc.GetLength(I3);
-
-    constexpr index_t K = wei_desc.GetLength(I0);
-    constexpr index_t Y = wei_desc.GetLength(I2);
-    constexpr index_t X = wei_desc.GetLength(I3);
-
-    constexpr index_t HPadLow = LowerPads{}.Get(I0);
-    constexpr index_t WPadLow = LowerPads{}.Get(I1);
-
-    constexpr index_t HPadUp = UpperPads{}.Get(I0);
-    constexpr index_t WPadUp = UpperPads{}.Get(I1);
-
-    constexpr index_t YEff = (Y - 1) * ConvDilations{}[0] + 1;
-    constexpr index_t XEff = (X - 1) * ConvDilations{}[1] + 1;
-
-    constexpr index_t Ho = (Hi + HPadLow + HPadUp - YEff) / ConvStrides{}[0] + 1;
-    constexpr index_t Wo = (Wi + WPadLow + WPadUp - XEff) / ConvStrides{}[1] + 1;
-
-    return make_ConstantTensorDescriptor_packed(Sequence<N, K, Ho, Wo>{});
-}
-
 template <class InDesc,
           class WeiDesc,
           class ConvStrides,
...
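
For reference, both the deleted *_deprecated helper and the surviving overload size the output the same way: the effective filter size is YEff = (Y - 1) * dilation + 1, and Ho = (Hi + pad_low + pad_up - YEff) / stride + 1 (likewise for W). A runtime sketch of that arithmetic with made-up numbers:

#include <cstdio>

// Same output-size formula as the helper above, as plain runtime arithmetic.
constexpr int conv_out_size(int in, int filter, int stride, int dilation, int pad_low, int pad_up)
{
    const int eff = (filter - 1) * dilation + 1; // effective (dilated) filter size
    return (in + pad_low + pad_up - eff) / stride + 1;
}

int main()
{
    // e.g. Hi = 28, Y = 3, stride 1, dilation 1, pad 1/1 gives Ho = 28 ("same" padding)
    std::printf("Ho = %d\n", conv_out_size(28, 3, 1, 1, 1, 1));
    return 0;
}
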
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw.hpp"
...
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_backward_data_implicit_gemm_v1r2_nchw_kcyx_nkhw_lds_double_buffer.hpp"
...
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw.hpp"
...
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_backward_data_implicit_gemm_v3r1_nchw_kcyx_nkhw.hpp"
...
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
...
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp"
...
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
...
 #pragma once
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "common_header.hpp"
-#include "ConstantTensorDescriptor_deprecated.hpp"
 #include "tensor_descriptor.hpp"
 
-template <typename ConstTensorDesc, std::size_t... Is>
-auto make_TensorDescriptor_impl(ConstTensorDesc, std::integer_sequence<std::size_t, Is...>)
+template <typename TensorDesc, std::size_t... Is>
+auto make_HostTensorDescriptor_impl(TensorDesc, std::integer_sequence<std::size_t, Is...>)
 {
-    std::initializer_list<std::size_t> lengths = {ConstTensorDesc::GetLengths()[Is]...};
-    std::initializer_list<std::size_t> strides = {ConstTensorDesc::GetStrides()[Is]...};
-
-    return TensorDescriptor(lengths, strides);
+    std::initializer_list<std::size_t> lengths = {TensorDesc::GetLengths()[Is]...};
+    std::initializer_list<std::size_t> strides = {TensorDesc::GetStrides()[Is]...};
+
+    return HostTensorDescriptor(lengths, strides);
 }
 
-template <typename ConstTensorDesc>
-auto make_TensorDescriptor(ConstTensorDesc)
+template <typename TensorDesc>
+auto make_HostTensorDescriptor(TensorDesc)
 {
-    return make_TensorDescriptor_impl(
-        ConstTensorDesc{},
-        std::make_integer_sequence<std::size_t, ConstTensorDesc::GetNumOfDimension()>{});
+    return make_HostTensorDescriptor_impl(
+        TensorDesc{}, std::make_integer_sequence<std::size_t, TensorDesc::GetNumOfDimension()>{});
 }
 
-template <typename ConstTensorDesc>
-void ostream_ConstantTensorDescriptor(ConstTensorDesc, std::ostream& os = std::cout)
+template <typename TensorDesc>
+void ostream_tensor_descriptor(TensorDesc, std::ostream& os = std::cout)
 {
-    ostream_TensorDescriptor(make_TensorDescriptor(ConstTensorDesc{}), os);
+    ostream_HostTensorDescriptor(make_HostTensorDescriptor(TensorDesc{}), os);
 }
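
The renamed helpers above copy a compile-time descriptor's lengths and strides into a runtime HostTensorDescriptor via an index-sequence pack expansion. A self-contained sketch of the same mechanism using hypothetical stand-in types (Desc2D and HostDesc are not library types):

#include <array>
#include <cstddef>
#include <initializer_list>
#include <iostream>
#include <utility>
#include <vector>

// Stand-in for a compile-time descriptor exposing GetLengths()/GetStrides().
struct Desc2D
{
    static constexpr std::size_t GetNumOfDimension() { return 2; }
    static constexpr std::array<std::size_t, 2> GetLengths() { return {{8, 4}}; }
    static constexpr std::array<std::size_t, 2> GetStrides() { return {{4, 1}}; }
};

// Stand-in for the runtime host-side descriptor.
struct HostDesc
{
    std::vector<std::size_t> lens;
    std::vector<std::size_t> strides;
};

// Expand the compile-time lengths/strides element-by-element, as the
// make_HostTensorDescriptor_impl above does.
template <typename TensorDesc, std::size_t... Is>
HostDesc make_host_desc_impl(TensorDesc, std::integer_sequence<std::size_t, Is...>)
{
    return HostDesc{{TensorDesc::GetLengths()[Is]...}, {TensorDesc::GetStrides()[Is]...}};
}

template <typename TensorDesc>
HostDesc make_host_desc(TensorDesc desc)
{
    return make_host_desc_impl(
        desc, std::make_integer_sequence<std::size_t, TensorDesc::GetNumOfDimension()>{});
}

int main()
{
    const HostDesc hd = make_host_desc(Desc2D{});
    std::cout << "dim " << hd.lens.size() << ", lens {" << hd.lens[0] << ", " << hd.lens[1]
              << "}, strides {" << hd.strides[0] << ", " << hd.strides[1] << "}\n";
    return 0;
}
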
 #pragma once
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 
 template <class TIn,
           class TWei,
...
 #pragma once
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 
 template <typename TIn,
           typename TWei,
...
-#ifndef TENSOR_HPP
-#define TENSOR_HPP
+#ifndef HOST_TENSOR_HPP
+#define HOST_TENSOR_HPP
 
 #include <thread>
 #include <vector>
@@ -65,26 +65,26 @@ auto construct_f_unpack_args(F, T args)
     return construct_f_unpack_args_impl<F>(args, std::make_index_sequence<N>{});
 }
 
-struct TensorDescriptor
+struct HostTensorDescriptor
 {
-    TensorDescriptor() = delete;
+    HostTensorDescriptor() = delete;
 
     template <typename X>
-    TensorDescriptor(std::vector<X> lens);
+    HostTensorDescriptor(std::vector<X> lens);
 
     template <typename X, typename Y>
-    TensorDescriptor(std::vector<X> lens, std::vector<Y> strides);
+    HostTensorDescriptor(std::vector<X> lens, std::vector<Y> strides);
 
     void CalculateStrides();
 
     template <class Range>
-    TensorDescriptor(const Range& lens) : mLens(lens.begin(), lens.end())
+    HostTensorDescriptor(const Range& lens) : mLens(lens.begin(), lens.end())
     {
         this->CalculateStrides();
     }
 
     template <class Range1, class Range2>
-    TensorDescriptor(const Range1& lens, const Range2& strides)
+    HostTensorDescriptor(const Range1& lens, const Range2& strides)
         : mLens(lens.begin(), lens.end()), mStrides(strides.begin(), strides.end())
     {
     }
@@ -205,7 +205,7 @@ struct Tensor
     {
     }
 
-    Tensor(const TensorDescriptor& desc) : mDesc(desc), mData(mDesc.GetElementSpace()) {}
+    Tensor(const HostTensorDescriptor& desc) : mDesc(desc), mData(mDesc.GetElementSpace()) {}
 
     template <class G>
     void GenerateTensorValue(G g, std::size_t num_thread = 1)
@@ -267,11 +267,11 @@ struct Tensor
     typename std::vector<T>::const_iterator end() const { return mData.end(); }
 
-    TensorDescriptor mDesc;
+    HostTensorDescriptor mDesc;
     std::vector<T> mData;
 };
 
-void ostream_TensorDescriptor(const TensorDescriptor& desc, std::ostream& os = std::cout)
+void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream& os = std::cout)
 {
     os << "dim " << desc.GetNumOfDimension() << ", ";
...
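
Only the declaration of CalculateStrides() is visible in this hunk; presumably it fills mStrides with packed row-major strides, i.e. stride[i] is the product of lengths[i+1..]. A standalone sketch of that computation (an assumption about the body, which this diff does not show):

#include <cstddef>
#include <iostream>
#include <vector>

// Packed row-major strides: the innermost dimension has stride 1, and each
// outer stride is the next stride times the next length.
std::vector<std::size_t> packed_strides(const std::vector<std::size_t>& lens)
{
    std::vector<std::size_t> strides(lens.size(), 1);
    for(std::size_t i = lens.size() - 1; i-- > 0;)
        strides[i] = strides[i + 1] * lens[i + 1];
    return strides;
}

int main()
{
    for(auto s : packed_strides({2, 3, 4})) // prints: 12 4 1
        std::cout << s << " ";
    std::cout << "\n";
    return 0;
}
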