Commit c9a8e558 authored by Chao Liu
Browse files

adding tensor_view

parent 8669e242
......@@ -433,7 +433,7 @@ __host__ __device__ void print_Sequence(const char* s, Sequence<Xs...>)
{
constexpr index_t nsize = Sequence<Xs...>::GetSize();
static_assert(nsize <= 10, "wrong!");
static_assert(nsize <= 12, "wrong!");
static_if<nsize == 0>{}([&](auto) { printf("%s size %u, {}\n", s, nsize, Xs...); });
......@@ -462,6 +462,13 @@ __host__ __device__ void print_Sequence(const char* s, Sequence<Xs...>)
static_if<nsize == 10>{}(
[&](auto) { printf("%s size %u, {%u %u %u %u %u %u %u %u %u %u}\n", s, nsize, Xs...); });
static_if<nsize == 11>{}(
[&](auto) { printf("%s size %u, {%u %u %u %u %u %u %u %u %u %u %u}\n", s, nsize, Xs...); });
static_if<nsize == 12>{}([&](auto) {
printf("%s size %u, {%u %u %u %u %u %u %u %u %u %u %u %u}\n", s, nsize, Xs...);
});
}
} // namespace ck
......
#ifndef CONV_COMMON_HPP
#define CONV_COMMON_HPP
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
// this is ugly, only for 4d
template <class InDesc, class WeiDesc>
......
......@@ -9,11 +9,11 @@ using namespace ck;
template <class T, class InDesc, class WeiDesc, class OutDesc>
void device_convolution_direct_v2_nchw_kcyx_nkhw(InDesc,
const Tensor<T>& in,
const HostTensor<T>& in,
WeiDesc,
const Tensor<T>& wei,
const HostTensor<T>& wei,
OutDesc,
Tensor<T>& out,
HostTensor<T>& out,
index_t nrepeat)
{
std::size_t data_sz = sizeof(T);
......
......@@ -12,11 +12,11 @@ using namespace ck;
template <class T, class InDesc, class WeiDesc, class OutDesc>
void device_convolution_implicit_gemm_v1_chwn_cyxk_khwn(InDesc,
const Tensor<T>& in_nchw,
const HostTensor<T>& in_nchw,
WeiDesc,
const Tensor<T>& wei_kcyx,
const HostTensor<T>& wei_kcyx,
OutDesc,
Tensor<T>& out_nkhw,
HostTensor<T>& out_nkhw,
index_t nrepeat)
{
constexpr auto I0 = Number<0>{};
......@@ -44,7 +44,7 @@ void device_convolution_implicit_gemm_v1_chwn_cyxk_khwn(InDesc,
auto wei_cyxk_desc = make_ConstantTensorDescriptor_packed(Sequence<C, Y, X, K>{});
ostream_ConstantTensorDescriptor(wei_cyxk_desc, std::cout << "wei_cyxk_desc: ");
Tensor<T> wei_cyxk(make_TensorDescriptor(wei_cyxk_desc));
HostTensor<T> wei_cyxk(make_TensorDescriptor(wei_cyxk_desc));
auto f_reorder_kcyx2cyxk = [&](auto k, auto c, auto y, auto x) {
wei_cyxk(c, y, x, k) = wei_kcyx(k, c, y, x);
......@@ -57,7 +57,7 @@ void device_convolution_implicit_gemm_v1_chwn_cyxk_khwn(InDesc,
auto in_chwn_desc = make_ConstantTensorDescriptor_packed(Sequence<C, Hi, Wi, N>{});
ostream_ConstantTensorDescriptor(in_chwn_desc, std::cout << "in_chwn_desc: ");
Tensor<T> in_chwn(make_TensorDescriptor(in_chwn_desc));
HostTensor<T> in_chwn(make_TensorDescriptor(in_chwn_desc));
auto f_reorder_nchw2chwn = [&](auto n, auto c, auto hi, auto wi) {
in_chwn(c, hi, wi, n) = in_nchw(n, c, hi, wi);
......@@ -70,7 +70,7 @@ void device_convolution_implicit_gemm_v1_chwn_cyxk_khwn(InDesc,
auto out_khwn_desc = make_ConstantTensorDescriptor_packed(Sequence<K, Ho, Wo, N>{});
ostream_ConstantTensorDescriptor(out_khwn_desc, std::cout << "out_khwn_desc: ");
Tensor<T> out_khwn(make_TensorDescriptor(out_khwn_desc));
HostTensor<T> out_khwn(make_TensorDescriptor(out_khwn_desc));
std::size_t data_sz = sizeof(T);
DeviceMem in_chwn_device_buf(data_sz * in_chwn.mDesc.GetElementSpace());
......
......@@ -10,11 +10,11 @@ using namespace ck;
template <class T, class InDesc, class WeiDesc, class OutDesc>
void device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw(InDesc,
const Tensor<T>& in_nchw,
const HostTensor<T>& in_nchw,
WeiDesc,
const Tensor<T>& wei_kcyx,
const HostTensor<T>& wei_kcyx,
OutDesc,
Tensor<T>& out_nkhw,
HostTensor<T>& out_nkhw,
index_t nrepeat)
{
constexpr auto I0 = Number<0>{};
......@@ -42,7 +42,7 @@ void device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw(InDesc,
auto wei_cyxk_desc = make_ConstantTensorDescriptor_packed(Sequence<C, Y, X, K>{});
ostream_ConstantTensorDescriptor(wei_cyxk_desc, std::cout << "wei_cyxk_desc: ");
Tensor<T> wei_cyxk(make_TensorDescriptor(wei_cyxk_desc));
HostTensor<T> wei_cyxk(make_TensorDescriptor(wei_cyxk_desc));
auto f_reorder_kcyx2cyxk = [&](auto k, auto c, auto y, auto x) {
wei_cyxk(c, y, x, k) = wei_kcyx(k, c, y, x);
......
......@@ -10,11 +10,11 @@ using namespace ck;
template <class T, class InDesc, class WeiDesc, class OutDesc>
void device_convolution_implicit_gemm_v2_chwn_cyxk_khwn(InDesc,
const Tensor<T>& in_nchw,
const HostTensor<T>& in_nchw,
WeiDesc,
const Tensor<T>& wei_kcyx,
const HostTensor<T>& wei_kcyx,
OutDesc,
Tensor<T>& out_nkhw,
HostTensor<T>& out_nkhw,
index_t nrepeat)
{
constexpr auto I0 = Number<0>{};
......@@ -44,7 +44,7 @@ void device_convolution_implicit_gemm_v2_chwn_cyxk_khwn(InDesc,
auto in_chwn_desc = make_ConstantTensorDescriptor(Sequence<C, Hi, Wi, N>{});
ostream_ConstantTensorDescriptor(in_chwn_desc, std::cout << "in_chwn_desc: ");
Tensor<T> in_chwn(make_TensorDescriptor(in_chwn_desc));
HostTensor<T> in_chwn(make_TensorDescriptor(in_chwn_desc));
make_ParallelTensorFunctor(
[&](auto n, auto c, auto hi, auto wi) { in_chwn(c, hi, wi, n) = in_nchw(n, c, hi, wi); },
......@@ -57,7 +57,7 @@ void device_convolution_implicit_gemm_v2_chwn_cyxk_khwn(InDesc,
auto wei_cyxk_desc = make_ConstantTensorDescriptor(Sequence<C, Y, X, K>{});
ostream_ConstantTensorDescriptor(wei_cyxk_desc, std::cout << "wei_cyxk_desc: ");
Tensor<T> wei_cyxk(make_TensorDescriptor(wei_cyxk_desc));
HostTensor<T> wei_cyxk(make_TensorDescriptor(wei_cyxk_desc));
make_ParallelTensorFunctor(
[&](auto k, auto c, auto y, auto x) { wei_cyxk(c, y, x, k) = wei_kcyx(k, c, y, x); },
......@@ -70,7 +70,7 @@ void device_convolution_implicit_gemm_v2_chwn_cyxk_khwn(InDesc,
auto out_khwn_desc = make_ConstantTensorDescriptor(Sequence<K, Ho, Wo, N>{});
ostream_ConstantTensorDescriptor(out_khwn_desc, std::cout << "out_khwn_desc: ");
Tensor<T> out_khwn(make_TensorDescriptor(out_khwn_desc));
HostTensor<T> out_khwn(make_TensorDescriptor(out_khwn_desc));
#if 0
// 3x3, 34x34
......
......@@ -8,11 +8,11 @@
template <class T, class InDesc, class WeiDesc, class OutDesc>
void device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(InDesc,
const Tensor<T>& in_nchw,
const HostTensor<T>& in_nchw,
WeiDesc,
const Tensor<T>& wei_kcyx,
const HostTensor<T>& wei_kcyx,
OutDesc,
Tensor<T>& out_nkhw,
HostTensor<T>& out_nkhw,
index_t nrepeat)
{
using namespace ck;
......@@ -42,7 +42,7 @@ void device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(InDesc,
auto wei_cyxk_desc = make_ConstantTensorDescriptor_packed(Sequence<C, Y, X, K>{});
ostream_ConstantTensorDescriptor(wei_cyxk_desc, std::cout << "wei_cyxk_desc: ");
Tensor<T> wei_cyxk(make_TensorDescriptor(wei_cyxk_desc));
HostTensor<T> wei_cyxk(make_TensorDescriptor(wei_cyxk_desc));
auto f_reorder_kcyx2cyxk = [&](auto k, auto c, auto y, auto x) {
wei_cyxk(c, y, x, k) = wei_kcyx(k, c, y, x);
......
......@@ -13,11 +13,11 @@ template <class T,
class ConvStrides,
class ConvDilations>
void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
const Tensor<T>& in_nchw,
const HostTensor<T>& in_nchw,
WeiDesc,
const Tensor<T>& wei_kcyx,
const HostTensor<T>& wei_kcyx,
OutDesc,
Tensor<T>& out_nkhw,
HostTensor<T>& out_nkhw,
ConvStrides,
ConvDilations,
index_t nrepeat)
......
......@@ -14,11 +14,11 @@ template <class T,
class ConvStrides,
class ConvDilations>
void device_convolution_implicit_gemm_v4r2_nchw_kcyx_nkhw(InDesc,
const Tensor<T>& in_nchw,
const HostTensor<T>& in_nchw,
WeiDesc,
const Tensor<T>& wei_kcyx,
const HostTensor<T>& wei_kcyx,
OutDesc,
Tensor<T>& out_nkhw,
HostTensor<T>& out_nkhw,
ConvStrides,
ConvDilations,
index_t nrepeat)
......
......@@ -14,11 +14,11 @@ template <class T,
class ConvStrides,
class ConvDilations>
void device_convolution_implicit_gemm_v4r3_nchw_kcyx_nkhw(InDesc,
const Tensor<T>& in_nchw,
const HostTensor<T>& in_nchw,
WeiDesc,
const Tensor<T>& wei_kcyx,
const HostTensor<T>& wei_kcyx,
OutDesc,
Tensor<T>& out_nkhw,
HostTensor<T>& out_nkhw,
ConvStrides,
ConvDilations,
index_t nrepeat)
......@@ -90,14 +90,14 @@ void device_convolution_implicit_gemm_v4r3_nchw_kcyx_nkhw(InDesc,
constexpr index_t InBlockCopyDataPerAccess_W2 = 4;
using WeiBlockCopySubLengths_E_K = Sequence<2, 2>;
using WeiBlockCopyClusterLengths_E_K = Sequence<4, 64>;
using WeiBlockCopySubLengths_E_K = Sequence<4, 1>;
using WeiBlockCopyClusterLengths_E_K = Sequence<2, 128>;
using WeiBlockCopyThreadClusterArrangeOrder = Sequence<1, 0>; // [K, E]
using WeiBlockCopySrcAccessOrder = Sequence<1, 0>; // [K, E]
using WeiBlockCopyDstAccessOrder = Sequence<0, 1>; // [E, K]
constexpr index_t WeiBlockCopySrcDataPerRead_E = 1;
constexpr index_t WeiBlockCopyDstDataPerWrite_K = 2;
constexpr index_t WeiBlockCopySrcDataPerRead_E = 4;
constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
#endif
constexpr index_t N0 = N / (N1 * N2);
......
......@@ -8,11 +8,11 @@ using namespace ck;
template <class TInWei, class TOut, class InDesc, class WeiDesc, class OutDesc>
void device_direct_convolution_2_vectorized_nchw_kcyx_nkhw(InDesc,
const Tensor<TInWei>& in_nchw,
const HostTensor<TInWei>& in_nchw,
WeiDesc,
const Tensor<TInWei>& wei_kcyx,
const HostTensor<TInWei>& wei_kcyx,
OutDesc,
Tensor<TOut>& out_nkhw,
HostTensor<TOut>& out_nkhw,
index_t nrepeat)
{
// this suppose in / wei data type is int8x4
......@@ -46,7 +46,7 @@ void device_direct_convolution_2_vectorized_nchw_kcyx_nkhw(InDesc,
auto in_nchw_vec_desc = make_ConstantTensorDescriptor(Sequence<N, C / NVector, Hi, Wi>{});
ostream_ConstantTensorDescriptor(in_nchw_vec_desc, std::cout << "in_nchw_vec_desc: ");
Tensor<vector_mem_t> in_nchw_vec(make_TensorDescriptor(in_nchw_vec_desc));
HostTensor<vector_mem_t> in_nchw_vec(make_TensorDescriptor(in_nchw_vec_desc));
auto f_vectorized_nchw = [&](auto n, auto c, auto h, auto w) {
#if 0
......@@ -69,7 +69,7 @@ void device_direct_convolution_2_vectorized_nchw_kcyx_nkhw(InDesc,
auto wei_kcyx_vec_desc = make_ConstantTensorDescriptor(Sequence<K, C / NVector, Y, X>{});
ostream_ConstantTensorDescriptor(wei_kcyx_vec_desc, std::cout << "wei_kcyx_vec_desc: ");
Tensor<vector_mem_t> wei_kcyx_vec(make_TensorDescriptor(wei_kcyx_vec_desc));
HostTensor<vector_mem_t> wei_kcyx_vec(make_TensorDescriptor(wei_kcyx_vec_desc));
auto f_vectorized_kcyx = [&](auto k, auto c, auto y, auto x) {
#if 0
......
......@@ -8,11 +8,11 @@ using namespace ck;
template <class T, class InDesc, class WeiDesc, class OutDesc, class LowerPads, class UpperPads>
void device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded(InDesc,
const Tensor<T>& in_nchw,
const HostTensor<T>& in_nchw,
WeiDesc,
const Tensor<T>& wei_kcyx,
const HostTensor<T>& wei_kcyx,
OutDesc,
Tensor<T>& out_nkhw,
HostTensor<T>& out_nkhw,
LowerPads,
UpperPads,
index_t nrepeat)
......@@ -42,7 +42,7 @@ void device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded(InDesc,
auto wei_cyxk_desc = make_ConstantTensorDescriptor(Sequence<C, Y, X, K>{});
ostream_ConstantTensorDescriptor(wei_cyxk_desc, std::cout << "wei_cyxk_desc: ");
Tensor<T> wei_cyxk(make_TensorDescriptor(wei_cyxk_desc));
HostTensor<T> wei_cyxk(make_TensorDescriptor(wei_cyxk_desc));
auto f_reorder_kcyx2cyxk = [&](auto k, auto c, auto y, auto x) {
wei_cyxk(c, y, x, k) = wei_kcyx(k, c, y, x);
......@@ -55,7 +55,7 @@ void device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded(InDesc,
auto in_chwn_desc = make_ConstantTensorDescriptor(Sequence<C, Hi, Wi, N>{});
ostream_ConstantTensorDescriptor(in_chwn_desc, std::cout << "in_chwn_desc: ");
Tensor<T> in_chwn(make_TensorDescriptor(in_chwn_desc));
HostTensor<T> in_chwn(make_TensorDescriptor(in_chwn_desc));
auto f_reorder_nchw2chwn = [&](auto n, auto c, auto hi, auto wi) {
in_chwn(c, hi, wi, n) = in_nchw(n, c, hi, wi);
......@@ -68,7 +68,7 @@ void device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded(InDesc,
auto out_khwn_desc = make_ConstantTensorDescriptor(Sequence<K, Ho, Wo, N>{});
ostream_ConstantTensorDescriptor(out_khwn_desc, std::cout << "out_khwn_desc: ");
Tensor<T> out_khwn(make_TensorDescriptor(out_khwn_desc));
HostTensor<T> out_khwn(make_TensorDescriptor(out_khwn_desc));
std::size_t data_sz = sizeof(T);
DeviceMem in_chwn_device_buf(data_sz * in_chwn.mDesc.GetElementSpace());
......
#pragma once
#include "tensor.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
// this is ugly, only for 4d
template <class TConstTensorDesc>
......@@ -42,7 +42,7 @@ auto make_TensorDescriptor(TConstTensorDesc)
std::initializer_list<index_t> strides = {
desc.GetStride(I0), desc.GetStride(I1), desc.GetStride(I2), desc.GetStride(I3)};
return TensorDescriptor(lengths, strides);
return HostTensorDescriptor(lengths, strides);
}
template <class TIn,
......@@ -52,9 +52,9 @@ template <class TIn,
class ConvDilations,
class LowerPads,
class UpperPads>
void host_direct_convolution(const Tensor<TIn>& in_nchw,
const Tensor<TWei>& wei_kcyx,
Tensor<TOut>& out_nkhw,
void host_direct_convolution(const HostTensor<TIn>& in_nchw,
const HostTensor<TWei>& wei_kcyx,
HostTensor<TOut>& out_nkhw,
ConvStrides,
ConvDilations,
LowerPads,
......@@ -99,9 +99,9 @@ void host_direct_convolution(const Tensor<TIn>& in_nchw,
}
template <class TIn, class TWei, class TOut, class LowerPads, class UpperPads>
void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
const Tensor<TWei>& wei_kcyx,
Tensor<TOut>& out_nkhw,
void host_winograd_3x3_convolution(const HostTensor<TIn>& in_nchw,
const HostTensor<TWei>& wei_kcyx,
HostTensor<TOut>& out_nkhw,
LowerPads,
UpperPads)
{
......@@ -134,11 +134,11 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
std::size_t HTile = (HO + HoPerTile - 1) / HoPerTile;
std::size_t WTile = (WO + WoPerTile - 1) / WoPerTile;
Tensor<double> in_hold({N, C, HTile, WTile, HiPerTile, WiPerTile});
Tensor<double> in_transform({N, C, HTile, WTile, HiPerTile, WiPerTile});
Tensor<double> wei_transform({K, C, HiPerTile, WiPerTile});
Tensor<double> out_transform({N, K, HTile, WTile, HiPerTile, HiPerTile});
Tensor<double> out_hold({N, K, HTile, WTile, HoPerTile, WoPerTile});
HostTensor<double> in_hold({N, C, HTile, WTile, HiPerTile, WiPerTile});
HostTensor<double> in_transform({N, C, HTile, WTile, HiPerTile, WiPerTile});
HostTensor<double> wei_transform({K, C, HiPerTile, WiPerTile});
HostTensor<double> out_transform({N, K, HTile, WTile, HiPerTile, HiPerTile});
HostTensor<double> out_hold({N, K, HTile, WTile, HoPerTile, WoPerTile});
auto f_in_hold = [&](auto n, auto c, auto htile, auto wtile) {
for(int j = 0; j < HiPerTile; ++j)
......@@ -339,7 +339,7 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
}
template <class T>
void check_error(const Tensor<T>& ref, const Tensor<T>& result)
void check_error(const HostTensor<T>& ref, const HostTensor<T>& result)
{
float error = 0;
float max_diff = -1;
......
#ifndef TENSOR_HPP
#define TENSOR_HPP
#ifndef HOST_TENSOR_HPP
#define HOST_TENSOR_HPP
#include <thread>
#include <vector>
......@@ -65,24 +65,24 @@ auto construct_f_unpack_args(F, T args)
return construct_f_unpack_args_impl<F>(args, std::make_index_sequence<N>{});
}
struct TensorDescriptor
struct HostTensorDescriptor
{
TensorDescriptor() = delete;
TensorDescriptor(std::initializer_list<std::size_t> lens);
TensorDescriptor(std::initializer_list<std::size_t> lens,
std::initializer_list<std::size_t> strides);
TensorDescriptor(std::vector<std::size_t> lens, std::vector<std::size_t> strides);
HostTensorDescriptor() = delete;
HostTensorDescriptor(std::initializer_list<std::size_t> lens);
HostTensorDescriptor(std::initializer_list<std::size_t> lens,
std::initializer_list<std::size_t> strides);
HostTensorDescriptor(std::vector<std::size_t> lens, std::vector<std::size_t> strides);
void CalculateStrides();
template <class Range>
TensorDescriptor(const Range& lens) : mLens(lens.begin(), lens.end())
HostTensorDescriptor(const Range& lens) : mLens(lens.begin(), lens.end())
{
this->CalculateStrides();
}
template <class Range1, class Range2>
TensorDescriptor(const Range1& lens, const Range2& strides)
HostTensorDescriptor(const Range1& lens, const Range2& strides)
: mLens(lens.begin(), lens.end()), mStrides(strides.begin(), strides.end())
{
}
......@@ -185,25 +185,25 @@ auto make_ParallelTensorFunctor(F f, Xs... xs)
}
template <class T>
struct Tensor
struct HostTensor
{
template <class X>
Tensor(std::initializer_list<X> lens) : mDesc(lens), mData(mDesc.GetElementSpace())
HostTensor(std::initializer_list<X> lens) : mDesc(lens), mData(mDesc.GetElementSpace())
{
}
template <class X>
Tensor(std::vector<X> lens) : mDesc(lens), mData(mDesc.GetElementSpace())
HostTensor(std::vector<X> lens) : mDesc(lens), mData(mDesc.GetElementSpace())
{
}
template <class X, class Y>
Tensor(std::vector<X> lens, std::vector<Y> strides)
HostTensor(std::vector<X> lens, std::vector<Y> strides)
: mDesc(lens, strides), mData(mDesc.GetElementSpace())
{
}
Tensor(const TensorDescriptor& desc) : mDesc(desc), mData(mDesc.GetElementSpace()) {}
HostTensor(const HostTensorDescriptor& desc) : mDesc(desc), mData(mDesc.GetElementSpace()) {}
template <class G>
void GenerateTensorValue(G g, std::size_t num_thread = 1)
......@@ -265,7 +265,7 @@ struct Tensor
typename std::vector<T>::const_iterator end() const { return mData.end(); }
TensorDescriptor mDesc;
HostTensorDescriptor mDesc;
std::vector<T> mData;
};
......
......@@ -4,7 +4,7 @@
#include <cstdlib>
#include <stdlib.h>
#include "config.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "constant_tensor_descriptor.hpp"
#include "device.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
......@@ -473,10 +473,10 @@ int main(int argc, char* argv[])
using in_data_t = float;
using out_data_t = float;
Tensor<in_data_t> in_nchw(make_TensorDescriptor(in_nchw_desc));
Tensor<in_data_t> wei_kcyx(make_TensorDescriptor(wei_kcyx_desc));
Tensor<out_data_t> out_nkhw_host(make_TensorDescriptor(out_nkhw_desc));
Tensor<out_data_t> out_nkhw_device(make_TensorDescriptor(out_nkhw_desc));
HostTensor<in_data_t> in_nchw(make_TensorDescriptor(in_nchw_desc));
HostTensor<in_data_t> wei_kcyx(make_TensorDescriptor(wei_kcyx_desc));
HostTensor<out_data_t> out_nkhw_host(make_TensorDescriptor(out_nkhw_desc));
HostTensor<out_data_t> out_nkhw_device(make_TensorDescriptor(out_nkhw_desc));
std::size_t num_thread = std::thread::hardware_concurrency();
......@@ -491,7 +491,7 @@ int main(int argc, char* argv[])
if(do_verification)
{
#if 1
#if 0
in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
#elif 0
......@@ -503,6 +503,9 @@ int main(int argc, char* argv[])
#elif 1
in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
#elif 0
in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
#elif 0
in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);
......
......@@ -3,17 +3,18 @@
#include "tensor.hpp"
TensorDescriptor::TensorDescriptor(std::initializer_list<std::size_t> lens) : mLens(lens)
HostTensorDescriptor::HostTensorDescriptor(std::initializer_list<std::size_t> lens) : mLens(lens)
{
this->CalculateStrides();
}
TensorDescriptor::TensorDescriptor(std::vector<std::size_t> lens, std::vector<std::size_t> strides)
HostTensorDescriptor::HostTensorDescriptor(std::vector<std::size_t> lens,
std::vector<std::size_t> strides)
: mLens(lens), mStrides(strides)
{
}
void TensorDescriptor::CalculateStrides()
void HostTensorDescriptor::CalculateStrides()
{
mStrides.clear();
mStrides.resize(mLens.size(), 0);
......@@ -25,21 +26,21 @@ void TensorDescriptor::CalculateStrides()
mLens.rbegin(), mLens.rend() - 1, mStrides.rbegin() + 1, std::multiplies<std::size_t>());
}
std::size_t TensorDescriptor::GetNumOfDimension() const { return mLens.size(); }
std::size_t HostTensorDescriptor::GetNumOfDimension() const { return mLens.size(); }
std::size_t TensorDescriptor::GetElementSize() const
std::size_t HostTensorDescriptor::GetElementSize() const
{
assert(mLens.size() == mStrides.size());
return std::accumulate(
mLens.begin(), mLens.end(), std::size_t{1}, std::multiplies<std::size_t>());
}
std::size_t TensorDescriptor::GetElementSpace() const
std::size_t HostTensorDescriptor::GetElementSpace() const
{
auto ls = mLens | boost::adaptors::transformed([](std::size_t v) { return v - 1; });
return std::inner_product(ls.begin(), ls.end(), mStrides.begin(), std::size_t{0}) + 1;
}
const std::vector<std::size_t>& TensorDescriptor::GetLengths() const { return mLens; }
const std::vector<std::size_t>& HostTensorDescriptor::GetLengths() const { return mLens; }
const std::vector<std::size_t>& TensorDescriptor::GetStrides() const { return mStrides; }
const std::vector<std::size_t>& HostTensorDescriptor::GetStrides() const { return mStrides; }
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment