Commit b23e7f8e authored by Chao Liu's avatar Chao Liu
Browse files

Dynamic tensor descriptor v2 can produce correct results, but spills too many registers

parent 0a944e8f
......@@ -15,7 +15,7 @@ make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
constexpr auto up_dim_hidden_idss =
make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<0, N, 1>::type{};
constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};
const index_t element_space_size =
reduce_on_array(lengths, math::multiplies<index_t>{}, index_t{1});
......@@ -37,7 +37,7 @@ make_dynamic_native_tensor_descriptor_v2(const MultiIndex<N>& lengths, const Mul
constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
constexpr auto up_dim_hidden_idss =
make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<0, N, 1>::type{};
constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};
index_t element_space_size = 1;
......
......@@ -282,6 +282,8 @@ struct DynamicTensorCoordinateStep_v2
{
}
// Alias for GetVisibleIndexDiff(): hands back, by const reference, exactly
// what that accessor returns.
__host__ __device__ constexpr const auto& GetIndexDiff() const
{
    return GetVisibleIndexDiff();
}
// private:
__host__ __device__ constexpr const auto& GetVisibleIndexDiff() const
{
......@@ -510,7 +512,12 @@ __host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& ten
// this is what needs to be updated
auto& idx_hidden = coord.GetHiddenIndex();
// update hidden index
// update visible index
auto idx_hidden_pick_visible =
pick_array_element(idx_hidden, TensorDesc::GetVisibleDimensionIds());
idx_hidden_pick_visible += coord_step.GetIndexDiff();
// update rest of hidden index
static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
const auto& tran = tensor_desc.GetTransforms().At(itran);
constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
......
......@@ -147,6 +147,18 @@ struct Array
return new_array;
}
// Builds and returns a new Array of NSize + NAppend elements: the elements of
// this array come first, the elements of xs are placed after them.
// Neither this array nor xs is modified.
template <index_t NAppend>
__host__ __device__ constexpr auto Append(const Array<TData, NAppend>& xs) const
{
    using joined_t = Array<TData, NSize + NAppend>;

    joined_t joined;

    // front part: element-wise copy of *this
    static_for<0, NSize, 1>{}(
        [&joined, this ](auto iself) constexpr { joined(iself) = (*this)[iself]; });

    // back part: element-wise copy of xs, written starting at position NSize
    static_for<0, NAppend, 1>{}(
        [&joined, &xs ](auto ixs) constexpr { joined(NSize + ixs) = xs[ixs]; });

    return joined;
}
};
// Arr: Array
......
......@@ -5,6 +5,12 @@
namespace ck {
// Creates an Array from a list of values.
// The element type is taken from the first argument (X); the array size is the
// total number of arguments passed. At least one argument is required.
template <typename X, typename... Xs>
__host__ __device__ constexpr auto make_array(const X& x, const Xs&... xs)
{
    // one leading element plus however many trailing ones were supplied
    constexpr auto num_elements = sizeof...(Xs) + 1;

    using result_t = Array<X, num_elements>;

    return result_t{{x, xs...}};
}
template <typename Arr, typename Picks>
__host__ __device__ constexpr auto pick_array_element(Arr& a, Picks)
{
......
......@@ -2,6 +2,7 @@
#define CK_PRINT_HPP
#include "array.hpp"
#include "array_helper.hpp"
#include "sequence.hpp"
namespace ck {
......@@ -12,8 +13,6 @@ __host__ __device__ void print_array(const char* s, T a)
using data_type = typename decltype(a)::data_type;
constexpr index_t nsize = a.Size();
static_assert(nsize >= 0 && nsize <= 10, "wrong!");
if constexpr(is_same<data_type, uint32_t>{})
{
if constexpr(nsize == 0)
......@@ -103,6 +102,12 @@ __host__ __device__ void print_array(const char* s, T a)
a[8],
a[9]);
}
else
{
printf("%s size %u, {", s, nsize);
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("%u, ", a[i]); });
printf("}\n");
}
}
else if constexpr(is_same<data_type, int32_t>{})
{
......@@ -193,6 +198,32 @@ __host__ __device__ void print_array(const char* s, T a)
a[8],
a[9]);
}
else
{
printf("%s size %d, {", s, nsize);
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("%d, ", a[i]); });
printf("}\n");
}
}
}
// Debug helper: prints the label s and the element count of a, followed by
// every element of a tagged with its position ("[i] value, ").
// Output is produced only when the array's data_type is int32_t or uint32_t;
// for any other element type neither branch is taken and nothing is printed.
template <typename T>
__host__ __device__ void print_array_v2(const char* s, T a)
{
    using elem_t = typename decltype(a)::data_type;

    constexpr index_t num_elem = a.Size();

    if constexpr(is_same<elem_t, int32_t>{})
    {
        // signed elements: everything goes through %d
        printf("%s size %d, {", s, num_elem);

        static_for<0, num_elem, 1>{}([&a](auto idx) constexpr {
            printf("[%d] %d, ", idx.value, a[idx]);
        });

        printf("}\n");
    }
    else if constexpr(is_same<elem_t, uint32_t>{})
    {
        // unsigned elements: everything goes through %u
        printf("%s size %u, {", s, num_elem);

        static_for<0, num_elem, 1>{}([&a](auto idx) constexpr {
            printf("[%u] %u, ", idx.value, a[idx]);
        });

        printf("}\n");
    }
}
......
......@@ -54,7 +54,7 @@ void device_dummy_dynamic_transform(InDesc,
auto in_gemmk_gemmn_coord =
make_dynamic_tensor_coordinate(in_gemmk_gemmn_global_desc, MultiIndex<2>{0, 0});
for(index_t iter = 0; iter < 100; ++iter)
for(index_t iter = 0; iter < 10; ++iter)
{
constexpr auto gemmk1_gemmn0 = MultiIndex<2>{1, 0};
......@@ -190,17 +190,14 @@ void device_dummy_dynamic_transform_v2(InDesc,
make_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{0, 0});
const auto in_gemmk_gemmn_coord_step =
make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{1, 0});
make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{0, 1});
for(index_t iter = 0; iter < 100; ++iter)
{
constexpr auto gemmk1_gemmn0 = MultiIndex<2>{1, 0};
printf("iter %d\n", iter);
print_array("idx: ", in_gemmk_gemmn_coord.GetIndex());
print_array_v2("visible idx: ", in_gemmk_gemmn_coord.GetIndex());
print_array_v2("hidden idx: ", in_gemmk_gemmn_coord.GetHiddenIndex());
printf("offset: %d\n", in_gemmk_gemmn_coord.GetOffset());
printf("\n");
move_dynamic_tensor_coordinate_v2(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment