"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "c9a219b323bed08fbaea9025cc940568f8bea78b"
Commit b23e7f8e authored by Chao Liu's avatar Chao Liu
Browse files

Dynamic tensor descriptor v2 now produces correct results, but spills too many registers

parent 0a944e8f
...@@ -15,7 +15,7 @@ make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths) ...@@ -15,7 +15,7 @@ make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{}); constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
constexpr auto up_dim_hidden_idss = constexpr auto up_dim_hidden_idss =
make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{}); make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<0, N, 1>::type{}; constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};
const index_t element_space_size = const index_t element_space_size =
reduce_on_array(lengths, math::multiplies<index_t>{}, index_t{1}); reduce_on_array(lengths, math::multiplies<index_t>{}, index_t{1});
...@@ -37,7 +37,7 @@ make_dynamic_native_tensor_descriptor_v2(const MultiIndex<N>& lengths, const Mul ...@@ -37,7 +37,7 @@ make_dynamic_native_tensor_descriptor_v2(const MultiIndex<N>& lengths, const Mul
constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{}); constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
constexpr auto up_dim_hidden_idss = constexpr auto up_dim_hidden_idss =
make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{}); make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<0, N, 1>::type{}; constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};
index_t element_space_size = 1; index_t element_space_size = 1;
......
...@@ -282,6 +282,8 @@ struct DynamicTensorCoordinateStep_v2 ...@@ -282,6 +282,8 @@ struct DynamicTensorCoordinateStep_v2
{ {
} }
// Alias for GetVisibleIndexDiff(): forwards the same const reference to the caller.
__host__ __device__ constexpr const auto& GetIndexDiff() const
{
    return GetVisibleIndexDiff();
}
// private: // private:
__host__ __device__ constexpr const auto& GetVisibleIndexDiff() const __host__ __device__ constexpr const auto& GetVisibleIndexDiff() const
{ {
...@@ -510,7 +512,12 @@ __host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& ten ...@@ -510,7 +512,12 @@ __host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& ten
// this is what needs to be updated // this is what needs to be updated
auto& idx_hidden = coord.GetHiddenIndex(); auto& idx_hidden = coord.GetHiddenIndex();
// update hidden index // update visible index
auto idx_hidden_pick_visible =
pick_array_element(idx_hidden, TensorDesc::GetVisibleDimensionIds());
idx_hidden_pick_visible += coord_step.GetIndexDiff();
// update rest of hidden index
static_for<ntransform - 1, -1, -1>{}([&](auto itran) { static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
const auto& tran = tensor_desc.GetTransforms().At(itran); const auto& tran = tensor_desc.GetTransforms().At(itran);
constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran); constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
......
...@@ -147,6 +147,18 @@ struct Array ...@@ -147,6 +147,18 @@ struct Array
return new_array; return new_array;
} }
// Builds a new array made of this array's NSize elements followed by the
// NAppend elements of `xs`. Neither this array nor `xs` is modified.
template <index_t NAppend>
__host__ __device__ constexpr auto Append(const Array<TData, NAppend>& xs) const
{
    Array<TData, NSize + NAppend> joined;

    // leading part: element-wise copy of *this
    static_for<0, NSize, 1>{}([&joined, this](auto i) constexpr { joined(i) = (*this)[i]; });

    // trailing part: element-wise copy of xs, placed after the first NSize slots
    static_for<0, NAppend, 1>{}(
        [&joined, &xs](auto i) constexpr { joined(NSize + i) = xs[i]; });

    return joined;
}
}; };
// Arr: Array // Arr: Array
......
...@@ -5,6 +5,12 @@ ...@@ -5,6 +5,12 @@
namespace ck { namespace ck {
// Packs all arguments into an Array whose element type is the type of the first
// argument and whose length is the number of arguments passed.
template <typename X, typename... Xs>
__host__ __device__ constexpr auto make_array(const X& x, const Xs&... xs)
{
    using array_type = Array<X, sizeof...(Xs) + 1>;

    return array_type{{x, xs...}};
}
template <typename Arr, typename Picks> template <typename Arr, typename Picks>
__host__ __device__ constexpr auto pick_array_element(Arr& a, Picks) __host__ __device__ constexpr auto pick_array_element(Arr& a, Picks)
{ {
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define CK_PRINT_HPP #define CK_PRINT_HPP
#include "array.hpp" #include "array.hpp"
#include "array_helper.hpp"
#include "sequence.hpp" #include "sequence.hpp"
namespace ck { namespace ck {
...@@ -12,8 +13,6 @@ __host__ __device__ void print_array(const char* s, T a) ...@@ -12,8 +13,6 @@ __host__ __device__ void print_array(const char* s, T a)
using data_type = typename decltype(a)::data_type; using data_type = typename decltype(a)::data_type;
constexpr index_t nsize = a.Size(); constexpr index_t nsize = a.Size();
static_assert(nsize >= 0 && nsize <= 10, "wrong!");
if constexpr(is_same<data_type, uint32_t>{}) if constexpr(is_same<data_type, uint32_t>{})
{ {
if constexpr(nsize == 0) if constexpr(nsize == 0)
...@@ -103,6 +102,12 @@ __host__ __device__ void print_array(const char* s, T a) ...@@ -103,6 +102,12 @@ __host__ __device__ void print_array(const char* s, T a)
a[8], a[8],
a[9]); a[9]);
} }
else
{
printf("%s size %u, {", s, nsize);
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("%u, ", a[i]); });
printf("}\n");
}
} }
else if constexpr(is_same<data_type, int32_t>{}) else if constexpr(is_same<data_type, int32_t>{})
{ {
...@@ -193,6 +198,32 @@ __host__ __device__ void print_array(const char* s, T a) ...@@ -193,6 +198,32 @@ __host__ __device__ void print_array(const char* s, T a)
a[8], a[8],
a[9]); a[9]);
} }
else
{
printf("%s size %d, {", s, nsize);
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("%d, ", a[i]); });
printf("}\n");
}
}
}
// Prints the label `s`, the element count of `a`, and then every element of `a`
// prefixed with its position, all on one line terminated by "}\n".
// Only uint32_t and int32_t element types produce output; for any other element
// type the function prints nothing.
template <typename T>
__host__ __device__ void print_array_v2(const char* s, T a)
{
    using elem_type = typename T::data_type;

    constexpr index_t num_elems = a.Size();

    if constexpr(is_same<elem_type, uint32_t>{})
    {
        // unsigned elements: %u conversions throughout
        printf("%s size %u, {", s, num_elems);
        static_for<0, num_elems, 1>{}(
            [&a](auto idx) constexpr { printf("[%u] %u, ", idx.value, a[idx]); });
        printf("}\n");
    }
    else if constexpr(is_same<elem_type, int32_t>{})
    {
        // signed elements: %d conversions throughout
        printf("%s size %d, {", s, num_elems);
        static_for<0, num_elems, 1>{}(
            [&a](auto idx) constexpr { printf("[%d] %d, ", idx.value, a[idx]); });
        printf("}\n");
    }
}
......
...@@ -54,7 +54,7 @@ void device_dummy_dynamic_transform(InDesc, ...@@ -54,7 +54,7 @@ void device_dummy_dynamic_transform(InDesc,
auto in_gemmk_gemmn_coord = auto in_gemmk_gemmn_coord =
make_dynamic_tensor_coordinate(in_gemmk_gemmn_global_desc, MultiIndex<2>{0, 0}); make_dynamic_tensor_coordinate(in_gemmk_gemmn_global_desc, MultiIndex<2>{0, 0});
for(index_t iter = 0; iter < 100; ++iter) for(index_t iter = 0; iter < 10; ++iter)
{ {
constexpr auto gemmk1_gemmn0 = MultiIndex<2>{1, 0}; constexpr auto gemmk1_gemmn0 = MultiIndex<2>{1, 0};
...@@ -190,17 +190,14 @@ void device_dummy_dynamic_transform_v2(InDesc, ...@@ -190,17 +190,14 @@ void device_dummy_dynamic_transform_v2(InDesc,
make_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{0, 0}); make_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{0, 0});
const auto in_gemmk_gemmn_coord_step = const auto in_gemmk_gemmn_coord_step =
make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{1, 0}); make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{0, 1});
for(index_t iter = 0; iter < 100; ++iter) for(index_t iter = 0; iter < 100; ++iter)
{ {
constexpr auto gemmk1_gemmn0 = MultiIndex<2>{1, 0};
printf("iter %d\n", iter); printf("iter %d\n", iter);
print_array_v2("visible idx: ", in_gemmk_gemmn_coord.GetIndex());
print_array("idx: ", in_gemmk_gemmn_coord.GetIndex()); print_array_v2("hidden idx: ", in_gemmk_gemmn_coord.GetHiddenIndex());
printf("offset: %d\n", in_gemmk_gemmn_coord.GetOffset()); printf("offset: %d\n", in_gemmk_gemmn_coord.GetOffset());
printf("\n"); printf("\n");
move_dynamic_tensor_coordinate_v2( move_dynamic_tensor_coordinate_v2(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment