Unverified Commit 0e92deb7 authored by Chao Liu's avatar Chao Liu Committed by GitHub
Browse files

Tile program init bulk PR (#4)



Tile Program init bulk PR

---------
Co-authored-by: default avatarzjing14 <zhangjing14@gmail.com>
Co-authored-by: default avatarPo-Yen, Chen <PoYen.Chen@amd.com>
parent 0077eeb3
...@@ -114,7 +114,7 @@ static inline __device__ int8_t abs(int8_t x) ...@@ -114,7 +114,7 @@ static inline __device__ int8_t abs(int8_t x)
return (x ^ sgn) - sgn; return (x ^ sgn) - sgn;
}; };
static inline __device__ int32_t abs(int32_t x) static inline __device__ constexpr int32_t abs(int32_t x)
{ {
int32_t sgn = x >> (32 - 1); int32_t sgn = x >> (32 - 1);
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
namespace ck {
// Fixed-capacity byte buffer that stores the object representation of values back to back.
//
// Values of empty types (is_empty_v<T>) take no space: Push() skips them, and Pop() / Get()
// return a value-initialized T without reading the buffer.
//
// MaxSize: capacity of the buffer, in bytes.
template <index_t MaxSize>
struct MetaDataBuffer
{
    // Create an empty buffer.
    __host__ __device__ constexpr MetaDataBuffer() : buffer_{}, size_{0} {}

    // Create a buffer and append x, xs... in the order given.
    template <typename X, typename... Xs>
    __host__ __device__ constexpr MetaDataBuffer(const X& x, const Xs&... xs) : buffer_{}, size_{0}
    {
        Push(x, xs...);
    }

    // Append the bytes of "data" at the current end of the buffer (no-op for empty types).
    template <typename T>
    __host__ __device__ constexpr void Push(const T& data)
    {
        if constexpr(!is_empty_v<T>)
        {
            constexpr index_t size = sizeof(T);

            // a single value larger than the whole buffer can never be stored
            static_assert(size <= MaxSize, "wrong! data is larger than the buffer");

            auto tmp = bit_cast<Array<std::byte, size>>(data);

            for(index_t i = 0; i < size; i++)
            {
                buffer_(size_) = tmp[i];
                size_++;
            }
        }
    }

    // Append several values, left to right.
    template <typename X, typename... Xs>
    __host__ __device__ constexpr void Push(const X& x, const Xs&... xs)
    {
        Push(x);
        Push(xs...);
    }

    // Read a T stored at byte offset "pos" and advance "pos" past it.
    // For empty types nothing is read and "pos" is left unchanged.
    template <typename T>
    __host__ __device__ constexpr T Pop(index_t& pos) const
    {
        if constexpr(is_empty_v<T>)
        {
            return T{};
        }
        else
        {
            constexpr index_t size = sizeof(T);

            // value-initialized so that no uninitialized local exists in a constexpr function
            Array<std::byte, size> tmp{};

            for(index_t i = 0; i < size; i++)
            {
                tmp(i) = buffer_[pos];
                pos++;
            }

            return bit_cast<T>(tmp);
        }
    }

    // Read a T stored at byte offset "pos" without reporting the new offset.
    // Shares the implementation of Pop() so that empty types are handled the same way
    // as in Push(): they occupy no bytes and therefore no byte is read for them.
    template <typename T>
    __host__ __device__ constexpr T Get(index_t pos) const
    {
        // "pos" is a by-value copy; Pop() advancing it is not visible to the caller
        return Pop<T>(pos);
    }

    // raw storage
    Array<std::byte, MaxSize> buffer_;
    // number of bytes currently in use
    index_t size_ = 0;
};
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
namespace ck {
// Generic case: print any object that provides a Print() member function.
template <typename T>
__host__ __device__ inline void print(T t)
{
    t.Print();
}

// bool is printed as an integer
template <>
__host__ __device__ inline void print(bool v)
{
    printf("%d", static_cast<int32_t>(v));
}

template <>
__host__ __device__ inline void print(int32_t v)
{
    printf("%d", v);
}

template <>
__host__ __device__ inline void print(int64_t v)
{
    // int64_t is "long" on some platforms and "long long" on others; cast explicitly so the
    // argument type always matches the "%lld" conversion
    printf("%lld", static_cast<long long>(v));
}

template <>
__host__ __device__ inline void print(float v)
{
    printf("%f", v);
}

// _Float16 is widened to float before printing
template <>
__host__ __device__ inline void print(_Float16 v)
{
    printf("%f", static_cast<float>(v));
}
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/ck.hpp"
namespace ck {
// Alias templates for the corresponding <type_traits> transformations.

// remove_reference_t: T& / T&& -> T
template <typename T>
using remove_reference_t = typename std::remove_reference<T>::type;

// remove_cv_t: drop top-level const / volatile
template <typename T>
using remove_cv_t = typename std::remove_cv<T>::type;

// remove_cvref_t: drop the reference first, then top-level const / volatile
template <typename T>
using remove_cvref_t =
    typename std::remove_cv<typename std::remove_reference<T>::type>::type;

// remove_pointer_t: T* -> T
template <typename T>
using remove_pointer_t = typename std::remove_pointer<T>::type;
} // namespace ck
...@@ -42,8 +42,9 @@ struct Sequence ...@@ -42,8 +42,9 @@ struct Sequence
static constexpr index_t mSize = sizeof...(Is); static constexpr index_t mSize = sizeof...(Is);
__host__ __device__ static constexpr auto Size() { return Number<mSize>{}; } __host__ __device__ static constexpr index_t Size() { return mSize; }
// TODO: deprecate
__host__ __device__ static constexpr auto GetSize() { return Size(); } __host__ __device__ static constexpr auto GetSize() { return Size(); }
__host__ __device__ static constexpr index_t At(index_t I) __host__ __device__ static constexpr index_t At(index_t I)
...@@ -61,10 +62,18 @@ struct Sequence ...@@ -61,10 +62,18 @@ struct Sequence
return Number<At(I)>{}; return Number<At(I)>{};
} }
template <index_t I>
__host__ __device__ static constexpr auto At()
{
static_assert(I < mSize, "wrong! I too large");
return Number<At(I)>{};
}
template <index_t I> template <index_t I>
__host__ __device__ static constexpr auto Get(Number<I>) __host__ __device__ static constexpr auto Get(Number<I>)
{ {
return At(Number<I>{}); return At(I);
} }
template <typename I> template <typename I>
...@@ -171,12 +180,23 @@ struct Sequence ...@@ -171,12 +180,23 @@ struct Sequence
return Sequence<f(Is)...>{}; return Sequence<f(Is)...>{};
} }
__host__ __device__ static constexpr bool IsStatic() { return true; };
__host__ __device__ static void Print() __host__ __device__ static void Print()
{ {
printf("{"); printf("Sequence{size: %d, data: [", Size());
printf("size %d, ", index_t{Size()});
static_for<0, Size(), 1>{}([&](auto i) { printf("%d ", At(i).value); }); for(index_t i = 0; i < Size(); i++)
printf("}"); {
print(At(i));
if(i < Size() - 1)
{
printf(", ");
}
}
printf("]}");
} }
}; };
...@@ -890,8 +910,8 @@ __host__ __device__ constexpr bool sequence_all_of(Seq, F f) ...@@ -890,8 +910,8 @@ __host__ __device__ constexpr bool sequence_all_of(Seq, F f)
return flag; return flag;
} }
template <typename Sx, typename Sy> template <typename... Seqs>
using sequence_merge_t = typename sequence_merge<Sx, Sy>::type; using sequence_merge_t = typename sequence_merge<Seqs...>::type;
template <index_t NSize, index_t I> template <index_t NSize, index_t I>
using uniform_sequence_gen_t = typename uniform_sequence_gen<NSize, I>::type; using uniform_sequence_gen_t = typename uniform_sequence_gen<NSize, I>::type;
......
...@@ -13,14 +13,15 @@ __host__ __device__ constexpr auto make_sequence(Number<Is>...) ...@@ -13,14 +13,15 @@ __host__ __device__ constexpr auto make_sequence(Number<Is>...)
return Sequence<Is...>{}; return Sequence<Is...>{};
} }
// F returns index_t // F() returns index_t
// F use default constructor
template <typename F, index_t N> template <typename F, index_t N>
__host__ __device__ constexpr auto generate_sequence(F, Number<N>) __host__ __device__ constexpr auto generate_sequence(F, Number<N>)
{ {
return typename sequence_gen<N, F>::type{}; return typename sequence_gen<N, F>::type{};
} }
// F returns Number<> // F() returns Number<>
template <typename F, index_t N> template <typename F, index_t N>
__host__ __device__ constexpr auto generate_sequence_v2(F&& f, Number<N>) __host__ __device__ constexpr auto generate_sequence_v2(F&& f, Number<N>)
{ {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
// STATIC_ASSERT: compile-time check that is only active in debug builds.
//   - NDEBUG not defined: forwards all arguments to static_assert
//   - NDEBUG defined:     the arguments are dropped and not evaluated
// A STATIC_ASSERT macro that is already defined at this point is left untouched.
#ifndef STATIC_ASSERT
#ifndef NDEBUG
#define STATIC_ASSERT(...) static_assert(__VA_ARGS__)
#else
// Expand to a trivially-true assertion rather than to nothing, so that the usual
// "STATIC_ASSERT(...);" spelling does not leave a stray ';' behind.
#define STATIC_ASSERT(...) static_assert(true, "")
#endif
#endif
...@@ -9,13 +9,14 @@ namespace ck { ...@@ -9,13 +9,14 @@ namespace ck {
// static buffer for scalar // static buffer for scalar
template <AddressSpaceEnum AddressSpace, template <AddressSpaceEnum AddressSpace,
typename T, typename S_,
index_t N, index_t N,
bool InvalidElementUseNumericalZeroValue> // TODO remove this bool, no longer needed bool InvalidElementUseNumericalZeroValue> // TODO remove this bool, no longer needed
struct StaticBuffer : public StaticallyIndexedArray<T, N> struct StaticBuffer : public StaticallyIndexedArray<remove_cvref_t<S_>, N>
{ {
using type = T; using S = remove_cvref_t<S_>;
using base = StaticallyIndexedArray<T, N>; using type = S;
using base = StaticallyIndexedArray<S, N>;
__host__ __device__ constexpr StaticBuffer() : base{} {} __host__ __device__ constexpr StaticBuffer() : base{} {}
...@@ -28,39 +29,84 @@ struct StaticBuffer : public StaticallyIndexedArray<T, N> ...@@ -28,39 +29,84 @@ struct StaticBuffer : public StaticallyIndexedArray<T, N>
return x; return x;
} }
__host__ __device__ constexpr StaticBuffer& operator=(const T& y)
{
StaticBuffer& x = *this;
static_for<0, base::Size(), 1>{}([&](auto i) { x(i) = y; });
return x;
}
__host__ __device__ static constexpr AddressSpaceEnum GetAddressSpace() { return AddressSpace; } __host__ __device__ static constexpr AddressSpaceEnum GetAddressSpace() { return AddressSpace; }
__host__ __device__ static constexpr index_t Size() { return N; }
__host__ __device__ static constexpr bool IsStaticBuffer() { return true; } __host__ __device__ static constexpr bool IsStaticBuffer() { return true; }
__host__ __device__ static constexpr bool IsDynamicBuffer() { return false; } __host__ __device__ static constexpr bool IsDynamicBuffer() { return false; }
// read access // read access to scalar
template <index_t I> template <index_t I>
__host__ __device__ constexpr const T& operator[](Number<I> i) const __host__ __device__ constexpr const S& operator[](Number<I> i) const
{ {
return base::operator[](i); return base::operator[](i);
} }
// write access // write access to scalar
template <index_t I> template <index_t I>
__host__ __device__ constexpr T& operator()(Number<I> i) __host__ __device__ constexpr S& operator()(Number<I> i)
{ {
return base::operator()(i); return base::operator()(i);
} }
__host__ __device__ void Set(T x) // Get a vector (type X)
// "is" is offset of S, not X.
// "is" should be aligned to X
template <typename X_,
index_t Is,
typename enable_if<has_same_scalar_type<S, X_>::value, bool>::type = false>
__host__ __device__ constexpr remove_reference_t<X_> GetAsType(Number<Is> is) const
{
using X = remove_cvref_t<X_>;
constexpr index_t kSPerX = scalar_type<X>::vector_size;
static_assert(Is % kSPerX == 0, "wrong! \"Is\" should be aligned to X");
vector_type<S, kSPerX> vx;
static_for<0, kSPerX, 1>{}(
[&](auto j) { vx.template AsType<S>()(j) = base::operator[](is + j); });
return vx.template AsType<X>().template At<0>();
}
// Set a vector (type X)
// "is" is offset of S, not X.
// "is" should be aligned to X
template <typename X_,
index_t Is,
typename enable_if<has_same_scalar_type<S, X_>::value, bool>::type = false>
__host__ __device__ constexpr void SetAsType(Number<Is> is, X_ x)
{ {
static_for<0, N, 1>{}([&](auto i) { operator()(i) = T{x}; }); using X = remove_cvref_t<X_>;
constexpr index_t kSPerX = scalar_type<X>::vector_size;
static_assert(Is % kSPerX == 0, "wrong! \"Is\" should be aligned to X");
const vector_type<S, kSPerX> vx{x};
static_for<0, kSPerX, 1>{}(
[&](auto j) { base::operator()(is + j) = vx.template AsType<S>()[j]; });
} }
__host__ __device__ void Clear() { Set(T{0}); } __host__ __device__ void Initialize(const S& x)
{
static_for<0, N, 1>{}([&](auto i) { operator()(i) = S{x}; });
}
// FIXME: deprecated
__host__ __device__ void Clear() { Initialize(0); }
// FIXME: deprecated
__host__ __device__ constexpr StaticBuffer& operator=(const S& v)
{
Initialize(v);
return *this;
}
}; };
// static buffer for vector // static buffer for vector
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_STATICALLY_INDEXED_ARRAY_HPP #pragma once
#define CK_STATICALLY_INDEXED_ARRAY_HPP
#include "functional2.hpp" #include "functional2.hpp"
#include "sequence.hpp" #include "sequence.hpp"
...@@ -57,49 +56,4 @@ __host__ __device__ constexpr auto make_statically_indexed_array() ...@@ -57,49 +56,4 @@ __host__ __device__ constexpr auto make_statically_indexed_array()
return StaticallyIndexedArray<X, 0>(); return StaticallyIndexedArray<X, 0>();
} }
// Array of N elements of type T whose elements are addressed with compile-time
// indices (Number<I>). Every access is range-checked at compile time.
template <typename T, index_t N>
struct StaticallyIndexedArray_v2
{
    __host__ __device__ constexpr StaticallyIndexedArray_v2() = default;

    // number of elements
    __host__ __device__ static constexpr index_t Size() { return N; }

    // read access
    template <index_t I>
    __host__ __device__ constexpr const auto& At(Number<I>) const
    {
        // index_t is signed, so the lower bound has to be checked as well
        static_assert(0 <= I && I < N, "wrong! out of range");

        return data_[I];
    }

    // write access
    template <index_t I>
    __host__ __device__ constexpr auto& At(Number<I>)
    {
        // index_t is signed, so the lower bound has to be checked as well
        static_assert(0 <= I && I < N, "wrong! out of range");

        return data_[I];
    }

    // read access, forwards to At()
    template <index_t I>
    __host__ __device__ constexpr const auto& operator[](Number<I> i) const
    {
        return At(i);
    }

    // write access, forwards to At()
    template <index_t I>
    __host__ __device__ constexpr auto& operator()(Number<I> i)
    {
        return At(i);
    }

    __host__ __device__ static constexpr bool IsStaticBuffer() { return true; }

    // element storage
    T data_[N];
};
} // namespace ck } // namespace ck
#endif
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_STATICALLY_INDEXED_ARRAY_MULTI_INDEX_HPP #pragma once
#define CK_STATICALLY_INDEXED_ARRAY_MULTI_INDEX_HPP
#include "common_header.hpp" #include "common_header.hpp"
namespace ck { namespace ck {
#if 0
template <index_t N> template <index_t N>
using MultiIndex = StaticallyIndexedArray<index_t, N>; using MultiIndex = StaticallyIndexedArray<index_t, N>;
template <typename... Xs> template <typename... Xs>
__host__ __device__ constexpr auto make_multi_index(Xs&&... xs) __host__ __device__ constexpr auto make_multi_index(Xs&&... xs)
{ {
static_assert(false, "wrong! deprecated");
return make_statically_indexed_array<index_t>(index_t{xs}...); return make_statically_indexed_array<index_t>(index_t{xs}...);
} }
...@@ -29,134 +31,6 @@ __host__ __device__ constexpr auto to_multi_index(const T& x) ...@@ -29,134 +31,6 @@ __host__ __device__ constexpr auto to_multi_index(const T& x)
{ {
return unpack([](auto... ys) { return make_multi_index(ys...); }, x); return unpack([](auto... ys) { return make_multi_index(ys...); }, x);
} }
#endif
// Here should use MultiIndex<NSize>, instead of Tuple<Ys...>, although the former
// is the alias of the latter. This is because compiler cannot infer the NSize if
// using MultiIndex<NSize>
// TODO: how to fix this?
// In-place element-wise addition: y(i) += x[i] for every element of the tuple.
// Only enabled when X is neither an integral nor a floating-point type; X must
// provide a static Size() equal to the number of tuple elements.
// The return type is plain "auto", so the updated tuple is returned by value.
template <
    typename... Ys,
    typename X,
    enable_if_t<!std::is_integral<X>::value && !std::is_floating_point<X>::value, bool> = false>
__host__ __device__ constexpr auto operator+=(Tuple<Ys...>& y, const X& x)
{
    constexpr index_t num_elems = sizeof...(Ys);

    static_assert(X::Size() == sizeof...(Ys), "wrong! size not the same");

    static_for<0, num_elems, 1>{}([&](auto idx) { y(idx) += x[idx]; });

    return y;
}
// In-place element-wise subtraction: y(i) -= x[i] for every element of the tuple.
// Only enabled when X is neither an integral nor a floating-point type; X must
// provide a static Size() equal to the number of tuple elements.
// The return type is plain "auto", so the updated tuple is returned by value.
template <
    typename... Ys,
    typename X,
    enable_if_t<!std::is_integral<X>::value && !std::is_floating_point<X>::value, bool> = false>
__host__ __device__ constexpr auto operator-=(Tuple<Ys...>& y, const X& x)
{
    constexpr index_t num_elems = sizeof...(Ys);

    static_assert(X::Size() == sizeof...(Ys), "wrong! size not the same");

    static_for<0, num_elems, 1>{}([&](auto idx) { y(idx) -= x[idx]; });

    return y;
}
// Element-wise sum of a tuple and a same-sized container: result(i) = x[i] + y[i].
// Only enabled when Y is neither an integral nor a floating-point type.
// The result has the same type as the left-hand tuple.
template <
    typename... Xs,
    typename Y,
    enable_if_t<!std::is_integral<Y>::value && !std::is_floating_point<Y>::value, bool> = false>
__host__ __device__ constexpr auto operator+(const Tuple<Xs...>& x, const Y& y)
{
    static_assert(Y::Size() == sizeof...(Xs), "wrong! size not the same");

    constexpr index_t num_elems = sizeof...(Xs);

    Tuple<Xs...> sum;

    static_for<0, num_elems, 1>{}([&](auto idx) { sum(idx) = x[idx] + y[idx]; });

    return sum;
}
// Element-wise difference of a tuple and a same-sized container: result(i) = x[i] - y[i].
// Only enabled when Y is neither an integral nor a floating-point type.
// The result has the same type as the left-hand tuple.
template <
    typename... Xs,
    typename Y,
    enable_if_t<!std::is_integral<Y>::value && !std::is_floating_point<Y>::value, bool> = false>
__host__ __device__ constexpr auto operator-(const Tuple<Xs...>& x, const Y& y)
{
    static_assert(Y::Size() == sizeof...(Xs), "wrong! size not the same");

    constexpr index_t num_elems = sizeof...(Xs);

    Tuple<Xs...> diff;

    static_for<0, num_elems, 1>{}([&](auto idx) { diff(idx) = x[idx] - y[idx]; });

    return diff;
}
// Element-wise product of a tuple and a same-sized container: result(i) = x[i] * y[i].
// Only enabled when Y is neither an integral nor a floating-point type (the scalar
// cases are handled by separate overloads).
// The result has the same type as the left-hand tuple.
template <
    typename... Xs,
    typename Y,
    enable_if_t<!std::is_integral<Y>::value && !std::is_floating_point<Y>::value, bool> = false>
__host__ __device__ constexpr auto operator*(const Tuple<Xs...>& x, const Y& y)
{
    static_assert(Y::Size() == sizeof...(Xs), "wrong! size not the same");

    constexpr index_t num_elems = sizeof...(Xs);

    Tuple<Xs...> prod;

    static_for<0, num_elems, 1>{}([&](auto idx) { prod(idx) = x[idx] * y[idx]; });

    return prod;
}
// MultiIndex = scalar * MultiIndex
// Scales every element of x by a: result(i) = a * x[i].
// Only enabled when Y is an integral or floating-point type.
template <typename... Xs,
          typename Y,
          enable_if_t<std::is_integral<Y>::value || std::is_floating_point<Y>::value, bool> = false>
__host__ __device__ constexpr auto operator*(Y a, const Tuple<Xs...>& x)
{
    Tuple<Xs...> scaled;

    constexpr index_t num_elems = sizeof...(Xs);

    static_for<0, num_elems, 1>{}([&](auto idx) { scaled(idx) = a * x[idx]; });

    return scaled;
}
// MultiIndex = MultiIndex * scalar
// Same result as "a * x": every element is computed as a * x[i] (scalar on the left).
// Only enabled when Y is an integral or floating-point type.
template <typename... Xs,
          typename Y,
          enable_if_t<std::is_integral<Y>::value || std::is_floating_point<Y>::value, bool> = false>
__host__ __device__ constexpr auto operator*(const Tuple<Xs...>& x, Y a)
{
    Tuple<Xs...> scaled;

    constexpr index_t num_elems = sizeof...(Xs);

    static_for<0, num_elems, 1>{}([&](auto idx) { scaled(idx) = a * x[idx]; });

    return scaled;
}
namespace mathext {

// Element-wise exponential: result(i) = math::exp(x[i]).
// The result has the same type as the input tuple.
template <typename... Xs>
__host__ __device__ constexpr auto exp(const Tuple<Xs...>& x)
{
    Tuple<Xs...> result;

    constexpr index_t num_elems = sizeof...(Xs);

    static_for<0, num_elems, 1>{}([&](auto idx) { result(idx) = math::exp(x[idx]); });

    return result;
}

// Element-wise maximum: result(i) = math::max(x[i], y[i]).
// Y must provide a static Size() equal to the number of tuple elements.
template <typename... Xs, typename Y>
__host__ __device__ constexpr auto max(const Tuple<Xs...>& x, const Y& y)
{
    static_assert(Y::Size() == sizeof...(Xs), "wrong! size not the same");

    Tuple<Xs...> result;

    constexpr index_t num_elems = sizeof...(Xs);

    static_for<0, num_elems, 1>{}(
        [&](auto idx) { result(idx) = math::max(x[idx], y[idx]); });

    return result;
}

} // namespace mathext
// Print a tuple as "{MultiIndex, size N,e0 e1 ... }".
// Every element is converted to index_t before it is printed.
template <typename... Xs>
__host__ __device__ void print_multi_index(const Tuple<Xs...>& x)
{
    // header
    printf("{");
    printf("MultiIndex, ");
    printf("size %d,", index_t{sizeof...(Xs)});

    // elements, each followed by a single space
    static_for<0, sizeof...(Xs), 1>{}([&](auto idx) {
        const index_t elem = static_cast<index_t>(x.At(idx));
        printf("%d ", elem);
    });

    // footer
    printf("}");
}
} // namespace ck } // namespace ck
#endif
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
#pragma once #pragma once
#include "ck/utility/is_static.hpp"
#include "ck/utility/print.hpp"
#include "ck/utility/integral_constant.hpp" #include "ck/utility/integral_constant.hpp"
#include "ck/utility/sequence.hpp" #include "ck/utility/sequence.hpp"
#include "ck/utility/type.hpp" #include "ck/utility/type.hpp"
...@@ -136,6 +138,22 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X ...@@ -136,6 +138,22 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
__host__ __device__ static constexpr index_t Size() { return sizeof...(Xs); } __host__ __device__ static constexpr index_t Size() { return sizeof...(Xs); }
// read access
template <index_t I>
__host__ __device__ constexpr const auto& At() const
{
static_assert(I < base::Size(), "wrong! out of range");
return base::GetElementDataByKey(detail::TupleElementKey<I>{});
}
// write access
template <index_t I>
__host__ __device__ constexpr auto& At()
{
static_assert(I < base::Size(), "wrong! out of range");
return base::GetElementDataByKey(detail::TupleElementKey<I>{});
}
// read access // read access
template <index_t I> template <index_t I>
__host__ __device__ constexpr const auto& At(Number<I>) const __host__ __device__ constexpr const auto& At(Number<I>) const
...@@ -166,6 +184,20 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X ...@@ -166,6 +184,20 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
return At(i); return At(i);
} }
// WARNING: needed by compiler for C++ structured binding support only, don't use this function!
template <std::size_t I>
__host__ __device__ constexpr const auto& get() const
{
return this->template At<I>();
}
// WARNING: needed by compiler for C++ structured binding support only, don't use this function!
template <std::size_t I>
__host__ __device__ constexpr auto& get()
{
return this->template At<I>();
}
template <typename T> template <typename T>
__host__ __device__ constexpr auto operator=(const T& a) __host__ __device__ constexpr auto operator=(const T& a)
{ {
...@@ -176,7 +208,35 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X ...@@ -176,7 +208,35 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
return *this; return *this;
} }
// True only if is_static_v holds for every element type of the tuple
// (checked on the element type with reference and cv-qualifiers removed).
__host__ __device__ static constexpr bool IsStatic()
{
    bool all_static = true;

    static_for<0, sizeof...(Xs), 1>{}([&all_static](auto idx) {
        using Elem = remove_cvref_t<type_pack_element<idx.value, Xs...>>;

        all_static &= is_static_v<Elem>;
    });

    return all_static;
}
// FIXME: remove
__host__ __device__ static constexpr bool IsStaticBuffer() { return true; } __host__ __device__ static constexpr bool IsStaticBuffer() { return true; }
// Print the tuple as "Tuple{size: N, data: [e0, e1, ...]}".
// Each element is printed by calling print() on it.
__host__ __device__ void Print() const
{
    printf("Tuple{size: %d, data: [", static_cast<index_t>(Size()));

    static_for<0, Size(), 1>{}([&](auto idx) {
        print(At(idx));

        // a separator follows every element except the last one
        const bool needs_separator = idx < Size() - 1;

        if(needs_separator)
        {
            printf(", ");
        }
    });

    printf("]}");
}
}; };
template <> template <>
...@@ -192,6 +252,9 @@ struct Tuple<> ...@@ -192,6 +252,9 @@ struct Tuple<>
return *this; return *this;
} }
__host__ __device__ static constexpr bool IsStatic() { return true; }
// FIXME: remove
__host__ __device__ static constexpr bool IsStaticBuffer() { return true; } __host__ __device__ static constexpr bool IsStaticBuffer() { return true; }
}; };
...@@ -219,3 +282,19 @@ constexpr Tuple<Args&...> tie(Args&... args) noexcept ...@@ -219,3 +282,19 @@ constexpr Tuple<Args&...> tie(Args&... args) noexcept
} }
} // namespace ck } // namespace ck
namespace std {
// WARNING: needed by compiler for C++ structured binding support only, don't use this
template <typename... Ts>
struct tuple_size<ck::Tuple<Ts...>> : std::integral_constant<std::size_t, sizeof...(Ts)>
{
};
// WARNING: needed by compiler for C++ structured binding support only, don't use this
template <std::size_t I, typename... Ts>
struct tuple_element<I, ck::Tuple<Ts...>> : ck::tuple_element<I, ck::Tuple<Ts...>>
{
};
} // namespace std
...@@ -11,14 +11,14 @@ namespace ck { ...@@ -11,14 +11,14 @@ namespace ck {
template <typename F, index_t N> template <typename F, index_t N>
__host__ __device__ constexpr auto generate_tuple(F&& f, Number<N>) __host__ __device__ constexpr auto generate_tuple(F&& f, Number<N>)
{ {
return unpack([&f](auto&&... xs) { return make_tuple(f(xs)...); }, return unpack([&f](auto&&... is) { return make_tuple(f(is)...); },
typename arithmetic_sequence_gen<0, N, 1>::type{}); typename arithmetic_sequence_gen<0, N, 1>::type{});
} }
template <typename F, index_t N> template <typename F, index_t N>
__host__ __device__ constexpr auto generate_tie(F&& f, Number<N>) __host__ __device__ constexpr auto generate_tie(F&& f, Number<N>)
{ {
return unpack([&f](auto&&... xs) { return tie(f(xs)...); }, return unpack([&f](auto&&... is) { return tie(f(is)...); },
typename arithmetic_sequence_gen<0, N, 1>::type{}); typename arithmetic_sequence_gen<0, N, 1>::type{});
} }
...@@ -79,3 +79,50 @@ __host__ __device__ constexpr auto transform_tuples(F f, const X& x, const Y& y, ...@@ -79,3 +79,50 @@ __host__ __device__ constexpr auto transform_tuples(F f, const X& x, const Y& y,
} }
} // namespace ck } // namespace ck
// Macro function
// convert constexpr Array to Tuple of Number
//
//   arr: name of an array-like object; arr.Size() and arr[i] are used in constant expressions
//   n:   name of a value, used in constant expressions; the first n elements of arr are
//        converted. Only n in [0, 6] is implemented.
//
// Both arguments are captured by reference by the generated lambda, so they must be plain
// variable names, not arbitrary expressions.
//
// Every library name is qualified with ck:: because this macro is defined outside of
// namespace ck and may be expanded from any namespace.
//
// NOTE(review): every branch spells out arr[0] ... arr[k]; whether the branches that are not
// taken are left uninstantiated depends on arr / n being template-dependent at the point of
// expansion - confirm against the call sites.
#define TO_TUPLE_OF_NUMBER(arr, n)                                            \
    [&arr, &n] {                                                              \
        static_assert(arr.Size() >= n, "wrong! out of bound");                \
                                                                              \
        static_assert(n < 7, "not implemented");                              \
                                                                              \
        if constexpr(n == 0)                                                  \
        {                                                                     \
            return ck::Tuple<>{};                                             \
        }                                                                     \
        else if constexpr(n == 1)                                             \
        {                                                                     \
            return ck::Tuple<ck::Number<arr[0]>>{};                           \
        }                                                                     \
        else if constexpr(n == 2)                                             \
        {                                                                     \
            return ck::Tuple<ck::Number<arr[0]>, ck::Number<arr[1]>>{};       \
        }                                                                     \
        else if constexpr(n == 3)                                             \
        {                                                                     \
            return ck::Tuple<ck::Number<arr[0]>,                              \
                             ck::Number<arr[1]>,                              \
                             ck::Number<arr[2]>>{};                           \
        }                                                                     \
        else if constexpr(n == 4)                                             \
        {                                                                     \
            return ck::Tuple<ck::Number<arr[0]>,                              \
                             ck::Number<arr[1]>,                              \
                             ck::Number<arr[2]>,                              \
                             ck::Number<arr[3]>>{};                           \
        }                                                                     \
        else if constexpr(n == 5)                                             \
        {                                                                     \
            return ck::Tuple<ck::Number<arr[0]>,                              \
                             ck::Number<arr[1]>,                              \
                             ck::Number<arr[2]>,                              \
                             ck::Number<arr[3]>,                              \
                             ck::Number<arr[4]>>{};                           \
        }                                                                     \
        else if constexpr(n == 6)                                             \
        {                                                                     \
            return ck::Tuple<ck::Number<arr[0]>,                              \
                             ck::Number<arr[1]>,                              \
                             ck::Number<arr[2]>,                              \
                             ck::Number<arr[3]>,                              \
                             ck::Number<arr[4]>,                              \
                             ck::Number<arr[5]>>{};                           \
        }                                                                     \
    }()
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "sequence.hpp"
#include "array.hpp"
#include "tuple.hpp"
namespace ck {
// Convert a Tuple of sequences into a rectangular Array of Array of index_t.
//
// The outer array has one row per tuple element (n0 rows). Every row has max_n1
// slots, where max_n1 is the largest Size() among the tuple elements. Row i0
// receives the elements of t_of_s[i0] in its first t_of_s[i0].Size() slots.
//
// NOTE(review): the initializer "{{-1}}" names a single value only. If Array is an
// aggregate, only slot [0][0] starts at -1 and all remaining slots are
// value-initialized, so unused trailing slots would hold 0 rather than -1 -
// confirm which padding value callers expect.
template <typename... Seqs>
__host__ __device__ constexpr auto to_array_of_array(Tuple<Seqs...> t_of_s)
{
// number of rows
constexpr index_t n0 = sizeof...(Seqs);
// row length: the maximum Size() over all tuple elements
constexpr index_t max_n1 = [&] {
index_t max_n1_ = 0;
static_for<0, n0, 1>{}([&](auto i0) {
constexpr index_t n1 = t_of_s[i0].Size();
max_n1_ = max_n1_ < n1 ? n1 : max_n1_;
});
return max_n1_;
}();
Array<Array<index_t, max_n1>, n0> a_of_a{{-1}};
// copy every sequence into the leading slots of its row
static_for<0, n0, 1>{}([&](auto i0) {
constexpr index_t n1 = t_of_s[i0].Size();
static_for<0, n1, 1>{}([&](auto i1) { a_of_a(i0)(i1) = t_of_s[i0][i1]; });
});
return a_of_a;
}
} // namespace ck
...@@ -9,52 +9,36 @@ ...@@ -9,52 +9,36 @@
namespace ck { namespace ck {
template <typename X, typename Y> // is_same
struct is_same : public integral_constant<bool, false> static_assert(__has_builtin(__is_same), "");
{
};
template <typename X> template <typename X, typename Y>
struct is_same<X, X> : public integral_constant<bool, true> using is_same = integral_constant<bool, __is_same(X, Y)>;
{
};
template <typename X, typename Y> template <typename X, typename Y>
inline constexpr bool is_same_v = is_same<X, Y>::value; inline constexpr bool is_same_v = is_same<X, Y>::value;
template <typename T> static_assert(__has_builtin(__type_pack_element), "");
using remove_reference_t = typename std::remove_reference<T>::type;
template <typename T>
using remove_cv_t = typename std::remove_cv<T>::type;
template <typename T> // type_pack_element
using remove_cvref_t = remove_cv_t<std::remove_reference_t<T>>; template <index_t I, typename... Ts>
using type_pack_element = __type_pack_element<I, Ts...>;
// is_pointer
template <typename T> template <typename T>
using remove_pointer_t = typename std::remove_pointer<T>::type; inline constexpr bool is_pointer_v = std::is_pointer<T>::value;
// is_empty
template <typename T> template <typename T>
inline constexpr bool is_pointer_v = std::is_pointer<T>::value; inline constexpr bool is_empty_v = std::is_empty<T>::value;
// bit_cast
template <typename Y, typename X, typename enable_if<sizeof(X) == sizeof(Y), bool>::type = false> template <typename Y, typename X, typename enable_if<sizeof(X) == sizeof(Y), bool>::type = false>
__host__ __device__ constexpr Y bit_cast(const X& x) __host__ __device__ constexpr Y bit_cast(const X& x)
{ {
#if CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST static_assert(__has_builtin(__builtin_bit_cast), "");
Y y;
__builtin_memcpy(&y, &x, sizeof(X));
return y;
#else
union AsType
{
X x;
Y y;
};
return AsType{x}.y; return __builtin_bit_cast(Y, x);
#endif
} }
} // namespace ck } // namespace ck
...@@ -219,10 +219,10 @@ struct ParallelTensorFunctor ...@@ -219,10 +219,10 @@ struct ParallelTensorFunctor
std::size_t iw_begin = it * work_per_thread; std::size_t iw_begin = it * work_per_thread;
std::size_t iw_end = std::min((it + 1) * work_per_thread, mN1d); std::size_t iw_end = std::min((it + 1) * work_per_thread, mN1d);
auto f = [=] { auto f = [this, iw_begin, iw_end] {
for(std::size_t iw = iw_begin; iw < iw_end; ++iw) for(std::size_t iw = iw_begin; iw < iw_end; ++iw)
{ {
call_f_unpack_args(mF, GetNdIndices(iw)); call_f_unpack_args(this->mF, this->GetNdIndices(iw));
} }
}; };
threads[it] = joinable_thread(f); threads[it] = joinable_thread(f);
......
...@@ -43,23 +43,10 @@ gpu_naive_division(int32_t divisor, const int32_t* p_dividend, int32_t* p_result ...@@ -43,23 +43,10 @@ gpu_naive_division(int32_t divisor, const int32_t* p_dividend, int32_t* p_result
} }
} }
__host__ void cpu_magic_number_division(uint32_t magic_multiplier,
uint32_t magic_shift,
const int32_t* p_dividend,
int32_t* p_result,
uint64_t num)
{
for(uint64_t data_id = 0; data_id < num; ++data_id)
{
p_result[data_id] =
ck::MagicDivision::DoMagicDivision(p_dividend[data_id], magic_multiplier, magic_shift);
}
}
int main(int, char*[]) int main(int, char*[])
{ {
uint64_t num_divisor = 4096; uint64_t num_divisor = 1UL << 12;
uint64_t num_dividend = 1L << 16; uint64_t num_dividend = 1UL << 20;
std::vector<int32_t> divisors_host(num_divisor); std::vector<int32_t> divisors_host(num_divisor);
std::vector<int32_t> dividends_host(num_dividend); std::vector<int32_t> dividends_host(num_dividend);
...@@ -71,7 +58,7 @@ int main(int, char*[]) ...@@ -71,7 +58,7 @@ int main(int, char*[])
} }
// generate dividend // generate dividend
for(uint64_t i = 0; i < num_divisor; ++i) for(uint64_t i = 0; i < num_dividend; ++i)
{ {
dividends_host[i] = i; dividends_host[i] = i;
} }
...@@ -82,7 +69,6 @@ int main(int, char*[]) ...@@ -82,7 +69,6 @@ int main(int, char*[])
std::vector<int32_t> naive_result_host(num_dividend); std::vector<int32_t> naive_result_host(num_dividend);
std::vector<int32_t> magic_result_host(num_dividend); std::vector<int32_t> magic_result_host(num_dividend);
std::vector<int32_t> magic_result_host2(num_dividend);
dividends_dev_buf.ToDevice(dividends_host.data()); dividends_dev_buf.ToDevice(dividends_host.data());
...@@ -121,20 +107,6 @@ int main(int, char*[]) ...@@ -121,20 +107,6 @@ int main(int, char*[])
pass = false; pass = false;
continue; continue;
} }
cpu_magic_number_division(magic_multiplier,
magic_shift,
dividends_host.data(),
magic_result_host2.data(),
num_dividend);
res = ck::utils::check_err(magic_result_host2, naive_result_host);
if(!res)
{
pass = false;
continue;
}
} }
if(pass) if(pass)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment