Unverified Commit 0a7174ad authored by Chao Liu's avatar Chao Liu Committed by GitHub
Browse files

Merge with (not the latest) upstream CK (#32)

* fix build for old ck examples

* fix build for old ck
parent 496be40e
...@@ -94,7 +94,7 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2 ...@@ -94,7 +94,7 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
using dst_vector_t = typename dst_vector_type::type; using dst_vector_t = typename dst_vector_type::type;
const bool is_src_valid = const bool is_src_valid =
coordinate_has_valid_offset_assuming_visible_index_is_valid(src_desc, src_coord_); coordinate_has_valid_offset_assuming_top_index_is_valid(src_desc, src_coord_);
// copy data from src_buf into src_vector_container // copy data from src_buf into src_vector_container
auto src_vector_container = src_vector_type{ auto src_vector_container = src_vector_type{
...@@ -114,7 +114,7 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2 ...@@ -114,7 +114,7 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
}); });
const bool is_dst_valid = const bool is_dst_valid =
coordinate_has_valid_offset_assuming_visible_index_is_valid(dst_desc, dst_coord_); coordinate_has_valid_offset_assuming_top_index_is_valid(dst_desc, dst_coord_);
// copy data from dst_vector into dst_buf // copy data from dst_vector into dst_buf
dst_buf.template Update<DstInMemOp, dst_vector_t>( dst_buf.template Update<DstInMemOp, dst_vector_t>(
...@@ -126,28 +126,20 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2 ...@@ -126,28 +126,20 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
if constexpr(idx_1d.value != num_access - 1) if constexpr(idx_1d.value != num_access - 1)
{ {
constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d); constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);
move_tensor_coordinate( move_tensor_coordinate(src_desc, src_coord_, forward_step);
src_desc, src_coord_, make_tensor_coordinate_step(src_desc, forward_step)); move_tensor_coordinate(dst_desc, dst_coord_, forward_step);
move_tensor_coordinate(
dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
} }
}); });
// move coordinate back to slice origin (or not) // move coordinate back to slice origin (or not)
if constexpr(SrcResetCoordinateAfterRun) if constexpr(SrcResetCoordinateAfterRun)
{ {
const auto src_reset_step = move_tensor_coordinate(src_desc, src_coord_, GetCoordinateResetStep());
make_tensor_coordinate_step(src_desc, GetCoordinateResetStep());
move_tensor_coordinate(src_desc, src_coord_, src_reset_step);
} }
if constexpr(DstResetCoordinateAfterRun) if constexpr(DstResetCoordinateAfterRun)
{ {
const auto dst_reset_step = move_tensor_coordinate(dst_desc, dst_coord_, GetCoordinateResetStep());
make_tensor_coordinate_step(dst_desc, GetCoordinateResetStep());
move_tensor_coordinate(dst_desc, dst_coord_, dst_reset_step);
} }
} }
...@@ -179,13 +171,10 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2 ...@@ -179,13 +171,10 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
const Index& src_slice_origin_step_idx) const Index& src_slice_origin_step_idx)
{ {
// if src coord was not reset by RunRead(), then need to adjust the step here // if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = SrcResetCoordinateAfterRun const auto adjusted_step = SrcResetCoordinateAfterRun
? src_slice_origin_step_idx ? src_slice_origin_step_idx
: src_slice_origin_step_idx + GetCoordinateResetStep(); : src_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src_desc, adjusted_step_idx);
move_tensor_coordinate(src_desc, src_coord_, adjusted_step); move_tensor_coordinate(src_desc, src_coord_, adjusted_step);
} }
...@@ -194,13 +183,10 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2 ...@@ -194,13 +183,10 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
const Index& dst_slice_origin_step_idx) const Index& dst_slice_origin_step_idx)
{ {
// if dst coord was not reset by Run(), then need to adjust the step here // if dst coord was not reset by Run(), then need to adjust the step here
const auto adjusted_step_idx = DstResetCoordinateAfterRun const auto adjusted_step = DstResetCoordinateAfterRun
? dst_slice_origin_step_idx ? dst_slice_origin_step_idx
: dst_slice_origin_step_idx + GetCoordinateResetStep(); : dst_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(dst_desc, adjusted_step_idx);
move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step); move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step);
} }
......
...@@ -147,38 +147,26 @@ struct ThreadwiseTensorSliceTransfer_v6r2 ...@@ -147,38 +147,26 @@ struct ThreadwiseTensorSliceTransfer_v6r2
if constexpr(idx_1d.value != num_access - 1) if constexpr(idx_1d.value != num_access - 1)
{ {
constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d); constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);
move_tensor_coordinate( move_tensor_coordinate(src0_desc, src0_coord_, forward_step);
src0_desc, src0_coord_, make_tensor_coordinate_step(src0_desc, forward_step)); move_tensor_coordinate(src1_desc, src1_coord_, forward_step);
move_tensor_coordinate( move_tensor_coordinate(dst_desc, dst_coord_, forward_step);
src1_desc, src1_coord_, make_tensor_coordinate_step(src1_desc, forward_step));
move_tensor_coordinate(
dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
} }
}); });
// move coordinate back to slice origin (or not) // move coordinate back to slice origin (or not)
if constexpr(Src0ResetCoordinateAfterRun) if constexpr(Src0ResetCoordinateAfterRun)
{ {
const auto src0_reset_step = move_tensor_coordinate(src0_desc, src0_coord_, GetCoordinateResetStep());
make_tensor_coordinate_step(src0_desc, GetCoordinateResetStep());
move_tensor_coordinate(src0_desc, src0_coord_, src0_reset_step);
} }
if constexpr(Src1ResetCoordinateAfterRun) if constexpr(Src1ResetCoordinateAfterRun)
{ {
const auto src1_reset_step = move_tensor_coordinate(src1_desc, src1_coord_, GetCoordinateResetStep());
make_tensor_coordinate_step(src1_desc, GetCoordinateResetStep());
move_tensor_coordinate(src1_desc, src1_coord_, src1_reset_step);
} }
if constexpr(DstResetCoordinateAfterRun) if constexpr(DstResetCoordinateAfterRun)
{ {
const auto dst_reset_step = move_tensor_coordinate(dst_desc, dst_coord_, GetCoordinateResetStep());
make_tensor_coordinate_step(dst_desc, GetCoordinateResetStep());
move_tensor_coordinate(dst_desc, dst_coord_, dst_reset_step);
} }
} }
...@@ -210,13 +198,10 @@ struct ThreadwiseTensorSliceTransfer_v6r2 ...@@ -210,13 +198,10 @@ struct ThreadwiseTensorSliceTransfer_v6r2
const Index& src0_slice_origin_step_idx) const Index& src0_slice_origin_step_idx)
{ {
// if src coord was not reset by RunRead(), then need to adjust the step here // if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = Src0ResetCoordinateAfterRun const auto adjusted_step = Src0ResetCoordinateAfterRun
? src0_slice_origin_step_idx ? src0_slice_origin_step_idx
: src0_slice_origin_step_idx + GetCoordinateResetStep(); : src0_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src0_desc, adjusted_step_idx);
move_tensor_coordinate(src0_desc, src0_coord_, adjusted_step); move_tensor_coordinate(src0_desc, src0_coord_, adjusted_step);
} }
...@@ -225,13 +210,10 @@ struct ThreadwiseTensorSliceTransfer_v6r2 ...@@ -225,13 +210,10 @@ struct ThreadwiseTensorSliceTransfer_v6r2
const Index& src1_slice_origin_step_idx) const Index& src1_slice_origin_step_idx)
{ {
// if src coord was not reset by RunRead(), then need to adjust the step here // if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = Src1ResetCoordinateAfterRun const auto adjusted_step = Src1ResetCoordinateAfterRun
? src1_slice_origin_step_idx ? src1_slice_origin_step_idx
: src1_slice_origin_step_idx + GetCoordinateResetStep(); : src1_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src1_desc, adjusted_step_idx);
move_tensor_coordinate(src1_desc, src1_coord_, adjusted_step); move_tensor_coordinate(src1_desc, src1_coord_, adjusted_step);
} }
...@@ -240,13 +222,10 @@ struct ThreadwiseTensorSliceTransfer_v6r2 ...@@ -240,13 +222,10 @@ struct ThreadwiseTensorSliceTransfer_v6r2
const Index& dst_slice_origin_step_idx) const Index& dst_slice_origin_step_idx)
{ {
// if dst coord was not reset by Run(), then need to adjust the step here // if dst coord was not reset by Run(), then need to adjust the step here
const auto adjusted_step_idx = DstResetCoordinateAfterRun const auto adjusted_step = DstResetCoordinateAfterRun
? dst_slice_origin_step_idx ? dst_slice_origin_step_idx
: dst_slice_origin_step_idx + GetCoordinateResetStep(); : dst_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(dst_desc, adjusted_step_idx);
move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step); move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step);
} }
......
...@@ -171,48 +171,32 @@ struct ThreadwiseTensorSliceTransfer_v6r3 ...@@ -171,48 +171,32 @@ struct ThreadwiseTensorSliceTransfer_v6r3
if constexpr(idx_1d.value != num_access - 1) if constexpr(idx_1d.value != num_access - 1)
{ {
constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d); constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);
move_tensor_coordinate( move_tensor_coordinate(src0_desc, src0_coord_, forward_step);
src0_desc, src0_coord_, make_tensor_coordinate_step(src0_desc, forward_step)); move_tensor_coordinate(src1_desc, src1_coord_, forward_step);
move_tensor_coordinate( move_tensor_coordinate(src2_desc, src2_coord_, forward_step);
src1_desc, src1_coord_, make_tensor_coordinate_step(src1_desc, forward_step)); move_tensor_coordinate(dst_desc, dst_coord_, forward_step);
move_tensor_coordinate(
src2_desc, src2_coord_, make_tensor_coordinate_step(src2_desc, forward_step));
move_tensor_coordinate(
dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
} }
}); });
// move coordinate back to slice origin (or not) // move coordinate back to slice origin (or not)
if constexpr(Src0ResetCoordinateAfterRun) if constexpr(Src0ResetCoordinateAfterRun)
{ {
const auto src0_reset_step = move_tensor_coordinate(src0_desc, src0_coord_, GetCoordinateResetStep());
make_tensor_coordinate_step(src0_desc, GetCoordinateResetStep());
move_tensor_coordinate(src0_desc, src0_coord_, src0_reset_step);
} }
if constexpr(Src1ResetCoordinateAfterRun) if constexpr(Src1ResetCoordinateAfterRun)
{ {
const auto src1_reset_step = move_tensor_coordinate(src1_desc, src1_coord_, GetCoordinateResetStep());
make_tensor_coordinate_step(src1_desc, GetCoordinateResetStep());
move_tensor_coordinate(src1_desc, src1_coord_, src1_reset_step);
} }
if constexpr(Src2ResetCoordinateAfterRun) if constexpr(Src2ResetCoordinateAfterRun)
{ {
const auto src2_reset_step = move_tensor_coordinate(src2_desc, src2_coord_, GetCoordinateResetStep());
make_tensor_coordinate_step(src2_desc, GetCoordinateResetStep());
move_tensor_coordinate(src2_desc, src2_coord_, src2_reset_step);
} }
if constexpr(DstResetCoordinateAfterRun) if constexpr(DstResetCoordinateAfterRun)
{ {
const auto dst_reset_step = move_tensor_coordinate(dst_desc, dst_coord_, GetCoordinateResetStep());
make_tensor_coordinate_step(dst_desc, GetCoordinateResetStep());
move_tensor_coordinate(dst_desc, dst_coord_, dst_reset_step);
} }
} }
...@@ -244,13 +228,10 @@ struct ThreadwiseTensorSliceTransfer_v6r3 ...@@ -244,13 +228,10 @@ struct ThreadwiseTensorSliceTransfer_v6r3
const Index& src0_slice_origin_step_idx) const Index& src0_slice_origin_step_idx)
{ {
// if src coord was not reset by RunRead(), then need to adjust the step here // if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = Src0ResetCoordinateAfterRun const auto adjusted_step = Src0ResetCoordinateAfterRun
? src0_slice_origin_step_idx ? src0_slice_origin_step_idx
: src0_slice_origin_step_idx + GetCoordinateResetStep(); : src0_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src0_desc, adjusted_step_idx);
move_tensor_coordinate(src0_desc, src0_coord_, adjusted_step); move_tensor_coordinate(src0_desc, src0_coord_, adjusted_step);
} }
...@@ -259,13 +240,10 @@ struct ThreadwiseTensorSliceTransfer_v6r3 ...@@ -259,13 +240,10 @@ struct ThreadwiseTensorSliceTransfer_v6r3
const Index& src1_slice_origin_step_idx) const Index& src1_slice_origin_step_idx)
{ {
// if src coord was not reset by RunRead(), then need to adjust the step here // if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = Src1ResetCoordinateAfterRun const auto adjusted_step = Src1ResetCoordinateAfterRun
? src1_slice_origin_step_idx ? src1_slice_origin_step_idx
: src1_slice_origin_step_idx + GetCoordinateResetStep(); : src1_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src1_desc, adjusted_step_idx);
move_tensor_coordinate(src1_desc, src1_coord_, adjusted_step); move_tensor_coordinate(src1_desc, src1_coord_, adjusted_step);
} }
...@@ -274,13 +252,10 @@ struct ThreadwiseTensorSliceTransfer_v6r3 ...@@ -274,13 +252,10 @@ struct ThreadwiseTensorSliceTransfer_v6r3
const Index& src2_slice_origin_step_idx) const Index& src2_slice_origin_step_idx)
{ {
// if src coord was not reset by RunRead(), then need to adjust the step here // if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = Src2ResetCoordinateAfterRun const auto adjusted_step = Src2ResetCoordinateAfterRun
? src2_slice_origin_step_idx ? src2_slice_origin_step_idx
: src2_slice_origin_step_idx + GetCoordinateResetStep(); : src2_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src2_desc, adjusted_step_idx);
move_tensor_coordinate(src2_desc, src2_coord_, adjusted_step); move_tensor_coordinate(src2_desc, src2_coord_, adjusted_step);
} }
...@@ -289,13 +264,10 @@ struct ThreadwiseTensorSliceTransfer_v6r3 ...@@ -289,13 +264,10 @@ struct ThreadwiseTensorSliceTransfer_v6r3
const Index& dst_slice_origin_step_idx) const Index& dst_slice_origin_step_idx)
{ {
// if dst coord was not reset by Run(), then need to adjust the step here // if dst coord was not reset by Run(), then need to adjust the step here
const auto adjusted_step_idx = DstResetCoordinateAfterRun const auto adjusted_step = DstResetCoordinateAfterRun
? dst_slice_origin_step_idx ? dst_slice_origin_step_idx
: dst_slice_origin_step_idx + GetCoordinateResetStep(); : dst_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(dst_desc, adjusted_step_idx);
move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step); move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step);
} }
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include "ck/utility/common_header.hpp" #include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_adaptor.hpp" #include "ck/tensor_description/tensor_adaptor.hpp"
#include "ck/tensor_description/tensor_adaptor_coordinate.hpp" #include "ck/tensor_description/tensor_adaptor_coordinate.hpp"
#include "ck/tensor_description/tensor_space_filling_curve.hpp"
#include "ck/tile_program/tile/tile_distribution.hpp" #include "ck/tile_program/tile/tile_distribution.hpp"
#include "ck/tile_program/tile/static_tile_distribution_helper.hpp" #include "ck/tile_program/tile/static_tile_distribution_helper.hpp"
......
...@@ -75,4 +75,20 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T ...@@ -75,4 +75,20 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
return r; return r;
} }
// MultiIndex = index_t * MultiIndex
// Builds a new MultiIndex whose component at each position is a times the
// matching component of x; x itself is left unmodified.
template <index_t NSize>
__host__ __device__ constexpr auto operator*(index_t a, const MultiIndex<NSize>& x)
{
    MultiIndex<NSize> scaled;

    static_for<0, NSize, 1>{}([&](auto dim) {
        // scalar stays on the left-hand side of the per-component product
        scaled(dim) = a * x[dim];
    });

    return scaled;
}
// MultiIndex = MultiIndex * index_t
// Same result as writing the scalar on the left: the operands are swapped and
// the expression a * x does the actual work.
template <index_t NSize>
__host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& x, index_t a)
{
    auto scaled = a * x;
    return scaled;
}
} // namespace ck } // namespace ck
...@@ -239,7 +239,7 @@ struct BufferView<AddressSpaceEnum::Global, ...@@ -239,7 +239,7 @@ struct BufferView<AddressSpaceEnum::Global,
{ {
constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector; constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
amd_buffer_atomic_add<remove_cvref_t<T>, t_per_x, Coherence>( amd_buffer_atomic_add<remove_cvref_t<T>, t_per_x>(
x, p_data_, i, is_valid_element, buffer_size_); x, p_data_, i, is_valid_element, buffer_size_);
} }
else else
......
...@@ -471,7 +471,7 @@ __host__ __device__ constexpr auto sequence_to_tuple_of_number(Sequence<Is...>) ...@@ -471,7 +471,7 @@ __host__ __device__ constexpr auto sequence_to_tuple_of_number(Sequence<Is...>)
constexpr index_t tmp = Seq::At(i); constexpr index_t tmp = Seq::At(i);
return Number<tmp>{}; return Number<tmp>{};
}, },
Seq::Size()); Number<Seq::Size()>{});
} }
} // namespace ck } // namespace ck
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
namespace ck { namespace ck {
using int64_t = long;
using bhalf_t = ushort; using bhalf_t = ushort;
using half_t = _Float16; using half_t = _Float16;
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 #ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
...@@ -122,6 +123,13 @@ struct scalar_type<bhalf_t> ...@@ -122,6 +123,13 @@ struct scalar_type<bhalf_t>
static constexpr index_t vector_size = 1; static constexpr index_t vector_size = 1;
}; };
// scalar_type specialization for int64_t: the scalar element type is int64_t
// itself and vector_size is 1.
template <>
struct scalar_type<int64_t>
{
    static constexpr index_t vector_size = 1;
    using type                           = int64_t;
};
template <> template <>
struct scalar_type<int32_t> struct scalar_type<int32_t>
{ {
...@@ -908,8 +916,6 @@ struct vector_type<T, 256> ...@@ -908,8 +916,6 @@ struct vector_type<T, 256>
} }
}; };
using int64_t = long;
// fp64 // fp64
using double2_t = typename vector_type<double, 2>::type; using double2_t = typename vector_type<double, 2>::type;
using double4_t = typename vector_type<double, 4>::type; using double4_t = typename vector_type<double, 4>::type;
......
...@@ -4,10 +4,11 @@ ...@@ -4,10 +4,11 @@
#pragma once #pragma once
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "integral_constant.hpp" #include "ck/utility/integral_constant.hpp"
#include "number.hpp" #include "ck/utility/number.hpp"
#include "type.hpp" #include "ck/utility/type.hpp"
#include "tuple.hpp" #include "ck/utility/tuple.hpp"
#include "ck/utility/bit_cast.hpp"
namespace ck { namespace ck {
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <type_traits> #include <type_traits>
#include "ck/utility/functional2.hpp" #include "ck/utility/functional2.hpp"
#include "ck/utility/remove_cvref.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp" #include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
using namespace ck; using namespace ck;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment