Unverified Commit 0a7174ad authored by Chao Liu's avatar Chao Liu Committed by GitHub
Browse files

Merge with (not the latest) upstream CK (#32)

* fix build for old ck examples

* fix build for old ck
parent 496be40e
......@@ -94,7 +94,7 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
using dst_vector_t = typename dst_vector_type::type;
const bool is_src_valid =
coordinate_has_valid_offset_assuming_visible_index_is_valid(src_desc, src_coord_);
coordinate_has_valid_offset_assuming_top_index_is_valid(src_desc, src_coord_);
// copy data from src_buf into src_vector_container
auto src_vector_container = src_vector_type{
......@@ -114,7 +114,7 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
});
const bool is_dst_valid =
coordinate_has_valid_offset_assuming_visible_index_is_valid(dst_desc, dst_coord_);
coordinate_has_valid_offset_assuming_top_index_is_valid(dst_desc, dst_coord_);
// copy data from dst_vector into dst_buf
dst_buf.template Update<DstInMemOp, dst_vector_t>(
......@@ -126,28 +126,20 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
if constexpr(idx_1d.value != num_access - 1)
{
constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);
move_tensor_coordinate(
src_desc, src_coord_, make_tensor_coordinate_step(src_desc, forward_step));
move_tensor_coordinate(
dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
move_tensor_coordinate(src_desc, src_coord_, forward_step);
move_tensor_coordinate(dst_desc, dst_coord_, forward_step);
}
});
// move coordinate back to slice origin (or not)
if constexpr(SrcResetCoordinateAfterRun)
{
const auto src_reset_step =
make_tensor_coordinate_step(src_desc, GetCoordinateResetStep());
move_tensor_coordinate(src_desc, src_coord_, src_reset_step);
move_tensor_coordinate(src_desc, src_coord_, GetCoordinateResetStep());
}
if constexpr(DstResetCoordinateAfterRun)
{
const auto dst_reset_step =
make_tensor_coordinate_step(dst_desc, GetCoordinateResetStep());
move_tensor_coordinate(dst_desc, dst_coord_, dst_reset_step);
move_tensor_coordinate(dst_desc, dst_coord_, GetCoordinateResetStep());
}
}
......@@ -179,12 +171,9 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
const Index& src_slice_origin_step_idx)
{
// if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = SrcResetCoordinateAfterRun
? src_slice_origin_step_idx
: src_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src_desc, adjusted_step_idx);
const auto adjusted_step = SrcResetCoordinateAfterRun
? src_slice_origin_step_idx
: src_slice_origin_step_idx + GetCoordinateResetStep();
move_tensor_coordinate(src_desc, src_coord_, adjusted_step);
}
......@@ -194,12 +183,9 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
const Index& dst_slice_origin_step_idx)
{
// if dst coord was not reset by Run(), then need to adjust the step here
const auto adjusted_step_idx = DstResetCoordinateAfterRun
? dst_slice_origin_step_idx
: dst_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(dst_desc, adjusted_step_idx);
const auto adjusted_step = DstResetCoordinateAfterRun
? dst_slice_origin_step_idx
: dst_slice_origin_step_idx + GetCoordinateResetStep();
move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step);
}
......
......@@ -147,38 +147,26 @@ struct ThreadwiseTensorSliceTransfer_v6r2
if constexpr(idx_1d.value != num_access - 1)
{
constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);
move_tensor_coordinate(
src0_desc, src0_coord_, make_tensor_coordinate_step(src0_desc, forward_step));
move_tensor_coordinate(
src1_desc, src1_coord_, make_tensor_coordinate_step(src1_desc, forward_step));
move_tensor_coordinate(
dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
move_tensor_coordinate(src0_desc, src0_coord_, forward_step);
move_tensor_coordinate(src1_desc, src1_coord_, forward_step);
move_tensor_coordinate(dst_desc, dst_coord_, forward_step);
}
});
// move coordinate back to slice origin (or not)
if constexpr(Src0ResetCoordinateAfterRun)
{
const auto src0_reset_step =
make_tensor_coordinate_step(src0_desc, GetCoordinateResetStep());
move_tensor_coordinate(src0_desc, src0_coord_, src0_reset_step);
move_tensor_coordinate(src0_desc, src0_coord_, GetCoordinateResetStep());
}
if constexpr(Src1ResetCoordinateAfterRun)
{
const auto src1_reset_step =
make_tensor_coordinate_step(src1_desc, GetCoordinateResetStep());
move_tensor_coordinate(src1_desc, src1_coord_, src1_reset_step);
move_tensor_coordinate(src1_desc, src1_coord_, GetCoordinateResetStep());
}
if constexpr(DstResetCoordinateAfterRun)
{
const auto dst_reset_step =
make_tensor_coordinate_step(dst_desc, GetCoordinateResetStep());
move_tensor_coordinate(dst_desc, dst_coord_, dst_reset_step);
move_tensor_coordinate(dst_desc, dst_coord_, GetCoordinateResetStep());
}
}
......@@ -210,12 +198,9 @@ struct ThreadwiseTensorSliceTransfer_v6r2
const Index& src0_slice_origin_step_idx)
{
// if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = Src0ResetCoordinateAfterRun
? src0_slice_origin_step_idx
: src0_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src0_desc, adjusted_step_idx);
const auto adjusted_step = Src0ResetCoordinateAfterRun
? src0_slice_origin_step_idx
: src0_slice_origin_step_idx + GetCoordinateResetStep();
move_tensor_coordinate(src0_desc, src0_coord_, adjusted_step);
}
......@@ -225,12 +210,9 @@ struct ThreadwiseTensorSliceTransfer_v6r2
const Index& src1_slice_origin_step_idx)
{
// if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = Src1ResetCoordinateAfterRun
? src1_slice_origin_step_idx
: src1_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src1_desc, adjusted_step_idx);
const auto adjusted_step = Src1ResetCoordinateAfterRun
? src1_slice_origin_step_idx
: src1_slice_origin_step_idx + GetCoordinateResetStep();
move_tensor_coordinate(src1_desc, src1_coord_, adjusted_step);
}
......@@ -240,12 +222,9 @@ struct ThreadwiseTensorSliceTransfer_v6r2
const Index& dst_slice_origin_step_idx)
{
// if dst coord was not reset by Run(), then need to adjust the step here
const auto adjusted_step_idx = DstResetCoordinateAfterRun
? dst_slice_origin_step_idx
: dst_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(dst_desc, adjusted_step_idx);
const auto adjusted_step = DstResetCoordinateAfterRun
? dst_slice_origin_step_idx
: dst_slice_origin_step_idx + GetCoordinateResetStep();
move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step);
}
......
......@@ -171,48 +171,32 @@ struct ThreadwiseTensorSliceTransfer_v6r3
if constexpr(idx_1d.value != num_access - 1)
{
constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);
move_tensor_coordinate(
src0_desc, src0_coord_, make_tensor_coordinate_step(src0_desc, forward_step));
move_tensor_coordinate(
src1_desc, src1_coord_, make_tensor_coordinate_step(src1_desc, forward_step));
move_tensor_coordinate(
src2_desc, src2_coord_, make_tensor_coordinate_step(src2_desc, forward_step));
move_tensor_coordinate(
dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
move_tensor_coordinate(src0_desc, src0_coord_, forward_step);
move_tensor_coordinate(src1_desc, src1_coord_, forward_step);
move_tensor_coordinate(src2_desc, src2_coord_, forward_step);
move_tensor_coordinate(dst_desc, dst_coord_, forward_step);
}
});
// move coordinate back to slice origin (or not)
if constexpr(Src0ResetCoordinateAfterRun)
{
const auto src0_reset_step =
make_tensor_coordinate_step(src0_desc, GetCoordinateResetStep());
move_tensor_coordinate(src0_desc, src0_coord_, src0_reset_step);
move_tensor_coordinate(src0_desc, src0_coord_, GetCoordinateResetStep());
}
if constexpr(Src1ResetCoordinateAfterRun)
{
const auto src1_reset_step =
make_tensor_coordinate_step(src1_desc, GetCoordinateResetStep());
move_tensor_coordinate(src1_desc, src1_coord_, src1_reset_step);
move_tensor_coordinate(src1_desc, src1_coord_, GetCoordinateResetStep());
}
if constexpr(Src2ResetCoordinateAfterRun)
{
const auto src2_reset_step =
make_tensor_coordinate_step(src2_desc, GetCoordinateResetStep());
move_tensor_coordinate(src2_desc, src2_coord_, src2_reset_step);
move_tensor_coordinate(src2_desc, src2_coord_, GetCoordinateResetStep());
}
if constexpr(DstResetCoordinateAfterRun)
{
const auto dst_reset_step =
make_tensor_coordinate_step(dst_desc, GetCoordinateResetStep());
move_tensor_coordinate(dst_desc, dst_coord_, dst_reset_step);
move_tensor_coordinate(dst_desc, dst_coord_, GetCoordinateResetStep());
}
}
......@@ -244,12 +228,9 @@ struct ThreadwiseTensorSliceTransfer_v6r3
const Index& src0_slice_origin_step_idx)
{
// if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = Src0ResetCoordinateAfterRun
? src0_slice_origin_step_idx
: src0_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src0_desc, adjusted_step_idx);
const auto adjusted_step = Src0ResetCoordinateAfterRun
? src0_slice_origin_step_idx
: src0_slice_origin_step_idx + GetCoordinateResetStep();
move_tensor_coordinate(src0_desc, src0_coord_, adjusted_step);
}
......@@ -259,12 +240,9 @@ struct ThreadwiseTensorSliceTransfer_v6r3
const Index& src1_slice_origin_step_idx)
{
// if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = Src1ResetCoordinateAfterRun
? src1_slice_origin_step_idx
: src1_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src1_desc, adjusted_step_idx);
const auto adjusted_step = Src1ResetCoordinateAfterRun
? src1_slice_origin_step_idx
: src1_slice_origin_step_idx + GetCoordinateResetStep();
move_tensor_coordinate(src1_desc, src1_coord_, adjusted_step);
}
......@@ -274,12 +252,9 @@ struct ThreadwiseTensorSliceTransfer_v6r3
const Index& src2_slice_origin_step_idx)
{
// if src coord was not reset by RunRead(), then need to adjust the step here
const auto adjusted_step_idx = Src2ResetCoordinateAfterRun
? src2_slice_origin_step_idx
: src2_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(src2_desc, adjusted_step_idx);
const auto adjusted_step = Src2ResetCoordinateAfterRun
? src2_slice_origin_step_idx
: src2_slice_origin_step_idx + GetCoordinateResetStep();
move_tensor_coordinate(src2_desc, src2_coord_, adjusted_step);
}
......@@ -289,12 +264,9 @@ struct ThreadwiseTensorSliceTransfer_v6r3
const Index& dst_slice_origin_step_idx)
{
// if dst coord was not reset by Run(), then need to adjust the step here
const auto adjusted_step_idx = DstResetCoordinateAfterRun
? dst_slice_origin_step_idx
: dst_slice_origin_step_idx + GetCoordinateResetStep();
// is it OK to construct a new step every time?
const auto adjusted_step = make_tensor_coordinate_step(dst_desc, adjusted_step_idx);
const auto adjusted_step = DstResetCoordinateAfterRun
? dst_slice_origin_step_idx
: dst_slice_origin_step_idx + GetCoordinateResetStep();
move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step);
}
......
......@@ -6,6 +6,7 @@
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_adaptor.hpp"
#include "ck/tensor_description/tensor_adaptor_coordinate.hpp"
#include "ck/tensor_description/tensor_space_filling_curve.hpp"
#include "ck/tile_program/tile/tile_distribution.hpp"
#include "ck/tile_program/tile/static_tile_distribution_helper.hpp"
......
......@@ -75,4 +75,20 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
return r;
}
// MultiIndex = index_t * MultiIndex
// Scales a multi-index by a scalar: component i of the result is a * x[i].
template <index_t NSize>
__host__ __device__ constexpr auto operator*(index_t a, const MultiIndex<NSize>& x)
{
    MultiIndex<NSize> scaled;

    // visit each of the NSize components and write its scaled value
    static_for<0, NSize, 1>{}([&](auto dim) { scaled(dim) = a * x[dim]; });

    return scaled;
}
// MultiIndex = MultiIndex * index_t
// Scalar written on the right-hand side: evaluates the same product with the
// operands swapped (a * x) and returns that result.
template <index_t NSize>
__host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& x, index_t a)
{
return a * x;
}
} // namespace ck
......@@ -239,7 +239,7 @@ struct BufferView<AddressSpaceEnum::Global,
{
constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
amd_buffer_atomic_add<remove_cvref_t<T>, t_per_x, Coherence>(
amd_buffer_atomic_add<remove_cvref_t<T>, t_per_x>(
x, p_data_, i, is_valid_element, buffer_size_);
}
else
......
......@@ -471,7 +471,7 @@ __host__ __device__ constexpr auto sequence_to_tuple_of_number(Sequence<Is...>)
constexpr index_t tmp = Seq::At(i);
return Number<tmp>{};
},
Seq::Size());
Number<Seq::Size()>{});
}
} // namespace ck
......@@ -9,6 +9,7 @@
namespace ck {
using int64_t = long;
using bhalf_t = ushort;
using half_t = _Float16;
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
......@@ -122,6 +123,13 @@ struct scalar_type<bhalf_t>
static constexpr index_t vector_size = 1;
};
// scalar_type specialization for int64_t: the scalar type is int64_t itself
// and vector_size is 1.
template <>
struct scalar_type<int64_t>
{
using type = int64_t;
static constexpr index_t vector_size = 1;
};
template <>
struct scalar_type<int32_t>
{
......@@ -908,8 +916,6 @@ struct vector_type<T, 256>
}
};
using int64_t = long;
// fp64
using double2_t = typename vector_type<double, 2>::type;
using double4_t = typename vector_type<double, 4>::type;
......
......@@ -4,10 +4,11 @@
#pragma once
#include "ck/ck.hpp"
#include "integral_constant.hpp"
#include "number.hpp"
#include "type.hpp"
#include "tuple.hpp"
#include "ck/utility/integral_constant.hpp"
#include "ck/utility/number.hpp"
#include "ck/utility/type.hpp"
#include "ck/utility/tuple.hpp"
#include "ck/utility/bit_cast.hpp"
namespace ck {
......
......@@ -7,6 +7,7 @@
#include <type_traits>
#include "ck/utility/functional2.hpp"
#include "ck/utility/remove_cvref.hpp"
namespace ck {
namespace tensor_operation {
......
......@@ -6,6 +6,7 @@
#include <gtest/gtest.h>
#include "ck/ck.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
using namespace ck;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment