Unverified commit 6bc9ee05, authored by Chao Liu, committed by GitHub
Browse files

Remove program server (#10)

* removing program server

* specify launch bound per kernel instance
parent f3baea0d
......@@ -24,8 +24,7 @@ struct GridGemm
using BlockGemmPipeline = typename Policy::template BlockGemmPipeline<Problem>;
template <typename AGridTensorView, typename BGridTensorView, typename CGridTensorView>
__host__ __device__ void operator()(ProgramServer& ps,
const AGridTensorView& a_grid,
__device__ void operator()(const AGridTensorView& a_grid,
const BGridTensorView& b_grid,
CGridTensorView& c_grid,
const AElementFunction& a_element_func,
......@@ -41,17 +40,17 @@ struct GridGemm
const auto K = a_grid.desc_.GetLength(Number<1>{});
// divide problem
const auto id_block = ps.get_block_id();
const auto id_block = get_block_id();
const auto num_tile_m = M / kMPerBlock;
const auto num_tile_n = N / kNPerBlock;
const auto block2tile = ps(Policy::MakeBlock2TileMap(num_tile_m, num_tile_n));
const auto block2tile = Policy::MakeBlock2TileMap(num_tile_m, num_tile_n);
const auto id_tile = block2tile(id_block);
const auto iM = ps.read_first_lane(id_tile.template At<0>() * kMPerBlock);
const auto iN = ps.read_first_lane(id_tile.template At<1>() * kNPerBlock);
const auto iM = __builtin_amdgcn_readfirstlane(id_tile.template At<0>() * kMPerBlock);
const auto iN = __builtin_amdgcn_readfirstlane(id_tile.template At<1>() * kNPerBlock);
// A block window
auto a_block_window = make_tile_window(
......
......@@ -14,7 +14,7 @@ namespace tile_program {
template <typename TileDistributedSpan_, // TileDistributedSpan<...>
typename F // signature: F(TileDistributedIndex<...>)
>
__host__ __device__ void sweep_tile_span(TileDistributedSpan_, const F& f)
__device__ void sweep_tile_span(TileDistributedSpan_, const F& f)
{
using DstrSpan = remove_cvref_t<TileDistributedSpan_>;
......
......@@ -160,25 +160,6 @@ __device__ auto load_sliced_thread_data_from_tile_window(
} // namespace detail
// FIXME: host dummy function for tile program
// Host-only (`__host__`) overload of load_tile for a window with a static tile distribution.
// The only member of `tile_window` it touches is GetTileDistribution(); the bottom tensor
// view is never accessed here.
// Compile-time requirements (enforced by the static_asserts below):
//   - WindowLengths must satisfy is_known_at_compile_time
//   - the window type must report HasStaticTileDistribution()
// Returns: the result of make_static_distributed_tensor<DataType>() for the window's tile
// distribution, where DataType is the bottom tensor view's element type with cv/ref removed.
template <typename BottomTensorView_, typename WindowLengths_, typename TileDistribution_>
__host__ auto load_tile(
const TileWindowWithStaticDistribution<BottomTensorView_, WindowLengths_, TileDistribution_>&
tile_window)
{
// strip cv/ref qualifiers so the aliases below name the plain types
using DataType = remove_cvref_t<typename BottomTensorView_::DataType>;
using BottomTensorView = remove_cvref_t<BottomTensorView_>;
using WindowLengths = remove_cvref_t<WindowLengths_>;
using TileDstr = remove_cvref_t<TileDistribution_>;
using TileWindow = TileWindowWithStaticDistribution<BottomTensorView, WindowLengths, TileDstr>;
static_assert(is_known_at_compile_time<WindowLengths>::value,
"wrong! lengths should be static");
static_assert(TileWindow::HasStaticTileDistribution(), "wrong!");
// build the tensor from the distribution alone; no element of the window is read
return make_static_distributed_tensor<DataType>(tile_window.GetTileDistribution());
}
template <typename BottomTensorView_, typename WindowLengths_, typename TileDistribution_>
__device__ auto
load_tile(TileWindowWithStaticDistribution<BottomTensorView_, WindowLengths_, TileDistribution_>&
......
......@@ -14,17 +14,6 @@
namespace ck {
namespace tile_program {
// FIXME: host dummy function for tile program
// Host-only (`__host__`) overload of store_tile taking a TileWindowWithStaticDistribution
// and a StaticDistributedTensor that share the same TileDistribution_ parameter.
// Both parameters are unnamed and the body is empty, so calling it does nothing.
template <typename BottomTensorView_,
typename WindowLengths_,
typename TileDistribution_,
typename DataType_>
__host__ void
store_tile(TileWindowWithStaticDistribution<BottomTensorView_, WindowLengths_, TileDistribution_>&,
const StaticDistributedTensor<DataType_, TileDistribution_>&)
{
// intentionally empty
}
template <typename BottomTensorView_,
typename WindowLengths_,
typename TileDistribution_,
......
......@@ -15,16 +15,6 @@
namespace ck {
namespace tile_program {
// FIXME: host dummy function for tile program
// Host-only (`__host__`) overload of store_tile taking a TileWindowWithStaticLengths and a
// StaticDistributedTensor. Both parameters are unnamed and the body is empty, so calling it
// does nothing.
template <typename BottomTensorView_,
typename WindowLengths_,
typename TileDistribution_,
typename DataType_>
__host__ void store_tile(TileWindowWithStaticLengths<BottomTensorView_, WindowLengths_>&,
const StaticDistributedTensor<DataType_, TileDistribution_>&)
{
// intentionally empty
}
template <typename BottomTensorView_,
typename WindowLengths_,
typename TileDistribution_,
......
......@@ -16,7 +16,7 @@ namespace tile_program {
// TODO: support tensors with different distribution
template <typename InOutElementFunc, typename... InOutDstrTensors>
__host__ __device__ void tile_elementwise_inout(const InOutElementFunc& inout_element_func,
__device__ void tile_elementwise_inout(const InOutElementFunc& inout_element_func,
InOutDstrTensors&... inout_dstr_tensors)
{
// TODO: make sure all distributed tensors have same lengths and distribution
......@@ -30,7 +30,7 @@ __host__ __device__ void tile_elementwise_inout(const InOutElementFunc& inout_el
}
template <typename InElementFunc, typename... InDstrTensors>
__host__ __device__ auto tile_elementwise_in(const InElementFunc& in_element_func,
__device__ auto tile_elementwise_in(const InElementFunc& in_element_func,
const InDstrTensors&... in_dstr_tensors)
{
using OutDataType = decltype(in_element_func(typename InDstrTensors::DataType{}...));
......
......@@ -45,20 +45,7 @@ struct TileWindowWithStaticDistribution
using BottomTensorCoord =
decltype(make_tensor_coordinate(BottomTensorDesc{}, BottomTensorIndex{}));
__host__ __device__ constexpr TileWindowWithStaticDistribution() = default;
// FIXME: host dummy constructor for tile program
// Host-only (`__host__`) constructor. Only `bottom_tensor_view` is used: it is copied into
// bottom_tensor_view_. The window-lengths, index and tile-distribution arguments are unnamed
// and ignored; every other member named in the initializer list is value-initialized with `{}`.
__host__ constexpr TileWindowWithStaticDistribution(const BottomTensorView& bottom_tensor_view,
const WindowLengths&,
const BottomTensorIndex&,
const TileDstr&)
: bottom_tensor_view_{bottom_tensor_view},
window_lengths_{},
bottom_tensor_thread_coord_{},
tile_dstr_{},
window_adaptor_thread_coord_{}
{
// intentionally empty: all work is done in the initializer list
}
__device__ constexpr TileWindowWithStaticDistribution() = default;
__device__ constexpr TileWindowWithStaticDistribution(
const BottomTensorView& bottom_tensor_view,
......@@ -86,22 +73,19 @@ struct TileWindowWithStaticDistribution
bottom_tensor_view_.GetTensorDescriptor(), bottom_tensor_thread_origin_idx);
}
__host__ __device__ static constexpr index_t GetNumOfDimension() { return NDimBottomTensor; }
__device__ static constexpr index_t GetNumOfDimension() { return NDimBottomTensor; }
__host__ __device__ static constexpr bool HasStaticTileDistribution()
{
return TileDstr::IsStatic();
}
__device__ static constexpr bool HasStaticTileDistribution() { return TileDstr::IsStatic(); }
__host__ __device__ constexpr auto GetWindowLengths() const { return window_lengths_; }
__device__ constexpr auto GetWindowLengths() const { return window_lengths_; }
__host__ __device__ constexpr auto GetTileDistribution() const { return tile_dstr_; }
__device__ constexpr auto GetTileDistribution() const { return tile_dstr_; }
__host__ __device__ constexpr auto GetBottomTensorView() const { return bottom_tensor_view_; }
__device__ constexpr auto GetBottomTensorView() const { return bottom_tensor_view_; }
__host__ __device__ constexpr auto GetWindowOrigin() const { return window_origin_; }
__device__ constexpr auto GetWindowOrigin() const { return window_origin_; }
__host__ __device__ constexpr auto GetBottomTensorThreadCoordinate() const
__device__ constexpr auto GetBottomTensorThreadCoordinate() const
{
return bottom_tensor_thread_coord_;
}
......@@ -141,7 +125,7 @@ struct TileWindowWithStaticDistribution
}
// return vector dimension among [y0, y1, ...]
__host__ __device__ static constexpr auto GetWindowAdaptorYsSafeVectorLengthStrides()
__device__ static constexpr auto GetWindowAdaptorYsSafeVectorLengthStrides()
{
// bottom tensor top dimension vector lengths and strides
const auto [bottom_tensor_top_dim_vector_lengths, bottom_tensor_top_dim_vector_strides] =
......@@ -201,7 +185,7 @@ struct TileWindowWithStaticDistribution
// TODO: use strategy
template <typename TensorView_, typename WindowLengths_, typename StaticTileDistribution_>
__host__ __device__ constexpr auto
__device__ constexpr auto
make_tile_window(const TensorView_& tensor_view,
const WindowLengths_& window_lengths,
const MultiIndex<TensorView_::GetNumOfDimension()>& origin,
......@@ -213,16 +197,6 @@ make_tile_window(const TensorView_& tensor_view,
tensor_view, window_lengths, origin, tile_distribution};
}
// FIXME: dummy host function for tile program
// Host-only (`__host__`) overload of move_tile_window for TileWindowWithStaticDistribution.
// The second parameter is a MultiIndex whose size is the window type's GetNumOfDimension().
// Both parameters are unnamed and the body is empty, so the window is left unchanged.
template <typename TensorView_, typename WindowLengths_, typename StaticTileDistribution_>
__host__ void move_tile_window(
TileWindowWithStaticDistribution<TensorView_, WindowLengths_, StaticTileDistribution_>&,
const MultiIndex<
TileWindowWithStaticDistribution<TensorView_, WindowLengths_, StaticTileDistribution_>::
GetNumOfDimension()>&)
{
// intentionally empty
}
template <typename TensorView_, typename WindowLengths_, typename StaticTileDistribution_>
__device__ void move_tile_window(
TileWindowWithStaticDistribution<TensorView_, WindowLengths_, StaticTileDistribution_>& window,
......
......@@ -27,15 +27,7 @@ struct TileWindowWithStaticLengths
using BottomTensorIndex = Array<index_t, NDimBottomTensor>;
__host__ __device__ constexpr TileWindowWithStaticLengths() = default;
// FIXME: host dummy constructor for tile program
// Host-only (`__host__`) constructor. Only `bottom_tensor_view` is used: it is copied into
// bottom_tensor_view_. The window-lengths and index arguments are unnamed and ignored;
// window_lengths_ and window_origin_ are value-initialized with `{}`.
__host__ constexpr TileWindowWithStaticLengths(const BottomTensorView& bottom_tensor_view,
const WindowLengths&,
const BottomTensorIndex&)
: bottom_tensor_view_{bottom_tensor_view}, window_lengths_{}, window_origin_{}
{
// intentionally empty: all work is done in the initializer list
}
__device__ constexpr TileWindowWithStaticLengths() = default;
__device__ constexpr TileWindowWithStaticLengths(const BottomTensorView& bottom_tensor_view,
const WindowLengths& window_lengths,
......@@ -46,13 +38,13 @@ struct TileWindowWithStaticLengths
{
}
__host__ __device__ static constexpr index_t GetNumOfDimension() { return NDimBottomTensor; }
__device__ static constexpr index_t GetNumOfDimension() { return NDimBottomTensor; }
__host__ __device__ constexpr auto GetWindowLengths() const { return window_lengths_; }
__device__ constexpr auto GetWindowLengths() const { return window_lengths_; }
__host__ __device__ constexpr auto GetBottomTensorView() const { return bottom_tensor_view_; }
__device__ constexpr auto GetBottomTensorView() const { return bottom_tensor_view_; }
__host__ __device__ constexpr auto GetWindowOrigin() const { return window_origin_; }
__device__ constexpr auto GetWindowOrigin() const { return window_origin_; }
// this is the bottom tensor view
// [x0', x1', ...] ==> [offset]
......@@ -66,7 +58,7 @@ struct TileWindowWithStaticLengths
};
template <typename TensorView_, typename WindowLengths_>
__host__ __device__ constexpr auto
__device__ constexpr auto
make_tile_window(const TensorView_& tensor_view,
const WindowLengths_& window_lengths,
const MultiIndex<TensorView_::GetNumOfDimension()>& origin)
......@@ -78,15 +70,6 @@ make_tile_window(const TensorView_& tensor_view,
tensor_view, window_lengths, origin};
}
// FIXME: dummy host function for tile program
// Host-only (`__host__`) overload of move_tile_window for TileWindowWithStaticLengths.
// The second parameter is a MultiIndex whose size is the window type's GetNumOfDimension().
// Both parameters are unnamed and the body is empty, so the window is left unchanged.
template <typename TensorView_, typename WindowLengths_>
__host__ void move_tile_window(
TileWindowWithStaticLengths<TensorView_, WindowLengths_>&,
const MultiIndex<
TileWindowWithStaticLengths<TensorView_, WindowLengths_>::GetNumOfDimension()>&)
{
// intentionally empty
}
template <typename TensorView_, typename WindowLengths_>
__device__ void move_tile_window(
TileWindowWithStaticLengths<TensorView_, WindowLengths_>& window,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment