Unverified commit 6bc9ee05, authored by Chao Liu and committed by GitHub
Browse files

Remove program server (#10)

* removing program server

* specify launch bound per kernel instance
parent f3baea0d
...@@ -24,8 +24,7 @@ struct GridGemm ...@@ -24,8 +24,7 @@ struct GridGemm
using BlockGemmPipeline = typename Policy::template BlockGemmPipeline<Problem>; using BlockGemmPipeline = typename Policy::template BlockGemmPipeline<Problem>;
template <typename AGridTensorView, typename BGridTensorView, typename CGridTensorView> template <typename AGridTensorView, typename BGridTensorView, typename CGridTensorView>
__host__ __device__ void operator()(ProgramServer& ps, __device__ void operator()(const AGridTensorView& a_grid,
const AGridTensorView& a_grid,
const BGridTensorView& b_grid, const BGridTensorView& b_grid,
CGridTensorView& c_grid, CGridTensorView& c_grid,
const AElementFunction& a_element_func, const AElementFunction& a_element_func,
...@@ -41,17 +40,17 @@ struct GridGemm ...@@ -41,17 +40,17 @@ struct GridGemm
const auto K = a_grid.desc_.GetLength(Number<1>{}); const auto K = a_grid.desc_.GetLength(Number<1>{});
// divide problem // divide problem
const auto id_block = ps.get_block_id(); const auto id_block = get_block_id();
const auto num_tile_m = M / kMPerBlock; const auto num_tile_m = M / kMPerBlock;
const auto num_tile_n = N / kNPerBlock; const auto num_tile_n = N / kNPerBlock;
const auto block2tile = ps(Policy::MakeBlock2TileMap(num_tile_m, num_tile_n)); const auto block2tile = Policy::MakeBlock2TileMap(num_tile_m, num_tile_n);
const auto id_tile = block2tile(id_block); const auto id_tile = block2tile(id_block);
const auto iM = ps.read_first_lane(id_tile.template At<0>() * kMPerBlock); const auto iM = __builtin_amdgcn_readfirstlane(id_tile.template At<0>() * kMPerBlock);
const auto iN = ps.read_first_lane(id_tile.template At<1>() * kNPerBlock); const auto iN = __builtin_amdgcn_readfirstlane(id_tile.template At<1>() * kNPerBlock);
// A block window // A block window
auto a_block_window = make_tile_window( auto a_block_window = make_tile_window(
......
...@@ -14,7 +14,7 @@ namespace tile_program { ...@@ -14,7 +14,7 @@ namespace tile_program {
template <typename TileDistributedSpan_, // TileDistributedSpan<...> template <typename TileDistributedSpan_, // TileDistributedSpan<...>
typename F // signature: F(TileDistributedIndex<...>) typename F // signature: F(TileDistributedIndex<...>)
> >
__host__ __device__ void sweep_tile_span(TileDistributedSpan_, const F& f) __device__ void sweep_tile_span(TileDistributedSpan_, const F& f)
{ {
using DstrSpan = remove_cvref_t<TileDistributedSpan_>; using DstrSpan = remove_cvref_t<TileDistributedSpan_>;
......
...@@ -160,25 +160,6 @@ __device__ auto load_sliced_thread_data_from_tile_window( ...@@ -160,25 +160,6 @@ __device__ auto load_sliced_thread_data_from_tile_window(
} // namespace detail } // namespace detail
// FIXME: host dummy function for tile program
// Host-only overload of load_tile for a TileWindowWithStaticDistribution.
// The body never touches the bottom tensor view of the window: it only
//   1. checks at compile time that WindowLengths_ is known at compile time and that the
//      window type reports a static tile distribution, and
//   2. returns make_static_distributed_tensor<DataType>() built from the window's tile
//      distribution, where DataType is the cv/ref-stripped BottomTensorView_::DataType.
// NOTE(review): the contents of the returned tensor depend on
// make_static_distributed_tensor, which is not visible here -- confirm before relying on them.
template <typename BottomTensorView_, typename WindowLengths_, typename TileDistribution_>
__host__ auto load_tile(
const TileWindowWithStaticDistribution<BottomTensorView_, WindowLengths_, TileDistribution_>&
tile_window)
{
// strip cv/ref qualifiers so the checks below see the plain types
using DataType = remove_cvref_t<typename BottomTensorView_::DataType>;
using BottomTensorView = remove_cvref_t<BottomTensorView_>;
using WindowLengths = remove_cvref_t<WindowLengths_>;
using TileDstr = remove_cvref_t<TileDistribution_>;
using TileWindow = TileWindowWithStaticDistribution<BottomTensorView, WindowLengths, TileDstr>;
// compile-time preconditions: static window lengths and static tile distribution
static_assert(is_known_at_compile_time<WindowLengths>::value,
"wrong! lengths should be static");
static_assert(TileWindow::HasStaticTileDistribution(), "wrong!");
// only the tile distribution of the window is used to build the result
return make_static_distributed_tensor<DataType>(tile_window.GetTileDistribution());
}
template <typename BottomTensorView_, typename WindowLengths_, typename TileDistribution_> template <typename BottomTensorView_, typename WindowLengths_, typename TileDistribution_>
__device__ auto __device__ auto
load_tile(TileWindowWithStaticDistribution<BottomTensorView_, WindowLengths_, TileDistribution_>& load_tile(TileWindowWithStaticDistribution<BottomTensorView_, WindowLengths_, TileDistribution_>&
......
...@@ -14,17 +14,6 @@ ...@@ -14,17 +14,6 @@
namespace ck { namespace ck {
namespace tile_program { namespace tile_program {
// FIXME: host dummy function for tile program
// Host-only overload of store_tile for a TileWindowWithStaticDistribution destination.
// Both parameters are unnamed and the body is empty, so a call on the host compiles
// but stores nothing.
template <typename BottomTensorView_,
typename WindowLengths_,
typename TileDistribution_,
typename DataType_>
__host__ void
store_tile(TileWindowWithStaticDistribution<BottomTensorView_, WindowLengths_, TileDistribution_>&,
const StaticDistributedTensor<DataType_, TileDistribution_>&)
{
// intentionally empty: no data is written on the host
}
template <typename BottomTensorView_, template <typename BottomTensorView_,
typename WindowLengths_, typename WindowLengths_,
typename TileDistribution_, typename TileDistribution_,
......
...@@ -15,16 +15,6 @@ ...@@ -15,16 +15,6 @@
namespace ck { namespace ck {
namespace tile_program { namespace tile_program {
// FIXME: host dummy function for tile program
// Host-only overload of store_tile for a TileWindowWithStaticLengths destination.
// Both parameters are unnamed and the body is empty, so a call on the host compiles
// but stores nothing.
template <typename BottomTensorView_,
typename WindowLengths_,
typename TileDistribution_,
typename DataType_>
__host__ void store_tile(TileWindowWithStaticLengths<BottomTensorView_, WindowLengths_>&,
const StaticDistributedTensor<DataType_, TileDistribution_>&)
{
// intentionally empty: no data is written on the host
}
template <typename BottomTensorView_, template <typename BottomTensorView_,
typename WindowLengths_, typename WindowLengths_,
typename TileDistribution_, typename TileDistribution_,
......
...@@ -16,7 +16,7 @@ namespace tile_program { ...@@ -16,7 +16,7 @@ namespace tile_program {
// TODO: support tensors with different distribution // TODO: support tensors with different distribution
template <typename InOutElementFunc, typename... InOutDstrTensors> template <typename InOutElementFunc, typename... InOutDstrTensors>
__host__ __device__ void tile_elementwise_inout(const InOutElementFunc& inout_element_func, __device__ void tile_elementwise_inout(const InOutElementFunc& inout_element_func,
InOutDstrTensors&... inout_dstr_tensors) InOutDstrTensors&... inout_dstr_tensors)
{ {
// TODO: make sure all distributed tensors have same lengths and distribution // TODO: make sure all distributed tensors have same lengths and distribution
...@@ -30,7 +30,7 @@ __host__ __device__ void tile_elementwise_inout(const InOutElementFunc& inout_el ...@@ -30,7 +30,7 @@ __host__ __device__ void tile_elementwise_inout(const InOutElementFunc& inout_el
} }
template <typename InElementFunc, typename... InDstrTensors> template <typename InElementFunc, typename... InDstrTensors>
__host__ __device__ auto tile_elementwise_in(const InElementFunc& in_element_func, __device__ auto tile_elementwise_in(const InElementFunc& in_element_func,
const InDstrTensors&... in_dstr_tensors) const InDstrTensors&... in_dstr_tensors)
{ {
using OutDataType = decltype(in_element_func(typename InDstrTensors::DataType{}...)); using OutDataType = decltype(in_element_func(typename InDstrTensors::DataType{}...));
......
...@@ -45,20 +45,7 @@ struct TileWindowWithStaticDistribution ...@@ -45,20 +45,7 @@ struct TileWindowWithStaticDistribution
using BottomTensorCoord = using BottomTensorCoord =
decltype(make_tensor_coordinate(BottomTensorDesc{}, BottomTensorIndex{})); decltype(make_tensor_coordinate(BottomTensorDesc{}, BottomTensorIndex{}));
__host__ __device__ constexpr TileWindowWithStaticDistribution() = default; __device__ constexpr TileWindowWithStaticDistribution() = default;
// FIXME: host dummy constructor for tile program
// Host-only constructor. Only the bottom tensor view argument is used: it is copied into
// bottom_tensor_view_. The window lengths, bottom tensor index and tile distribution
// arguments are unnamed and ignored; the corresponding members are initialized with
// empty braces instead of from the arguments.
__host__ constexpr TileWindowWithStaticDistribution(const BottomTensorView& bottom_tensor_view,
const WindowLengths&,
const BottomTensorIndex&,
const TileDstr&)
: bottom_tensor_view_{bottom_tensor_view},
window_lengths_{},
bottom_tensor_thread_coord_{},
tile_dstr_{},
window_adaptor_thread_coord_{}
{
// intentionally empty: no per-thread coordinates are computed on the host
}
__device__ constexpr TileWindowWithStaticDistribution( __device__ constexpr TileWindowWithStaticDistribution(
const BottomTensorView& bottom_tensor_view, const BottomTensorView& bottom_tensor_view,
...@@ -86,22 +73,19 @@ struct TileWindowWithStaticDistribution ...@@ -86,22 +73,19 @@ struct TileWindowWithStaticDistribution
bottom_tensor_view_.GetTensorDescriptor(), bottom_tensor_thread_origin_idx); bottom_tensor_view_.GetTensorDescriptor(), bottom_tensor_thread_origin_idx);
} }
__host__ __device__ static constexpr index_t GetNumOfDimension() { return NDimBottomTensor; } __device__ static constexpr index_t GetNumOfDimension() { return NDimBottomTensor; }
__host__ __device__ static constexpr bool HasStaticTileDistribution() __device__ static constexpr bool HasStaticTileDistribution() { return TileDstr::IsStatic(); }
{
return TileDstr::IsStatic();
}
__host__ __device__ constexpr auto GetWindowLengths() const { return window_lengths_; } __device__ constexpr auto GetWindowLengths() const { return window_lengths_; }
__host__ __device__ constexpr auto GetTileDistribution() const { return tile_dstr_; } __device__ constexpr auto GetTileDistribution() const { return tile_dstr_; }
__host__ __device__ constexpr auto GetBottomTensorView() const { return bottom_tensor_view_; } __device__ constexpr auto GetBottomTensorView() const { return bottom_tensor_view_; }
__host__ __device__ constexpr auto GetWindowOrigin() const { return window_origin_; } __device__ constexpr auto GetWindowOrigin() const { return window_origin_; }
__host__ __device__ constexpr auto GetBottomTensorThreadCoordinate() const __device__ constexpr auto GetBottomTensorThreadCoordinate() const
{ {
return bottom_tensor_thread_coord_; return bottom_tensor_thread_coord_;
} }
...@@ -141,7 +125,7 @@ struct TileWindowWithStaticDistribution ...@@ -141,7 +125,7 @@ struct TileWindowWithStaticDistribution
} }
// return vector dimension among [y0, y1, ...] // return vector dimension among [y0, y1, ...]
__host__ __device__ static constexpr auto GetWindowAdaptorYsSafeVectorLengthStrides() __device__ static constexpr auto GetWindowAdaptorYsSafeVectorLengthStrides()
{ {
// bottom tensor top dimension vector lengths and strides // bottom tensor top dimension vector lengths and strides
const auto [bottom_tensor_top_dim_vector_lengths, bottom_tensor_top_dim_vector_strides] = const auto [bottom_tensor_top_dim_vector_lengths, bottom_tensor_top_dim_vector_strides] =
...@@ -201,7 +185,7 @@ struct TileWindowWithStaticDistribution ...@@ -201,7 +185,7 @@ struct TileWindowWithStaticDistribution
// TODO: use strategy // TODO: use strategy
template <typename TensorView_, typename WindowLengths_, typename StaticTileDistribution_> template <typename TensorView_, typename WindowLengths_, typename StaticTileDistribution_>
__host__ __device__ constexpr auto __device__ constexpr auto
make_tile_window(const TensorView_& tensor_view, make_tile_window(const TensorView_& tensor_view,
const WindowLengths_& window_lengths, const WindowLengths_& window_lengths,
const MultiIndex<TensorView_::GetNumOfDimension()>& origin, const MultiIndex<TensorView_::GetNumOfDimension()>& origin,
...@@ -213,16 +197,6 @@ make_tile_window(const TensorView_& tensor_view, ...@@ -213,16 +197,6 @@ make_tile_window(const TensorView_& tensor_view,
tensor_view, window_lengths, origin, tile_distribution}; tensor_view, window_lengths, origin, tile_distribution};
} }
// FIXME: dummy host function for tile program
// Host-only overload of move_tile_window for a TileWindowWithStaticDistribution.
// The window and the step (a MultiIndex whose size is the window's GetNumOfDimension())
// are both unnamed and the body is empty, so the window is left unchanged.
template <typename TensorView_, typename WindowLengths_, typename StaticTileDistribution_>
__host__ void move_tile_window(
TileWindowWithStaticDistribution<TensorView_, WindowLengths_, StaticTileDistribution_>&,
const MultiIndex<
TileWindowWithStaticDistribution<TensorView_, WindowLengths_, StaticTileDistribution_>::
GetNumOfDimension()>&)
{
// intentionally empty: the window is not moved on the host
}
template <typename TensorView_, typename WindowLengths_, typename StaticTileDistribution_> template <typename TensorView_, typename WindowLengths_, typename StaticTileDistribution_>
__device__ void move_tile_window( __device__ void move_tile_window(
TileWindowWithStaticDistribution<TensorView_, WindowLengths_, StaticTileDistribution_>& window, TileWindowWithStaticDistribution<TensorView_, WindowLengths_, StaticTileDistribution_>& window,
......
...@@ -27,15 +27,7 @@ struct TileWindowWithStaticLengths ...@@ -27,15 +27,7 @@ struct TileWindowWithStaticLengths
using BottomTensorIndex = Array<index_t, NDimBottomTensor>; using BottomTensorIndex = Array<index_t, NDimBottomTensor>;
__host__ __device__ constexpr TileWindowWithStaticLengths() = default; __device__ constexpr TileWindowWithStaticLengths() = default;
// FIXME: host dummy constructor for tile program
// Host-only constructor. Only the bottom tensor view argument is used: it is copied into
// bottom_tensor_view_. The window lengths and bottom tensor index arguments are unnamed
// and ignored; window_lengths_ and window_origin_ are initialized with empty braces.
__host__ constexpr TileWindowWithStaticLengths(const BottomTensorView& bottom_tensor_view,
const WindowLengths&,
const BottomTensorIndex&)
: bottom_tensor_view_{bottom_tensor_view}, window_lengths_{}, window_origin_{}
{
// intentionally empty: the passed-in lengths and origin are not stored on the host
}
__device__ constexpr TileWindowWithStaticLengths(const BottomTensorView& bottom_tensor_view, __device__ constexpr TileWindowWithStaticLengths(const BottomTensorView& bottom_tensor_view,
const WindowLengths& window_lengths, const WindowLengths& window_lengths,
...@@ -46,13 +38,13 @@ struct TileWindowWithStaticLengths ...@@ -46,13 +38,13 @@ struct TileWindowWithStaticLengths
{ {
} }
__host__ __device__ static constexpr index_t GetNumOfDimension() { return NDimBottomTensor; } __device__ static constexpr index_t GetNumOfDimension() { return NDimBottomTensor; }
__host__ __device__ constexpr auto GetWindowLengths() const { return window_lengths_; } __device__ constexpr auto GetWindowLengths() const { return window_lengths_; }
__host__ __device__ constexpr auto GetBottomTensorView() const { return bottom_tensor_view_; } __device__ constexpr auto GetBottomTensorView() const { return bottom_tensor_view_; }
__host__ __device__ constexpr auto GetWindowOrigin() const { return window_origin_; } __device__ constexpr auto GetWindowOrigin() const { return window_origin_; }
// this is the bottom tensor view // this is the bottom tensor view
// [x0', x1', ...] ==> [offset] // [x0', x1', ...] ==> [offset]
...@@ -66,7 +58,7 @@ struct TileWindowWithStaticLengths ...@@ -66,7 +58,7 @@ struct TileWindowWithStaticLengths
}; };
template <typename TensorView_, typename WindowLengths_> template <typename TensorView_, typename WindowLengths_>
__host__ __device__ constexpr auto __device__ constexpr auto
make_tile_window(const TensorView_& tensor_view, make_tile_window(const TensorView_& tensor_view,
const WindowLengths_& window_lengths, const WindowLengths_& window_lengths,
const MultiIndex<TensorView_::GetNumOfDimension()>& origin) const MultiIndex<TensorView_::GetNumOfDimension()>& origin)
...@@ -78,15 +70,6 @@ make_tile_window(const TensorView_& tensor_view, ...@@ -78,15 +70,6 @@ make_tile_window(const TensorView_& tensor_view,
tensor_view, window_lengths, origin}; tensor_view, window_lengths, origin};
} }
// FIXME: dummy host function for tile program
// Host-only overload of move_tile_window for a TileWindowWithStaticLengths.
// The window and the step (a MultiIndex whose size is the window's GetNumOfDimension())
// are both unnamed and the body is empty, so the window is left unchanged.
template <typename TensorView_, typename WindowLengths_>
__host__ void move_tile_window(
TileWindowWithStaticLengths<TensorView_, WindowLengths_>&,
const MultiIndex<
TileWindowWithStaticLengths<TensorView_, WindowLengths_>::GetNumOfDimension()>&)
{
// intentionally empty: the window is not moved on the host
}
template <typename TensorView_, typename WindowLengths_> template <typename TensorView_, typename WindowLengths_>
__device__ void move_tile_window( __device__ void move_tile_window(
TileWindowWithStaticLengths<TensorView_, WindowLengths_>& window, TileWindowWithStaticLengths<TensorView_, WindowLengths_>& window,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment