Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
6bc9ee05
Unverified
Commit
6bc9ee05
authored
Sep 13, 2023
by
Chao Liu
Committed by
GitHub
Sep 13, 2023
Browse files
Remove program server (#10)
* removing program server * specify launch bound per kernel instance
parent
f3baea0d
Changes
28
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
31 additions
and
115 deletions
+31
-115
include/ck/tile_program/grid/grid_gemm.hpp
include/ck/tile_program/grid/grid_gemm.hpp
+10
-11
include/ck/tile_program/tile/distributed_tile_sweep.hpp
include/ck/tile_program/tile/distributed_tile_sweep.hpp
+1
-1
include/ck/tile_program/tile/load_tile.hpp
include/ck/tile_program/tile/load_tile.hpp
+0
-19
include/ck/tile_program/tile/store_tile_impl_static_distribution.hpp
...tile_program/tile/store_tile_impl_static_distribution.hpp
+0
-11
include/ck/tile_program/tile/store_tile_impl_static_lengths.hpp
...e/ck/tile_program/tile/store_tile_impl_static_lengths.hpp
+0
-10
include/ck/tile_program/tile/tile_elementwise.hpp
include/ck/tile_program/tile/tile_elementwise.hpp
+4
-4
include/ck/tile_program/tile/tile_window_impl_static_distribution.hpp
...ile_program/tile/tile_window_impl_static_distribution.hpp
+10
-36
include/ck/tile_program/tile/tile_window_impl_static_lengths.hpp
.../ck/tile_program/tile/tile_window_impl_static_lengths.hpp
+6
-23
No files found.
include/ck/tile_program/grid/grid_gemm.hpp
View file @
6bc9ee05
...
...
@@ -24,8 +24,7 @@ struct GridGemm
using
BlockGemmPipeline
=
typename
Policy
::
template
BlockGemmPipeline
<
Problem
>;
template
<
typename
AGridTensorView
,
typename
BGridTensorView
,
typename
CGridTensorView
>
__host__
__device__
void
operator
()(
ProgramServer
&
ps
,
const
AGridTensorView
&
a_grid
,
__device__
void
operator
()(
const
AGridTensorView
&
a_grid
,
const
BGridTensorView
&
b_grid
,
CGridTensorView
&
c_grid
,
const
AElementFunction
&
a_element_func
,
...
...
@@ -41,17 +40,17 @@ struct GridGemm
const
auto
K
=
a_grid
.
desc_
.
GetLength
(
Number
<
1
>
{});
// divide problem
const
auto
id_block
=
ps
.
get_block_id
();
const
auto
id_block
=
get_block_id
();
const
auto
num_tile_m
=
M
/
kMPerBlock
;
const
auto
num_tile_n
=
N
/
kNPerBlock
;
const
auto
block2tile
=
ps
(
Policy
::
MakeBlock2TileMap
(
num_tile_m
,
num_tile_n
)
)
;
const
auto
block2tile
=
Policy
::
MakeBlock2TileMap
(
num_tile_m
,
num_tile_n
);
const
auto
id_tile
=
block2tile
(
id_block
);
const
auto
iM
=
ps
.
read
_
first
_
lane
(
id_tile
.
template
At
<
0
>()
*
kMPerBlock
);
const
auto
iN
=
ps
.
read
_
first
_
lane
(
id_tile
.
template
At
<
1
>()
*
kNPerBlock
);
const
auto
iM
=
__builtin_amdgcn_
readfirstlane
(
id_tile
.
template
At
<
0
>()
*
kMPerBlock
);
const
auto
iN
=
__builtin_amdgcn_
readfirstlane
(
id_tile
.
template
At
<
1
>()
*
kNPerBlock
);
// A block window
auto
a_block_window
=
make_tile_window
(
...
...
include/ck/tile_program/tile/distributed_tile_sweep.hpp
View file @
6bc9ee05
...
...
@@ -14,7 +14,7 @@ namespace tile_program {
template
<
typename
TileDistributedSpan_
,
// TileDistributedSpan<...>
typename
F
// signature: F(TileDistributedIndex<...>)
>
__host__
__device__
void
sweep_tile_span
(
TileDistributedSpan_
,
const
F
&
f
)
__device__
void
sweep_tile_span
(
TileDistributedSpan_
,
const
F
&
f
)
{
using
DstrSpan
=
remove_cvref_t
<
TileDistributedSpan_
>
;
...
...
include/ck/tile_program/tile/load_tile.hpp
View file @
6bc9ee05
...
...
@@ -160,25 +160,6 @@ __device__ auto load_sliced_thread_data_from_tile_window(
}
// namespace detail
// FIXME: host dummy function for tile program
template
<
typename
BottomTensorView_
,
typename
WindowLengths_
,
typename
TileDistribution_
>
__host__
auto
load_tile
(
const
TileWindowWithStaticDistribution
<
BottomTensorView_
,
WindowLengths_
,
TileDistribution_
>&
tile_window
)
{
using
DataType
=
remove_cvref_t
<
typename
BottomTensorView_
::
DataType
>
;
using
BottomTensorView
=
remove_cvref_t
<
BottomTensorView_
>
;
using
WindowLengths
=
remove_cvref_t
<
WindowLengths_
>
;
using
TileDstr
=
remove_cvref_t
<
TileDistribution_
>
;
using
TileWindow
=
TileWindowWithStaticDistribution
<
BottomTensorView
,
WindowLengths
,
TileDstr
>
;
static_assert
(
is_known_at_compile_time
<
WindowLengths
>::
value
,
"wrong! lengths should be static"
);
static_assert
(
TileWindow
::
HasStaticTileDistribution
(),
"wrong!"
);
return
make_static_distributed_tensor
<
DataType
>
(
tile_window
.
GetTileDistribution
());
}
template
<
typename
BottomTensorView_
,
typename
WindowLengths_
,
typename
TileDistribution_
>
__device__
auto
load_tile
(
TileWindowWithStaticDistribution
<
BottomTensorView_
,
WindowLengths_
,
TileDistribution_
>&
...
...
include/ck/tile_program/tile/store_tile_impl_static_distribution.hpp
View file @
6bc9ee05
...
...
@@ -14,17 +14,6 @@
namespace
ck
{
namespace
tile_program
{
// FIXME: host dummy function for tile program
template
<
typename
BottomTensorView_
,
typename
WindowLengths_
,
typename
TileDistribution_
,
typename
DataType_
>
__host__
void
store_tile
(
TileWindowWithStaticDistribution
<
BottomTensorView_
,
WindowLengths_
,
TileDistribution_
>&
,
const
StaticDistributedTensor
<
DataType_
,
TileDistribution_
>&
)
{
}
template
<
typename
BottomTensorView_
,
typename
WindowLengths_
,
typename
TileDistribution_
,
...
...
include/ck/tile_program/tile/store_tile_impl_static_lengths.hpp
View file @
6bc9ee05
...
...
@@ -15,16 +15,6 @@
namespace
ck
{
namespace
tile_program
{
// FIXME: host dummy function for tile program
template
<
typename
BottomTensorView_
,
typename
WindowLengths_
,
typename
TileDistribution_
,
typename
DataType_
>
__host__
void
store_tile
(
TileWindowWithStaticLengths
<
BottomTensorView_
,
WindowLengths_
>&
,
const
StaticDistributedTensor
<
DataType_
,
TileDistribution_
>&
)
{
}
template
<
typename
BottomTensorView_
,
typename
WindowLengths_
,
typename
TileDistribution_
,
...
...
include/ck/tile_program/tile/tile_elementwise.hpp
View file @
6bc9ee05
...
...
@@ -16,7 +16,7 @@ namespace tile_program {
// TODO: support tensors with different distribution
template
<
typename
InOutElementFunc
,
typename
...
InOutDstrTensors
>
__host__
__device__
void
tile_elementwise_inout
(
const
InOutElementFunc
&
inout_element_func
,
__device__
void
tile_elementwise_inout
(
const
InOutElementFunc
&
inout_element_func
,
InOutDstrTensors
&
...
inout_dstr_tensors
)
{
// TODO: make sure all distributed tensors have same lengths and distribution
...
...
@@ -30,7 +30,7 @@ __host__ __device__ void tile_elementwise_inout(const InOutElementFunc& inout_el
}
template
<
typename
InElementFunc
,
typename
...
InDstrTensors
>
__host__
__device__
auto
tile_elementwise_in
(
const
InElementFunc
&
in_element_func
,
__device__
auto
tile_elementwise_in
(
const
InElementFunc
&
in_element_func
,
const
InDstrTensors
&
...
in_dstr_tensors
)
{
using
OutDataType
=
decltype
(
in_element_func
(
typename
InDstrTensors
::
DataType
{}...));
...
...
include/ck/tile_program/tile/tile_window_impl_static_distribution.hpp
View file @
6bc9ee05
...
...
@@ -45,20 +45,7 @@ struct TileWindowWithStaticDistribution
using
BottomTensorCoord
=
decltype
(
make_tensor_coordinate
(
BottomTensorDesc
{},
BottomTensorIndex
{}));
__host__
__device__
constexpr
TileWindowWithStaticDistribution
()
=
default
;
// FIXME: host dummy constructor for tile program
__host__
constexpr
TileWindowWithStaticDistribution
(
const
BottomTensorView
&
bottom_tensor_view
,
const
WindowLengths
&
,
const
BottomTensorIndex
&
,
const
TileDstr
&
)
:
bottom_tensor_view_
{
bottom_tensor_view
},
window_lengths_
{},
bottom_tensor_thread_coord_
{},
tile_dstr_
{},
window_adaptor_thread_coord_
{}
{
}
__device__
constexpr
TileWindowWithStaticDistribution
()
=
default
;
__device__
constexpr
TileWindowWithStaticDistribution
(
const
BottomTensorView
&
bottom_tensor_view
,
...
...
@@ -86,22 +73,19 @@ struct TileWindowWithStaticDistribution
bottom_tensor_view_
.
GetTensorDescriptor
(),
bottom_tensor_thread_origin_idx
);
}
__host__
__device__
static
constexpr
index_t
GetNumOfDimension
()
{
return
NDimBottomTensor
;
}
__device__
static
constexpr
index_t
GetNumOfDimension
()
{
return
NDimBottomTensor
;
}
__host__
__device__
static
constexpr
bool
HasStaticTileDistribution
()
{
return
TileDstr
::
IsStatic
();
}
__device__
static
constexpr
bool
HasStaticTileDistribution
()
{
return
TileDstr
::
IsStatic
();
}
__host__
__device__
constexpr
auto
GetWindowLengths
()
const
{
return
window_lengths_
;
}
__device__
constexpr
auto
GetWindowLengths
()
const
{
return
window_lengths_
;
}
__host__
__device__
constexpr
auto
GetTileDistribution
()
const
{
return
tile_dstr_
;
}
__device__
constexpr
auto
GetTileDistribution
()
const
{
return
tile_dstr_
;
}
__host__
__device__
constexpr
auto
GetBottomTensorView
()
const
{
return
bottom_tensor_view_
;
}
__device__
constexpr
auto
GetBottomTensorView
()
const
{
return
bottom_tensor_view_
;
}
__host__
__device__
constexpr
auto
GetWindowOrigin
()
const
{
return
window_origin_
;
}
__device__
constexpr
auto
GetWindowOrigin
()
const
{
return
window_origin_
;
}
__host__
__device__
constexpr
auto
GetBottomTensorThreadCoordinate
()
const
__device__
constexpr
auto
GetBottomTensorThreadCoordinate
()
const
{
return
bottom_tensor_thread_coord_
;
}
...
...
@@ -141,7 +125,7 @@ struct TileWindowWithStaticDistribution
}
// return vector dimension among [y0, y1, ...]
__host__
__device__
static
constexpr
auto
GetWindowAdaptorYsSafeVectorLengthStrides
()
__device__
static
constexpr
auto
GetWindowAdaptorYsSafeVectorLengthStrides
()
{
// bottom tensor top dimension vector lengths and strides
const
auto
[
bottom_tensor_top_dim_vector_lengths
,
bottom_tensor_top_dim_vector_strides
]
=
...
...
@@ -201,7 +185,7 @@ struct TileWindowWithStaticDistribution
// TODO: use strategy
template
<
typename
TensorView_
,
typename
WindowLengths_
,
typename
StaticTileDistribution_
>
__host__
__device__
constexpr
auto
__device__
constexpr
auto
make_tile_window
(
const
TensorView_
&
tensor_view
,
const
WindowLengths_
&
window_lengths
,
const
MultiIndex
<
TensorView_
::
GetNumOfDimension
()
>&
origin
,
...
...
@@ -213,16 +197,6 @@ make_tile_window(const TensorView_& tensor_view,
tensor_view
,
window_lengths
,
origin
,
tile_distribution
};
}
// FIXME: dummy host function for tile program
template
<
typename
TensorView_
,
typename
WindowLengths_
,
typename
StaticTileDistribution_
>
__host__
void
move_tile_window
(
TileWindowWithStaticDistribution
<
TensorView_
,
WindowLengths_
,
StaticTileDistribution_
>&
,
const
MultiIndex
<
TileWindowWithStaticDistribution
<
TensorView_
,
WindowLengths_
,
StaticTileDistribution_
>::
GetNumOfDimension
()
>&
)
{
}
template
<
typename
TensorView_
,
typename
WindowLengths_
,
typename
StaticTileDistribution_
>
__device__
void
move_tile_window
(
TileWindowWithStaticDistribution
<
TensorView_
,
WindowLengths_
,
StaticTileDistribution_
>&
window
,
...
...
include/ck/tile_program/tile/tile_window_impl_static_lengths.hpp
View file @
6bc9ee05
...
...
@@ -27,15 +27,7 @@ struct TileWindowWithStaticLengths
using
BottomTensorIndex
=
Array
<
index_t
,
NDimBottomTensor
>
;
__host__
__device__
constexpr
TileWindowWithStaticLengths
()
=
default
;
// FIXME: host dummy constructor for tile program
__host__
constexpr
TileWindowWithStaticLengths
(
const
BottomTensorView
&
bottom_tensor_view
,
const
WindowLengths
&
,
const
BottomTensorIndex
&
)
:
bottom_tensor_view_
{
bottom_tensor_view
},
window_lengths_
{},
window_origin_
{}
{
}
__device__
constexpr
TileWindowWithStaticLengths
()
=
default
;
__device__
constexpr
TileWindowWithStaticLengths
(
const
BottomTensorView
&
bottom_tensor_view
,
const
WindowLengths
&
window_lengths
,
...
...
@@ -46,13 +38,13 @@ struct TileWindowWithStaticLengths
{
}
__host__
__device__
static
constexpr
index_t
GetNumOfDimension
()
{
return
NDimBottomTensor
;
}
__device__
static
constexpr
index_t
GetNumOfDimension
()
{
return
NDimBottomTensor
;
}
__host__
__device__
constexpr
auto
GetWindowLengths
()
const
{
return
window_lengths_
;
}
__device__
constexpr
auto
GetWindowLengths
()
const
{
return
window_lengths_
;
}
__host__
__device__
constexpr
auto
GetBottomTensorView
()
const
{
return
bottom_tensor_view_
;
}
__device__
constexpr
auto
GetBottomTensorView
()
const
{
return
bottom_tensor_view_
;
}
__host__
__device__
constexpr
auto
GetWindowOrigin
()
const
{
return
window_origin_
;
}
__device__
constexpr
auto
GetWindowOrigin
()
const
{
return
window_origin_
;
}
// this is the bottom tensor view
// [x0', x1', ...] ==> [offset]
...
...
@@ -66,7 +58,7 @@ struct TileWindowWithStaticLengths
};
template
<
typename
TensorView_
,
typename
WindowLengths_
>
__host__
__device__
constexpr
auto
__device__
constexpr
auto
make_tile_window
(
const
TensorView_
&
tensor_view
,
const
WindowLengths_
&
window_lengths
,
const
MultiIndex
<
TensorView_
::
GetNumOfDimension
()
>&
origin
)
...
...
@@ -78,15 +70,6 @@ make_tile_window(const TensorView_& tensor_view,
tensor_view
,
window_lengths
,
origin
};
}
// FIXME: dummy host function for tile program
template
<
typename
TensorView_
,
typename
WindowLengths_
>
__host__
void
move_tile_window
(
TileWindowWithStaticLengths
<
TensorView_
,
WindowLengths_
>&
,
const
MultiIndex
<
TileWindowWithStaticLengths
<
TensorView_
,
WindowLengths_
>::
GetNumOfDimension
()
>&
)
{
}
template
<
typename
TensorView_
,
typename
WindowLengths_
>
__device__
void
move_tile_window
(
TileWindowWithStaticLengths
<
TensorView_
,
WindowLengths_
>&
window
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment