Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
1e2a641e
"src/include/vscode:/vscode.git/clone" did not exist on "0f3dcb5042f0387128a8253b7356e359b01ed755"
Commit
1e2a641e
authored
Oct 17, 2022
by
Paul
Browse files
Format
parent
d1a1a28b
Changes
4
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
2651 additions
and
26 deletions
+2651
-26
src/targets/gpu/jit/ck_gemm_instances.cpp
src/targets/gpu/jit/ck_gemm_instances.cpp
+2628
-8
src/targets/gpu/kernels/include/migraphx/kernels/ck.hpp
src/targets/gpu/kernels/include/migraphx/kernels/ck.hpp
+8
-6
src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm.hpp
src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm.hpp
+12
-10
src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm_includes.hpp
...gpu/kernels/include/migraphx/kernels/ck_gemm_includes.hpp
+3
-2
No files found.
src/targets/gpu/jit/ck_gemm_instances.cpp
View file @
1e2a641e
This diff is collapsed.
Click to expand it.
src/targets/gpu/kernels/include/migraphx/kernels/ck.hpp
View file @
1e2a641e
...
...
@@ -59,14 +59,15 @@ constexpr auto to_ck_tensor()
});
}
template
<
class
F
>
template
<
class
F
>
struct
ck_function_adaptor
:
F
{
template
<
class
...
Ts
>
template
<
class
...
Ts
>
constexpr
ck_function_adaptor
(
Ts
&&
...
xs
)
:
F
(
static_cast
<
Ts
&&>
(
xs
)...)
{}
{
}
template
<
class
T
,
class
...
Ts
>
template
<
class
T
,
class
...
Ts
>
constexpr
void
operator
()(
T
&
out
,
Ts
&&
...
xs
)
const
{
out
=
static_cast
<
const
F
&>
(
*
this
)(
static_cast
<
Ts
&&>
(
xs
)...);
...
...
@@ -75,9 +76,10 @@ struct ck_function_adaptor : F
struct
ck_nop
{
template
<
class
T
>
template
<
class
T
>
constexpr
void
operator
()(
T
&
)
const
{}
{
}
};
}
// namespace migraphx
...
...
src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm.hpp
View file @
1e2a641e
...
...
@@ -41,27 +41,29 @@ __device__ void ck_gemm(A a, B b, E e, Ds... ds)
constexpr
const
auto
a_grid_desc_m_k
=
gemm
.
matrix_padder
.
PadADescriptor_M_K
(
to_ck_tensor
<
A
>
());
constexpr
const
auto
b_grid_desc_n_k
=
gemm
.
matrix_padder
.
PadBDescriptor_N_K
(
to_ck_tensor
<
B
>
());
constexpr
const
auto
e_grid_desc_m_n
=
gemm
.
matrix_padder
.
PadCDescriptor_M_N
(
to_ck_tensor
<
E
>
());
constexpr
const
auto
ds_grid_desc_m_n
=
ck
::
make_tuple
(
gemm
.
matrix_padder
.
PadCDescriptor_M_N
(
to_ck_tensor
<
Ds
>
())...);
constexpr
const
auto
ds_grid_desc_m_n
=
ck
::
make_tuple
(
gemm
.
matrix_padder
.
PadCDescriptor_M_N
(
to_ck_tensor
<
Ds
>
())...);
constexpr
const
auto
block_2_etile_map
=
gemm
.
MakeDefaultBlock2ETileMap
(
e_grid_desc_m_n
);
using
GridwiseGemm
=
typename
G
::
GridwiseGemm
;
// tensor descriptors for block/thread-wise copy
constexpr
auto
a_grid_desc_ak0_m_ak1
=
GridwiseGemm
::
MakeDefaultAGridDescriptor_AK0_M_AK1
(
a_grid_desc_m_k
);
constexpr
auto
b_grid_desc_bk0_n_bk1
=
GridwiseGemm
::
MakeDefaultBGridDescriptor_BK0_N_BK1
(
b_grid_desc_n_k
);
constexpr
auto
a_grid_desc_ak0_m_ak1
=
GridwiseGemm
::
MakeDefaultAGridDescriptor_AK0_M_AK1
(
a_grid_desc_m_k
);
constexpr
auto
b_grid_desc_bk0_n_bk1
=
GridwiseGemm
::
MakeDefaultBGridDescriptor_BK0_N_BK1
(
b_grid_desc_n_k
);
constexpr
auto
ds_grid_desc_mblock_mperblock_nblock_nperblock
=
GridwiseGemm
::
MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
(
ds_grid_desc_m_n
);
GridwiseGemm
::
MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
(
ds_grid_desc_m_n
);
constexpr
auto
e_grid_desc_mblock_mperblock_nblock_nperblock
=
GridwiseGemm
::
MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
(
e_grid_desc_m_n
);
GridwiseGemm
::
MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
(
e_grid_desc_m_n
);
__shared__
char
p_shared_block
[
GridwiseGemm
::
GetSharedMemoryNumberOfByte
()];
constexpr
const
bool
HasMainKBlockLoop
=
GridwiseGemm
::
CalculateHasMainKBlockLoop
(
a_grid_desc_ak0_m_ak1
.
GetLength
(
ck
::
Number
<
0
>
{})
*
a_grid_desc_ak0_m_ak1
.
GetLength
(
ck
::
Number
<
2
>
{}));
GridwiseGemm
::
CalculateHasMainKBlockLoop
(
a_grid_desc_ak0_m_ak1
.
GetLength
(
ck
::
Number
<
0
>
{})
*
a_grid_desc_ak0_m_ak1
.
GetLength
(
ck
::
Number
<
2
>
{}));
GridwiseGemm
::
template
Run
<
HasMainKBlockLoop
>(
a
.
data
(),
b
.
data
(),
ck
::
make_tuple
(
ds
.
data
()...),
...
...
src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm_includes.hpp
View file @
1e2a641e
...
...
@@ -156,7 +156,8 @@ template <typename ALayout,
ck
::
LoopScheduler
LoopSched
=
ck
::
make_default_loop_scheduler
()>
struct
CK_DeviceGemmMultipleD
{
ck
::
tensor_operation
::
device
::
MatrixPadder
<
GemmSpec
,
ck
::
index_t
,
ck
::
index_t
,
ck
::
index_t
>
matrix_padder
{
MPerBlock
,
NPerBlock
,
KPerBlock
};
ck
::
tensor_operation
::
device
::
MatrixPadder
<
GemmSpec
,
ck
::
index_t
,
ck
::
index_t
,
ck
::
index_t
>
matrix_padder
{
MPerBlock
,
NPerBlock
,
KPerBlock
};
// GridwiseGemm
using
GridwiseGemm
=
ck
::
GridwiseGemmMultipleD_xdl_cshuffle
<
...
...
@@ -203,7 +204,7 @@ struct CK_DeviceGemmMultipleD
LoopSched
>
;
// return block_id to E matrix tile idx (m0, n0) mapping
template
<
class
EGridDesc_M_N
>
template
<
class
EGridDesc_M_N
>
__device__
static
constexpr
auto
MakeDefaultBlock2ETileMap
(
const
EGridDesc_M_N
&
e_grid_desc_m_n_
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment