Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
8e897da7
Commit
8e897da7
authored
Oct 29, 2021
by
Jing Zhang
Browse files
add gridwise_gemm_v3
parent
baac64e4
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
10 additions
and
13 deletions
+10
-13
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2.hpp
...ernel/include/tensor_operation/gridwise_gemm_dlops_v2.hpp
+1
-1
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v3.hpp
...ernel/include/tensor_operation/gridwise_gemm_dlops_v3.hpp
+3
-6
host/driver_offline/include/driver_convolution_add_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
...ward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
+2
-2
host/driver_offline/include/driver_convolution_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
...ward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
+2
-2
host/driver_offline/include/driver_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
...ward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
+2
-2
No files found.
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2.hpp
View file @
8e897da7
...
@@ -148,7 +148,7 @@ template <index_t BlockSize,
...
@@ -148,7 +148,7 @@ template <index_t BlockSize,
typename
AGlobalMoveSliceWindowStepHacks
,
typename
AGlobalMoveSliceWindowStepHacks
,
typename
BGlobalMoveSliceWindowStepHacks
,
typename
BGlobalMoveSliceWindowStepHacks
,
ActivTypeEnum_t
activ_type
=
ActivTypeEnum_t
::
None
>
ActivTypeEnum_t
activ_type
=
ActivTypeEnum_t
::
None
>
struct
GridwiseGemmDlops_km_kn_mn_v
3
struct
GridwiseGemmDlops_km_kn_mn_v
2
{
{
static
constexpr
auto
I0
=
Number
<
0
>
{};
static
constexpr
auto
I0
=
Number
<
0
>
{};
static
constexpr
auto
I1
=
Number
<
1
>
{};
static
constexpr
auto
I1
=
Number
<
1
>
{};
...
...
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v
2_add
.hpp
→
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v
3
.hpp
View file @
8e897da7
#ifndef CK_GRIDWISE_GEMM_V
2_ADD
_HPP
#ifndef CK_GRIDWISE_GEMM_V
3
_HPP
#define CK_GRIDWISE_GEMM_V
2_ADD
_HPP
#define CK_GRIDWISE_GEMM_V
3
_HPP
#include "common_header.hpp"
#include "common_header.hpp"
#include "multi_index_transform_helper.hpp"
#include "multi_index_transform_helper.hpp"
...
@@ -301,7 +301,7 @@ template <index_t BlockSize,
...
@@ -301,7 +301,7 @@ template <index_t BlockSize,
index_t
bias_type
=
0
,
index_t
bias_type
=
0
,
index_t
out_type
=
1
,
index_t
out_type
=
1
,
index_t
add_type
=
0
>
index_t
add_type
=
0
>
struct
GridwiseGemmDlops_km_kn_mn_v3
_add
struct
GridwiseGemmDlops_km_kn_mn_v3
{
{
static
constexpr
auto
I0
=
Number
<
0
>
{};
static
constexpr
auto
I0
=
Number
<
0
>
{};
static
constexpr
auto
I1
=
Number
<
1
>
{};
static
constexpr
auto
I1
=
Number
<
1
>
{};
...
@@ -733,8 +733,6 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
...
@@ -733,8 +733,6 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
{
{
constexpr
auto
c_k1_n_h2_w2_thread_gemm_desc
=
CThreadDesc_K1_N_H2_W2
{};
constexpr
auto
c_k1_n_h2_w2_thread_gemm_desc
=
CThreadDesc_K1_N_H2_W2
{};
if
constexpr
(
activ_type
>
0
)
{
static_for
<
0
,
c_k1_n_h2_w2_thread_gemm_desc
.
GetElementSpaceSize
(),
1
>
{}([
&
](
auto
i
)
{
static_for
<
0
,
c_k1_n_h2_w2_thread_gemm_desc
.
GetElementSpaceSize
(),
1
>
{}([
&
](
auto
i
)
{
if
constexpr
(
activ_type
==
1
)
if
constexpr
(
activ_type
==
1
)
{
{
...
@@ -753,7 +751,6 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
...
@@ -753,7 +751,6 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
c_thread_buf
(
i
)
=
x
;
c_thread_buf
(
i
)
=
x
;
}
}
});
});
}
}
}
template
<
typename
CThreadBuff
,
template
<
typename
CThreadBuff
,
...
...
host/driver_offline/include/driver_convolution_add_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
View file @
8e897da7
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
#include "common_header.hpp"
#include "common_header.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_dlops_v
2_add
.hpp"
#include "gridwise_gemm_dlops_v
3
.hpp"
template
<
ck
::
index_t
BlockSize
,
template
<
ck
::
index_t
BlockSize
,
typename
FloatAB
,
typename
FloatAB
,
...
@@ -294,7 +294,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
...
@@ -294,7 +294,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
static_assert
(
c_k_n_hop_wop_grid_desc
.
IsKnownAtCompileTime
(),
""
);
static_assert
(
c_k_n_hop_wop_grid_desc
.
IsKnownAtCompileTime
(),
""
);
// GEMM
// GEMM
using
GridwiseGemm
=
GridwiseGemmDlops_km_kn_mn_v3
_add
<
using
GridwiseGemm
=
GridwiseGemmDlops_km_kn_mn_v3
<
BlockSize
,
BlockSize
,
FloatAB
,
FloatAB
,
FloatAcc
,
FloatAcc
,
...
...
host/driver_offline/include/driver_convolution_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
View file @
8e897da7
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
#include "common_header.hpp"
#include "common_header.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_dlops_v
2_add
.hpp"
#include "gridwise_gemm_dlops_v
3
.hpp"
template
<
ck
::
index_t
BlockSize
,
template
<
ck
::
index_t
BlockSize
,
typename
FloatAB
,
typename
FloatAB
,
...
@@ -259,7 +259,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
...
@@ -259,7 +259,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
static_assert
(
c_k_n_hop_wop_grid_desc
.
IsKnownAtCompileTime
(),
""
);
static_assert
(
c_k_n_hop_wop_grid_desc
.
IsKnownAtCompileTime
(),
""
);
// GEMM
// GEMM
using
GridwiseGemm
=
GridwiseGemmDlops_km_kn_mn_v3
_add
<
using
GridwiseGemm
=
GridwiseGemmDlops_km_kn_mn_v3
<
BlockSize
,
BlockSize
,
FloatAB
,
FloatAB
,
FloatAcc
,
FloatAcc
,
...
...
host/driver_offline/include/driver_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
View file @
8e897da7
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
#include "common_header.hpp"
#include "common_header.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_dlops_v
2_add
.hpp"
#include "gridwise_gemm_dlops_v
3
.hpp"
template
<
ck
::
index_t
BlockSize
,
template
<
ck
::
index_t
BlockSize
,
typename
FloatAB
,
typename
FloatAB
,
...
@@ -298,7 +298,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
...
@@ -298,7 +298,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
static_assert
(
c_k_n_hop_wop_grid_desc
.
IsKnownAtCompileTime
(),
""
);
static_assert
(
c_k_n_hop_wop_grid_desc
.
IsKnownAtCompileTime
(),
""
);
// GEMM
// GEMM
using
GridwiseGemm
=
GridwiseGemmDlops_km_kn_mn_v3
_add
<
using
GridwiseGemm
=
GridwiseGemmDlops_km_kn_mn_v3
<
BlockSize
,
BlockSize
,
FloatAB
,
FloatAB
,
FloatAcc
,
FloatAcc
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment