Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
6c37035f
Commit
6c37035f
authored
May 23, 2021
by
Chao Liu
Browse files
refactor
parent
3a7fd7d6
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
406 additions
and
19 deletions
+406
-19
composable_kernel/include/driver/driver_dynamic_gemm_v1r1.hpp
...osable_kernel/include/driver/driver_dynamic_gemm_v1r1.hpp
+387
-0
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_v1r1.hpp
...l/include/tensor_operation/gridwise_dynamic_gemm_v1r1.hpp
+15
-15
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
...convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
+2
-2
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk.hpp
...convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk.hpp
+2
-2
No files found.
composable_kernel/include/driver/driver_dynamic_gemm_v1.hpp
→
composable_kernel/include/driver/driver_dynamic_gemm_v1
r1
.hpp
View file @
6c37035f
This diff is collapsed.
Click to expand it.
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm.hpp
→
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm
_v1r1
.hpp
View file @
6c37035f
...
...
@@ -27,13 +27,13 @@ __global__ void
#if CK_USE_LAUNCH_BOUNDS
__launch_bounds__
(
CK_MAX_THREAD_PER_BLOCK
,
CK_MIN_BLOCK_PER_CU
)
#endif
kernel_dynamic_gemm_v1
(
const
FloatA
*
__restrict__
p_a_global
,
const
FloatB
*
__restrict__
p_b_global
,
FloatC
*
__restrict__
p_c_global
,
const
AGlobalDesc
a_k_m_global_desc
,
const
BGlobalDesc
b_k_n_global_desc
,
const
CGlobalDesc
c_m0_m1_n0_n1_global_desc
,
const
CBlockClusterDesc
c_block_cluster_desc
)
kernel_dynamic_gemm_v1
r1
(
const
FloatA
*
__restrict__
p_a_global
,
const
FloatB
*
__restrict__
p_b_global
,
FloatC
*
__restrict__
p_c_global
,
const
AGlobalDesc
a_k_m_global_desc
,
const
BGlobalDesc
b_k_n_global_desc
,
const
CGlobalDesc
c_m0_m1_n0_n1_global_desc
,
const
CBlockClusterDesc
c_block_cluster_desc
)
{
GridwiseGemm
::
Run
(
p_a_global
,
p_b_global
,
...
...
@@ -63,13 +63,13 @@ __global__ void
#if CK_USE_LAUNCH_BOUNDS
__launch_bounds__
(
CK_MAX_THREAD_PER_BLOCK
,
CK_MIN_BLOCK_PER_CU
)
#endif
kernel_dynamic_gemm_v1
(
const
FloatA
*
__restrict__
p_a_global
,
const
FloatB
*
__restrict__
p_b_global
,
FloatC
*
__restrict__
p_c_global
,
const
void
__CONSTANT__
*
p_a_k_m_global_desc
,
const
void
__CONSTANT__
*
p_b_k_n_global_desc
,
const
void
__CONSTANT__
*
p_c_m0_m1_n0_n1_global_desc
,
const
void
__CONSTANT__
*
p_c_block_cluster_desc
)
kernel_dynamic_gemm_v1
r1
(
const
FloatA
*
__restrict__
p_a_global
,
const
FloatB
*
__restrict__
p_b_global
,
FloatC
*
__restrict__
p_c_global
,
const
void
__CONSTANT__
*
p_a_k_m_global_desc
,
const
void
__CONSTANT__
*
p_b_k_n_global_desc
,
const
void
__CONSTANT__
*
p_c_m0_m1_n0_n1_global_desc
,
const
void
__CONSTANT__
*
p_c_block_cluster_desc
)
{
// first cast void __CONSTANT__ void* to void*
// second cast void* to Desc*
...
...
@@ -139,7 +139,7 @@ template <index_t BlockSize,
typename
CGlobalIteratorHacks
,
typename
AGlobalMoveSliceWindowIteratorHacks
,
typename
BGlobalMoveSliceWindowIteratorHacks
>
struct
GridwiseDynamicGemm_km_kn_m0m1n0n1_v1
struct
GridwiseDynamicGemm_km_kn_m0m1n0n1_v1
r1
{
__host__
__device__
static
constexpr
index_t
GetSharedMemoryNumberOfByte
()
{
...
...
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
View file @
6c37035f
...
...
@@ -2,7 +2,7 @@
#include "device.hpp"
#include "host_tensor.hpp"
#include "transform_forward_convolution_into_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "driver_dynamic_gemm_v1.hpp"
#include "driver_dynamic_gemm_v1
r1
.hpp"
template
<
class
TInWei
,
ck
::
index_t
InWeiVectorSize
,
...
...
@@ -490,7 +490,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(
for
(
index_t
i
=
0
;
i
<
5
;
++
i
)
{
float
ave_time
=
launch_kernel_dynamic_gemm_v1
<
float
ave_time
=
launch_kernel_dynamic_gemm_v1
r1
<
BlockSize
,
typename
vector_type
<
TInWei
,
InWeiVectorSize
>::
type
,
TAcc
,
...
...
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk.hpp
View file @
6c37035f
...
...
@@ -2,7 +2,7 @@
#include "device.hpp"
#include "host_tensor.hpp"
#include "transform_forward_convolution_into_gemm_v4r4_nhwc_kyxc_nhwk.hpp"
#include "driver_dynamic_gemm_v1.hpp"
#include "driver_dynamic_gemm_v1
r1
.hpp"
template
<
class
TInWei
,
ck
::
index_t
InWeiVectorSize
,
...
...
@@ -396,7 +396,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(
for
(
index_t
i
=
0
;
i
<
5
;
++
i
)
{
float
ave_time
=
launch_kernel_dynamic_gemm_v1
<
float
ave_time
=
launch_kernel_dynamic_gemm_v1
r1
<
BlockSize
,
typename
vector_type
<
TInWei
,
InWeiVectorSize
>::
type
,
TAcc
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment