Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
fe2ea9b6
Commit
fe2ea9b6
authored
Feb 17, 2023
by
Chao Liu
Browse files
tile im2col
parent
c5b3f69f
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
186 additions
and
91 deletions
+186
-91
example/91_tile_program/CMakeLists.txt
example/91_tile_program/CMakeLists.txt
+1
-0
example/91_tile_program/common_func.hpp
example/91_tile_program/common_func.hpp
+7
-0
example/91_tile_program/gemm.cpp
example/91_tile_program/gemm.cpp
+2
-1
example/91_tile_program/im2col.cpp
example/91_tile_program/im2col.cpp
+81
-76
example/91_tile_program/tile_program.hpp
example/91_tile_program/tile_program.hpp
+8
-0
include/ck/tensor/tensor.hpp
include/ck/tensor/tensor.hpp
+69
-0
include/ck/tensor_description/tensor_descriptor.hpp
include/ck/tensor_description/tensor_descriptor.hpp
+2
-0
include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp
...eration/operator_transform/transform_conv_fwd_to_gemm.hpp
+15
-13
include/ck/utility/dynamic_buffer.hpp
include/ck/utility/dynamic_buffer.hpp
+1
-1
No files found.
example/91_tile_program/CMakeLists.txt
View file @
fe2ea9b6
add_example_executable
(
example_hello_world hello_world.cpp
)
add_example_executable
(
example_im2col im2col.cpp
)
example/91_tile_program/common_func.hpp
0 → 100644
View file @
fe2ea9b6
#pragma once
#include "tile_program.hpp"
template
namespace
ck
{
make_tensor
}
;
example/91_tile_program/gemm.cpp
View file @
fe2ea9b6
...
...
@@ -27,6 +27,7 @@ struct GemmMultiD
{
using
namespace
ck
;
const
auto
a
=
tp
(
make_naive_tensor
(
a_m_k_lengths
,
a_m_k_strides
),
p_a
);
const
auto
b
=
tp
(
make_naive_tensor
(
b_n_k_lengths
,
b_n_k_strides
),
p_b
);
const
auto
ds
=
tp
(
generate_tuple
(
[
&
](
auto
i
)
{
...
...
@@ -95,7 +96,7 @@ struct GemmMultiD
a_dram_window_map_strategy
);
auto
window_a_block
=
make_window
(
a_lds_block
,
{
N
PerTile
,
KPerTile
},
{
0
,
0
},
a_lds_window_map_strategy
);
make_window
(
a_lds_block
,
{
M
PerTile
,
KPerTile
},
{
0
,
0
},
a_lds_window_map_strategy
);
#endif
...
...
example/91_tile_program/im2col.cpp
View file @
fe2ea9b6
#include "tile_program.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/cluster_descriptor.hpp"
#include "ck/tensor/tensor.hpp"
#include "ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/io.hpp"
#include "ck/library/utility/device_memory.hpp"
// program
template
<
ck
::
index_t
NDimSpatial
,
typename
ALayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
ALayout
,
typename
T
,
// tuning parameter
ck
::
index_t
kMPerTile
,
ck
::
index_t
kKPerTile
>
struct
Im2Col
{
__host__
__device__
void
operator
()(
TileProgram
&
tp
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_dilations
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_left_pads
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_right_pads
,
const
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_lengths
,
const
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_strides
,
const
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
const
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_strides
,
const
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>&
c_g_n_k_wos_lengths
,
const
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>&
c_g_n_k_wos_strides
,
const
std
::
array
<
ck
::
index_t
,
NDimSpatial
>&
conv_filter_strides
,
const
std
::
array
<
ck
::
index_t
,
NDimSpatial
>&
conv_filter_dilations
,
const
std
::
array
<
ck
::
index_t
,
NDimSpatial
>&
input_left_pads
,
const
std
::
array
<
ck
::
index_t
,
NDimSpatial
>&
input_right_pads
,
//
const
std
::
array
<
index_t
,
2
>
a_gemmg_gemmm_gemmk_lengths
,
const
std
::
array
<
index_t
,
2
>
a_gemmg_gemmm_gemmk_strides
,
const
std
::
array
<
ck
::
index_t
,
3
>
a_gemmg_gemmm_gemmk_lengths
,
const
std
::
array
<
ck
::
index_t
,
3
>
a_gemmg_gemmm_gemmk_strides
,
//
const
T
*
p_a_img
,
T
*
p_a_mtx
)
{
using
namespace
ck
;
const
auto
a_src_desc
=
tp
(
TransformConvFwdToGemm
<
NDimSpatial
,
ConvolutionForwardSpecialization
::
Default
>::
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
const
auto
a_src_desc
=
tensor_operation
::
TransformConvFwdToGemm
<
NDimSpatial
,
tensor_operation
::
device
::
ConvolutionForwardSpecialization
::
Default
>::
template
MakeADescriptor_M_K
<
ALayout
>(
a_g_n_c_wis_lengths
,
a_g_n_c_wis_strides
,
b_g_k_c_xs_lengths
,
...
...
@@ -45,48 +57,37 @@ struct Im2Col
conv_filter_strides
,
conv_filter_dilations
,
input_left_pads
,
input_right_pads
)
)
;
input_right_pads
);
const
auto
a_dst_desc
=
tp
(
make_naive_tensor_descriptor
(
a_gemmm_gemmk_lengths
,
a_gemmm_gemmk_strides
));
make_naive_tensor_descriptor
(
make_tuple
(
a_gemmg_gemmm_gemmk_lengths
[
0
],
a_gemmg_gemmm_gemmk_lengths
[
1
],
a_gemmg_gemmm_gemmk_lengths
[
2
]),
make_tuple
(
a_gemmg_gemmm_gemmk_strides
[
0
],
a_gemmg_gemmm_gemmk_strides
[
1
],
a_gemmg_gemmm_gemmk_strides
[
2
]));
const
auto
a_src
=
make_tensor
(
a_src_desc
,
p_a_img
);
const
auto
a_src
=
tp
(
make_tensor
<
AddressSpaceEnum
::
Global
,
true
>
(
a_src_desc
,
p_a_img
)
)
;
const
auto
a_dst
=
make_tensor
(
a_dst_desc
,
p_a_mtx
);
auto
a_dst
=
tp
(
make_tensor
<
AddressSpaceEnum
::
Global
,
true
>
(
a_dst_desc
,
p_a_mtx
)
)
;
const
auto
num_gemmg
=
a_gemmg_gemmm_gemmk_
c_wis_
lengths
[
0
];
const
auto
num_gemmm
=
a_gemmg_gemmm_gemmk_
c_wis_
lengths
[
1
];
const
auto
num_gemmk
=
a_gemmg_gemmm_gemmk_
c_wis_
lengths
[
2
];
const
auto
num_gemmg
=
a_gemmg_gemmm_gemmk_lengths
[
0
];
const
auto
num_gemmm
=
a_gemmg_gemmm_gemmk_lengths
[
1
];
const
auto
num_gemmk
=
a_gemmg_gemmm_gemmk_lengths
[
2
];
const
auto
id_block
=
get_block_1d_id
();
const
auto
id_block
=
tp
.
get_block_1d_id
();
const
auto
num_tile_m
=
num_gemmm
/
MPerTile
;
const
auto
num_tile_k
=
num_gemmk
/
KPerTile
;
const
auto
num_tile_m
=
num_gemmm
/
k
MPerTile
;
const
auto
num_tile_k
=
num_gemmk
/
k
KPerTile
;
const
auto
block2tile
=
tp
(
make_cluster_descriptor
(
make_tuple
(
num_tile_m
,
num_tile_k
)));
const
auto
id_tile
=
block2tile
.
CalculateBotto
n
Index
(
id_block
);
const
auto
id_tile
=
block2tile
.
CalculateBotto
m
Index
(
make_tuple
(
id_block
)
)
;
const
auto
id_tile_m
=
id_tile
.
At
<
0
>
();
const
auto
id_tile_k
=
id_tile
.
At
<
1
>
();
#if 0
// operation-based syntax: per-oeration solution strategy
// operation here is data movement
auto copier = make_copier(a_src,
a_dst,
make_tuple(1, MPerTile, KPerTile),
make_tuple(0, id_tile_m * MPerTile, id_tile_k * KPerTile),
copy_strategy);
for(ck::index_t id_gemmg = 0; id_gemmg < num_gemmg; id_gemmg++)
{
copier();
const
auto
id_tile_m
=
id_tile
[
I0
];
const
auto
id_tile_k
=
id_tile
[
I1
];
copier.move_src_window(make_tuple(1, 0, 0));
copier.move_dst_window(make_tuple(1, 0, 0));
}
#else
#if 1
// data-based syntax: per-data solution strategy
auto
window_a_src
=
make_window
(
a_src
,
make_tuple
(
1
,
MPerTile
,
KPerTile
),
...
...
@@ -105,13 +106,17 @@ struct Im2Col
window_a_src
+=
make_tuple
(
1
,
0
,
0
);
window_a_dst
+=
make_tuple
(
1
,
0
,
0
);
}
#else
#endif
}
};
int
main
()
{
ck
::
index_t
NumDimSpatial
=
2
;
using
DataType
=
float
;
constexpr
ck
::
index_t
NumDimSpatial
=
2
;
ck
::
index_t
G
=
32
;
ck
::
index_t
N
=
256
;
ck
::
index_t
K
=
192
;
...
...
@@ -165,20 +170,20 @@ int main()
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
input_right_pads
{
1
,
1
};
// matrix
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
in_mtx_lengths
{
G
,
G
*
Ho
*
Wo
,
C
*
Y
*
X
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
in_mtx_strides
{
0
,
0
,
1
};
std
::
array
<
ck
::
index_t
,
3
>
in_mtx_lengths
{
G
,
G
*
Ho
*
Wo
,
C
*
Y
*
X
};
std
::
array
<
ck
::
index_t
,
3
>
in_mtx_strides
{
0
,
0
,
1
};
std
::
partial_sum
(
rbegin
(
in_mtx_lengths
),
std
::
prev
(
rend
(
in_mtx_lengths
)),
std
::
next
(
rbegin
(
in_mtx_strides
)),
std
::
multiplies
<>
{});
DeviceMem
in
(
sizeof
(
In
DataType
)
*
G
*
N
*
Hi
*
Wi
*
C
);
DeviceMem
in_mtx
(
sizeof
(
In
DataType
)
*
G
*
N
*
Ho
*
Wo
*
C
*
Y
*
X
);
DeviceMem
in
(
sizeof
(
DataType
)
*
G
*
N
*
Hi
*
Wi
*
C
);
DeviceMem
in_mtx
(
sizeof
(
DataType
)
*
G
*
N
*
Ho
*
Wo
*
C
*
Y
*
X
);
launch
(
HelloWorld
{},
1
,
launch
(
Im2Col
<
2
,
ck
::
tensor_layout
::
convolution
::
GNHWC
,
float
,
128
,
128
>
{},
1
,
256
,
in_lengths
,
in_strides
,
wei_lengths
,
...
...
@@ -193,8 +198,8 @@ int main()
in_mtx_lengths
,
in_mtx_strides
,
//
in
.
GetDeviceBuffer
(),
in_mtx
.
GetDeviceBuffer
());
static_cast
<
DataType
*>
(
in
.
GetDeviceBuffer
()
)
,
static_cast
<
DataType
*>
(
in_mtx
.
GetDeviceBuffer
())
)
;
return
0
;
}
example/91_tile_program/tile_program.hpp
View file @
fe2ea9b6
...
...
@@ -65,6 +65,14 @@ struct TileProgram
{
return
arg_
.
pull
<
T
>
();
}
__host__
static
ck
::
index_t
get_block_1d_id
()
{
return
-
1
;
}
__host__
static
ck
::
index_t
get_grid_size
()
{
return
-
1
;
}
__device__
static
ck
::
index_t
get_block_1d_id
()
{
return
ck
::
get_block_1d_id
();
}
__device__
static
ck
::
index_t
get_grid_size
()
{
return
ck
::
get_grid_size
();
}
};
template
<
typename
Program
,
typename
...
Xs
>
...
...
include/ck/tensor/tensor.hpp
0 → 100644
View file @
fe2ea9b6
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
namespace
ck
{
template
<
AddressSpaceEnum
AddressSpace
,
bool
InvalidElementUseNumericalZeroValue
,
typename
T
,
typename
TensorDesc
>
struct
Tensor
{
static
constexpr
AddressSpaceEnum
kAdressSpace_
=
AddressSpace
;
static
constexpr
bool
kInvalidElementUseNumericalZeroValue_
=
InvalidElementUseNumericalZeroValue
;
__host__
__device__
constexpr
Tensor
()
:
buf_
{
nullptr
,
0
},
desc_
{}
{}
__host__
__device__
constexpr
Tensor
(
T
*
p_data
,
TensorDesc
desc
)
:
buf_
{
p_data
,
desc
.
GetElementSpaceSize
()},
desc_
{
desc
}
{
}
__host__
__device__
constexpr
Tensor
(
T
*
p_data
,
TensorDesc
desc
,
T
invalid_element_value
)
:
buf_
{
p_data
,
desc
.
GetElementSpaceSize
(),
invalid_element_value
},
desc_
{
desc
}
{
}
DynamicBuffer
<
AddressSpace
,
T
,
typename
TensorDesc
::
ElementSpaceSizeType
,
InvalidElementUseNumericalZeroValue
>
buf_
;
TensorDesc
desc_
;
};
template
<
AddressSpaceEnum
AddressSpace
,
bool
InvalidElementUseNumericalZeroValue
,
typename
T
,
typename
TensorDesc
>
__host__
__device__
constexpr
auto
make_tensor
(
const
TensorDesc
&
desc
,
T
*
p_data
)
{
return
Tensor
<
AddressSpace
,
InvalidElementUseNumericalZeroValue
,
T
,
remove_cvref_t
<
TensorDesc
>>
{
p_data
,
desc
};
}
template
<
typename
OldTensor
,
typename
NewTransforms
,
typename
NewLowerDimensionOldVisibleIdss
,
typename
NewUpperDimensionNewVisibleIdss
>
__host__
__device__
constexpr
auto
transform_tensor
(
const
OldTensor
&
old_tensor
,
const
NewTransforms
&
new_transforms
,
NewLowerDimensionOldVisibleIdss
,
NewUpperDimensionNewVisibleIdss
)
{
const
auto
new_desc
=
transform_tensor
(
old_tensor
.
desc_
,
new_transforms
,
NewLowerDimensionOldVisibleIdss
{},
NewUpperDimensionNewVisibleIdss
{});
return
Tensor
<
OldTensor
::
kAddressSpace_
,
OldTensor
::
kInvalidElementUseNumericalZeroValue
,
typename
OldTensor
::
DataType
,
remove_cvref_t
<
decltype
(
new_desc
)
>>
{
old_tensor
.
buf_
.
p_data_
,
new_desc
};
}
}
// namespace ck
include/ck/tensor_description/tensor_descriptor.hpp
View file @
fe2ea9b6
...
...
@@ -26,6 +26,8 @@ template <typename Transforms,
typename
ElementSpaceSize
>
struct
TensorDescriptor
{
using
ElementSpaceSizeType
=
ElementSpaceSize
;
// TODO make these private
__host__
__device__
static
constexpr
index_t
GetNumOfTransform
()
{
return
Transforms
::
Size
();
}
...
...
include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp
View file @
fe2ea9b6
...
...
@@ -24,7 +24,7 @@ struct TransformConvFwdToGemm
typename
std
::
enable_if
<
NDimSpatial
==
1
&&
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
GNWC
>,
bool
>::
type
=
false
>
static
auto
__host__
__device__
static
auto
MakeADescriptor_M_K
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
/* a_g_n_c_wis_strides */
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
...
...
@@ -119,7 +119,7 @@ struct TransformConvFwdToGemm
typename
std
::
enable_if
<
NDimSpatial
==
2
&&
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
GNHWC
>,
bool
>::
type
=
false
>
static
auto
__host__
__device__
static
auto
MakeADescriptor_M_K
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
/* a_g_n_c_wis_strides */
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
...
...
@@ -230,7 +230,7 @@ struct TransformConvFwdToGemm
typename
std
::
enable_if
<
NDimSpatial
==
3
&&
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
GNDHWC
>,
bool
>::
type
=
false
>
static
auto
__host__
__device__
static
auto
MakeADescriptor_M_K
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
/* a_g_n_c_wis_strides */
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
...
...
@@ -363,7 +363,7 @@ struct TransformConvFwdToGemm
(
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
G_NW_C
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
NWGC
>
),
bool
>::
type
=
false
>
static
auto
__host__
__device__
static
auto
MakeADescriptor_M_K
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
...
...
@@ -475,7 +475,7 @@ struct TransformConvFwdToGemm
NDimSpatial
==
2
&&
(
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
G_NHW_C
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
NHWGC
>
),
bool
>::
type
=
false
>
static
auto
__host__
__device__
static
auto
MakeADescriptor_M_K
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
...
...
@@ -603,7 +603,7 @@ struct TransformConvFwdToGemm
NDimSpatial
==
3
&&
(
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
G_NDHW_C
>
||
is_same_v
<
ALayout
,
tensor_layout
::
convolution
::
NDHWGC
>
),
bool
>::
type
=
false
>
static
auto
__host__
__device__
static
auto
MakeADescriptor_M_K
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
...
...
@@ -754,7 +754,7 @@ struct TransformConvFwdToGemm
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
GKYXC
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
GKZYXC
>
,
bool
>::
type
=
false
>
static
auto
__host__
__device__
static
auto
MakeBDescriptor_N_K
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
/* b_g_k_c_xs_strides */
)
{
...
...
@@ -779,7 +779,8 @@ struct TransformConvFwdToGemm
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
KYXGC
>
||
is_same_v
<
BLayout
,
tensor_layout
::
convolution
::
KZYXGC
>
,
bool
>::
type
=
false
>
static
auto
MakeBDescriptor_N_K
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
__host__
__device__
static
auto
MakeBDescriptor_N_K
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_strides
)
{
const
index_t
K
=
b_g_k_c_xs_lengths
[
1
];
...
...
@@ -809,7 +810,7 @@ struct TransformConvFwdToGemm
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
GNHWK
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
GNDHWK
>
,
bool
>::
type
=
false
>
static
auto
__host__
__device__
static
auto
MakeCDescriptor_M_N
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
c_g_n_k_wos_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
/* c_g_n_k_wos_strides */
)
{
...
...
@@ -834,7 +835,8 @@ struct TransformConvFwdToGemm
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
NHWGK
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
NDHWGK
>
,
bool
>::
type
=
false
>
static
auto
MakeCDescriptor_M_N
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
c_g_n_k_wos_lengths
,
__host__
__device__
static
auto
MakeCDescriptor_M_N
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
c_g_n_k_wos_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
c_g_n_k_wos_strides
)
{
const
index_t
N
=
c_g_n_k_wos_lengths
[
1
];
...
...
@@ -858,7 +860,7 @@ struct TransformConvFwdToGemm
typename
std
::
enable_if
<
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
GK
>
||
is_same_v
<
CLayout
,
tensor_layout
::
convolution
::
G_K
>
,
bool
>::
type
=
false
>
static
auto
__host__
__device__
static
auto
MakeCDescriptor_M_N
(
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
c_g_n_k_wos_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
/* c_g_n_k_wos_strides */
)
{
...
...
include/ck/utility/dynamic_buffer.hpp
View file @
fe2ea9b6
...
...
@@ -26,7 +26,7 @@ struct DynamicBuffer
T
*
p_data_
;
ElementSpaceSize
element_space_size_
;
T
invalid_element_value_
=
T
{
0
};
remove_cvref_t
<
T
>
invalid_element_value_
=
T
{
0
};
__host__
__device__
constexpr
DynamicBuffer
(
T
*
p_data
,
ElementSpaceSize
element_space_size
)
:
p_data_
{
p_data
},
element_space_size_
{
element_space_size
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment