gaoqiong / composable_kernel · Commit c5335964

Commit c5335964, authored Nov 02, 2023 by root

    added pk_cvt

Parents: cc9d2a84, f27ea94e
Changes: 65 files in the full commit; this page shows 20 changed files with 718 additions and 38 deletions (+718 −38).
Files changed on this page:

  library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwgc_2d_instance.cpp  (+62 −0)
  library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nwgc_1d_instance.cpp  (+61 −0)
  library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt  (+6 −4)
  library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp  (+1 −1)
  library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeLists.txt  (+7 −0)
  library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp  (+55 −0)
  library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp  (+55 −0)
  library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp  (+55 −0)
  library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp  (+54 −0)
  library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt  (+6 −3)
  library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gndhwc_3d_instance.cpp  (+5 −5)
  library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnhwc_2d_instance.cpp  (+5 −5)
  library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnwc_1d_instance.cpp  (+5 −5)
  library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_ndhwgc_3d_instance.cpp  (+62 −0)
  library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwgc_2d_instance.cpp  (+62 −0)
  library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nwgc_1d_instance.cpp  (+61 −0)
  profiler/README.md  (+2 −1)
  profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp  (+26 −5)
  profiler/include/profiler/profile_gemm_impl.hpp  (+2 −2)
  profiler/src/profile_conv_tensor_rearrange.cpp  (+126 −7)
library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwgc_2d_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

using namespace ck::conv_tensor_rearrange_op;

void add_device_column_to_image_nhwgc_2d_bf16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<2, NHWGC, BF16, BF16, ColumnToImage>>>& instances)
{
#ifdef CK_ENABLE_BF16
    add_device_operation_instances(instances, device_column_to_image_bf16_instances<2, NHWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_column_to_image_nhwgc_2d_f16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<2, NHWGC, F16, F16, ColumnToImage>>>& instances)
{
#ifdef CK_ENABLE_FP16
    add_device_operation_instances(instances, device_column_to_image_f16_instances<2, NHWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_column_to_image_nhwgc_2d_f32_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<2, NHWGC, F32, F32, ColumnToImage>>>& instances)
{
#ifdef CK_ENABLE_FP32
    add_device_operation_instances(instances, device_column_to_image_f32_instances<2, NHWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_column_to_image_nhwgc_2d_i8_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<2, NHWGC, int8_t, int8_t, ColumnToImage>>>& instances)
{
#ifdef CK_ENABLE_INT8
    add_device_operation_instances(instances, device_column_to_image_i8_instances<2, NHWGC>{});
#else
    ignore = instances;
#endif
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
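The factory functions above are meant to be linked into the instance library and called by client code that collects type-erased device operations. The following is a minimal consumer sketch and not part of this commit: it forward-declares the f16 factory with its template arguments spelled out (assuming, as the surrounding code suggests, that F16 is ck::half_t, NHWGC lives in ck::tensor_layout::convolution, and ColumnToImage in ck::conv_tensor_rearrange_op), and it only assumes the included instance header provides the DeviceConvTensorRearrange interface and those tags.

// Hypothetical consumer sketch (not from the commit): collect the 2D NHWGC
// column-to-image f16 instances registered above and report how many exist in
// this build (zero when CK_ENABLE_FP16 is not defined).
#include <iostream>
#include <memory>
#include <vector>

// Assumption: this header (already used by the instance file above) brings in
// DeviceConvTensorRearrange plus the layout / data-type / op tags it needs.
#include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp"

namespace ck::tensor_operation::device::instance {
// Forward declaration mirroring the definition in the new .cpp file above.
void add_device_column_to_image_nhwgc_2d_f16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<2,
                                  ck::tensor_layout::convolution::NHWGC,
                                  ck::half_t,
                                  ck::half_t,
                                  ck::conv_tensor_rearrange_op::ColumnToImage>>>& instances);
} // namespace ck::tensor_operation::device::instance

int main()
{
    using ck::tensor_operation::device::DeviceConvTensorRearrange;

    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<2,
                                  ck::tensor_layout::convolution::NHWGC,
                                  ck::half_t,
                                  ck::half_t,
                                  ck::conv_tensor_rearrange_op::ColumnToImage>>>
        instances;

    ck::tensor_operation::device::instance::add_device_column_to_image_nhwgc_2d_f16_instances(
        instances);

    std::cout << "registered " << instances.size()
              << " f16 NHWGC column-to-image instances" << std::endl;
    return 0;
}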
library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nwgc_1d_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

using namespace ck::conv_tensor_rearrange_op;

void add_device_column_to_image_nwgc_1d_bf16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<1, NWGC, BF16, BF16, ColumnToImage>>>& instances)
{
#ifdef CK_ENABLE_BF16
    add_device_operation_instances(instances, device_column_to_image_bf16_instances<1, NWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_column_to_image_nwgc_1d_f16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<1, NWGC, F16, F16, ColumnToImage>>>& instances)
{
#ifdef CK_ENABLE_FP16
    add_device_operation_instances(instances, device_column_to_image_f16_instances<1, NWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_column_to_image_nwgc_1d_f32_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<1, NWGC, F32, F32, ColumnToImage>>>& instances)
{
#ifdef CK_ENABLE_FP32
    add_device_operation_instances(instances, device_column_to_image_f32_instances<1, NWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_column_to_image_nwgc_1d_i8_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<1, NWGC, int8_t, int8_t, ColumnToImage>>>& instances)
{
#ifdef CK_ENABLE_INT8
    add_device_operation_instances(instances, device_column_to_image_i8_instances<1, NWGC>{});
#else
    ignore = instances;
#endif
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt

@@ -113,13 +113,15 @@ if (ENABLE_PIPELINE_V2_OPT)
         CK_EXPERIMENTAL_PIPELINE_V2_IGLP_OPT=1
     )

+    # TODO: The "-vectorize-slp=false" LLVM option is a workaround to prevent inefficient instruction scheduling
+    # caused by the SLP Vectorizer. Remove this option after fix the SLP Vectorizer issue.
     # layout=NT
     set_source_files_properties(device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp PROPERTIES
-        COMPILE_OPTIONS ";;"
+        COMPILE_OPTIONS ";-mllvm;-vectorize-slp=false"
         COMPILE_DEFINITIONS "${WAVES_PER_EU_DEFS};${IGLP_OPT_DEFS}")
     # layout=NN
     set_source_files_properties(device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp PROPERTIES
-        COMPILE_OPTIONS ";;"
+        COMPILE_OPTIONS ";-mllvm;-vectorize-slp=false"
         COMPILE_DEFINITIONS "${WAVES_PER_EU_DEFS};${IGLP_OPT_DEFS}")
     # layout=TT
     set_source_files_properties(device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp PROPERTIES
...
library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp

@@ -28,7 +28,7 @@ using S = ck::Sequence<Is...>;
 using PassThrough = ck::tensor_operation::element_wise::PassThrough;

 // static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
-static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding;
+// static constexpr auto GemmMNPadding = ck::tensor_operation::device::GemmSpecialization::MNPadding;
 static constexpr auto GemmMNKPadding = ck::tensor_operation::device::GemmSpecialization::MNKPadding;

 using device_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_generic_instances = std::tuple<
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeLists.txt (new file, mode 100644)

set(GROUPED_CONV3D_FWD_scaleadd_scaleadd_RELU
    xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
    xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
    xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
    xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp)

add_instance_library(device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance
                     ${GROUPED_CONV3D_FWD_scaleadd_scaleadd_RELU})
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
    std::vector<std::unique_ptr<
        DeviceGroupedConvFwdMultipleD<3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK,
                                      BF16, BF16, ck::Tuple<BF16, BF16>, BF16,
                                      PassThrough, PassThrough, ScaleAddScaleAddRelu>>>& instances)
{
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_bf16_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwdDefault>{});
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_bf16_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwd1x1P0>{});
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_bf16_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwd1x1S1P0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instances(
    std::vector<std::unique_ptr<
        DeviceGroupedConvFwdMultipleD<3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK,
                                      F16, F16, ck::Tuple<half_t, half_t>, F16,
                                      PassThrough, PassThrough, ScaleAddScaleAddRelu>>>& instances)
{
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_f16_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwdDefault>{});
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_f16_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwd1x1P0>{});
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_f16_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwd1x1S1P0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instances(
    std::vector<std::unique_ptr<
        DeviceGroupedConvFwdMultipleD<3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK,
                                      F32, F32, ck::Tuple<F32, F32>, F32,
                                      PassThrough, PassThrough, ScaleAddScaleAddRelu>>>& instances)
{
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_f32_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwdDefault>{});
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_f32_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwd1x1P0>{});
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_f32_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwd1x1S1P0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instances(
    std::vector<std::unique_ptr<
        DeviceGroupedConvFwdMultipleD<3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK,
                                      int8_t, int8_t, ck::Tuple<F32, F32>, int8_t,
                                      PassThrough, PassThrough, ScaleAddScaleAddRelu>>>& instances)
{
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_int8_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwdDefault>{});
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_int8_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwd1x1P0>{});
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_int8_instances<
            3, NDHWGC, GKZYXC, ck::Tuple<NDHWGK, NDHWGK>, NDHWGK, ConvFwd1x1S1P0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt

 add_instance_library(device_image_to_column_instance
-    device_image_to_column_nhwc_1d_instance.cpp
-    device_image_to_column_nhwc_2d_instance.cpp
-    device_image_to_column_nhwc_3d_instance.cpp
+    device_image_to_column_gnwc_1d_instance.cpp
+    device_image_to_column_gnhwc_2d_instance.cpp
+    device_image_to_column_gndhwc_3d_instance.cpp
+    device_image_to_column_nwgc_1d_instance.cpp
+    device_image_to_column_nhwgc_2d_instance.cpp
+    device_image_to_column_ndhwgc_3d_instance.cpp
 )
library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp → library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gndhwc_3d_instance.cpp (renamed)

 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp"
 #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
...
@@ -11,7 +11,7 @@ namespace instance {
 using namespace ck::conv_tensor_rearrange_op;

-void add_device_image_to_column_ndhwc_3d_bf16_instances(
+void add_device_image_to_column_gndhwc_3d_bf16_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ImageToColumn>>>& instances)
 {
...
@@ -22,7 +22,7 @@ void add_device_image_to_column_ndhwc_3d_bf16_instances(
 #endif
 }

-void add_device_image_to_column_ndhwc_3d_f16_instances(
+void add_device_image_to_column_gndhwc_3d_f16_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ImageToColumn>>>& instances)
 {
...
@@ -33,7 +33,7 @@ void add_device_image_to_column_ndhwc_3d_f16_instances(
 #endif
 }

-void add_device_image_to_column_ndhwc_3d_f32_instances(
+void add_device_image_to_column_gndhwc_3d_f32_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ImageToColumn>>>& instances)
 {
...
@@ -44,7 +44,7 @@ void add_device_image_to_column_ndhwc_3d_f32_instances(
 #endif
 }

-void add_device_image_to_column_ndhwc_3d_i8_instances(
+void add_device_image_to_column_gndhwc_3d_i8_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ImageToColumn>>>& instances)
...
library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_2d_instance.cpp → library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnhwc_2d_instance.cpp (renamed)

 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp"
 #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
...
@@ -11,7 +11,7 @@ namespace instance {
 using namespace ck::conv_tensor_rearrange_op;

-void add_device_image_to_column_nhwc_2d_bf16_instances(
+void add_device_image_to_column_gnhwc_2d_bf16_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<2, GNHWC, BF16, BF16, ImageToColumn>>>& instances)
 {
...
@@ -22,7 +22,7 @@ void add_device_image_to_column_nhwc_2d_bf16_instances(
 #endif
 }

-void add_device_image_to_column_nhwc_2d_f16_instances(
+void add_device_image_to_column_gnhwc_2d_f16_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<2, GNHWC, F16, F16, ImageToColumn>>>& instances)
 {
...
@@ -33,7 +33,7 @@ void add_device_image_to_column_nhwc_2d_f16_instances(
 #endif
 }

-void add_device_image_to_column_nhwc_2d_f32_instances(
+void add_device_image_to_column_gnhwc_2d_f32_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<2, GNHWC, F32, F32, ImageToColumn>>>& instances)
 {
...
@@ -44,7 +44,7 @@ void add_device_image_to_column_nhwc_2d_f32_instances(
 #endif
 }

-void add_device_image_to_column_nhwc_2d_i8_instances(
+void add_device_image_to_column_gnhwc_2d_i8_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ImageToColumn>>>& instances)
...
library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp → library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnwc_1d_instance.cpp (renamed)

 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp"
 #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
...
@@ -11,7 +11,7 @@ namespace instance {
 using namespace ck::conv_tensor_rearrange_op;

-void add_device_image_to_column_nwc_1d_bf16_instances(
+void add_device_image_to_column_gnwc_1d_bf16_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ImageToColumn>>>& instances)
 {
...
@@ -22,7 +22,7 @@ void add_device_image_to_column_nwc_1d_bf16_instances(
 #endif
 }

-void add_device_image_to_column_nwc_1d_f16_instances(
+void add_device_image_to_column_gnwc_1d_f16_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<1, GNWC, F16, F16, ImageToColumn>>>& instances)
 {
...
@@ -33,7 +33,7 @@ void add_device_image_to_column_nwc_1d_f16_instances(
 #endif
 }

-void add_device_image_to_column_nwc_1d_f32_instances(
+void add_device_image_to_column_gnwc_1d_f32_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<1, GNWC, F32, F32, ImageToColumn>>>& instances)
 {
...
@@ -44,7 +44,7 @@ void add_device_image_to_column_nwc_1d_f32_instances(
 #endif
 }

-void add_device_image_to_column_nwc_1d_i8_instances(
+void add_device_image_to_column_gnwc_1d_i8_instances(
     std::vector<std::unique_ptr<
         DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ImageToColumn>>>& instances)
 {
...
library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_ndhwgc_3d_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

using namespace ck::conv_tensor_rearrange_op;

void add_device_image_to_column_ndhwgc_3d_bf16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<3, NDHWGC, BF16, BF16, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_BF16
    add_device_operation_instances(instances, device_image_to_column_bf16_instances<3, NDHWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_image_to_column_ndhwgc_3d_f16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<3, NDHWGC, F16, F16, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_FP16
    add_device_operation_instances(instances, device_image_to_column_f16_instances<3, NDHWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_image_to_column_ndhwgc_3d_f32_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<3, NDHWGC, F32, F32, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_FP32
    add_device_operation_instances(instances, device_image_to_column_f32_instances<3, NDHWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_image_to_column_ndhwgc_3d_i8_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<3, NDHWGC, int8_t, int8_t, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_INT8
    add_device_operation_instances(instances, device_image_to_column_i8_instances<3, NDHWGC>{});
#else
    ignore = instances;
#endif
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwgc_2d_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

using namespace ck::conv_tensor_rearrange_op;

void add_device_image_to_column_nhwgc_2d_bf16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<2, NHWGC, BF16, BF16, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_BF16
    add_device_operation_instances(instances, device_image_to_column_bf16_instances<2, NHWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_image_to_column_nhwgc_2d_f16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<2, NHWGC, F16, F16, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_FP16
    add_device_operation_instances(instances, device_image_to_column_f16_instances<2, NHWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_image_to_column_nhwgc_2d_f32_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<2, NHWGC, F32, F32, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_FP32
    add_device_operation_instances(instances, device_image_to_column_f32_instances<2, NHWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_image_to_column_nhwgc_2d_i8_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<2, NHWGC, int8_t, int8_t, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_INT8
    add_device_operation_instances(instances, device_image_to_column_i8_instances<2, NHWGC>{});
#else
    ignore = instances;
#endif
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nwgc_1d_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

using namespace ck::conv_tensor_rearrange_op;

void add_device_image_to_column_nwgc_1d_bf16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<1, NWGC, BF16, BF16, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_BF16
    add_device_operation_instances(instances, device_image_to_column_bf16_instances<1, NWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_image_to_column_nwgc_1d_f16_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<1, NWGC, F16, F16, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_FP16
    add_device_operation_instances(instances, device_image_to_column_f16_instances<1, NWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_image_to_column_nwgc_1d_f32_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<1, NWGC, F32, F32, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_FP32
    add_device_operation_instances(instances, device_image_to_column_f32_instances<1, NWGC>{});
#else
    ignore = instances;
#endif
}

void add_device_image_to_column_nwgc_1d_i8_instances(
    std::vector<std::unique_ptr<
        DeviceConvTensorRearrange<1, NWGC, int8_t, int8_t, ImageToColumn>>>& instances)
{
#ifdef CK_ENABLE_INT8
    add_device_operation_instances(instances, device_image_to_column_i8_instances<1, NWGC>{});
#else
    ignore = instances;
#endif
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
profiler/README.md

@@ -194,7 +194,8 @@ Note: This kernel use atomic add, this will cause output buffer to be accumulate
 #                 1: Input fp16, Weight fp16, Output fp16
 #                 2: Input bf16, Weight bf16, Output bf16
 #                 3: Input int8, Weight int8, Output int8)
-# arg3: tensor layout (0: Input[N, Hi, Wi, C], Output[N * Ho * Wo, Y * X * C])
+# arg3: tensor layout (0: Input[G, N, Hi, Wi, C], Output[G * N * Ho * Wo, Y * X * C],
+#                      1: Input[N, Hi, Wi, G, C], Output[N * Ho * Wo * G, Y * X * C])
 # arg4: verification (0: no, 1: yes)
 # arg5: initialization (0: no init, 1: integer value, 2: decimal value)
 # arg6: print tensor value (0: no; 1: yes)
...
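To make the two layout choices concrete, here is a small worked example with illustrative sizes that are not taken from the commit: both layouts produce a GEMM matrix with Y * X * C columns, and the row count is the same; only the ordering of the row index differs (group outermost for layout 0, group innermost for layout 1).

// Illustrative arithmetic only (not code from the commit): GEMM shapes produced
// by image-to-column for the two profiler layout options.
#include <iostream>

int main()
{
    // Hypothetical 2D convolution sizes.
    const long G = 2, N = 1, Ho = 4, Wo = 4, Y = 3, X = 3, C = 8;

    // Layout 0: Input[G, N, Hi, Wi, C] -> Output[G * N * Ho * Wo, Y * X * C]
    // Layout 1: Input[N, Hi, Wi, G, C] -> Output[N * Ho * Wo * G, Y * X * C]
    const long rows = G * N * Ho * Wo; // same count either way
    const long cols = Y * X * C;
    std::cout << "GEMM output matrix: " << rows << " x " << cols << std::endl; // 32 x 72
    return 0;
}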
profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp

@@ -93,6 +93,26 @@ static auto make_ref_op()
     }
 }

+template <typename InputLayout>
+static auto create_gemm_desc(const ck::index_t G, const ck::index_t NDoHoWo, const ck::index_t CZYX)
+{
+    using namespace ck::tensor_layout::convolution;
+    if constexpr(std::is_same_v<InputLayout, GNWC> || std::is_same_v<InputLayout, GNHWC> ||
+                 std::is_same_v<InputLayout, GNDHWC>)
+    {
+        return HostTensorDescriptor({G, NDoHoWo, CZYX});
+    }
+    else if constexpr(std::is_same_v<InputLayout, NWGC> || std::is_same_v<InputLayout, NHWGC> ||
+                      std::is_same_v<InputLayout, NDHWGC>)
+    {
+        return HostTensorDescriptor({G, NDoHoWo, CZYX}, {CZYX, CZYX * G, 1});
+    }
+    else
+    {
+        throw std::runtime_error("Unsupported layout!");
+    }
+}
+
 template <index_t NDimSpatial,
           typename InputLayout,
           typename InputDataType,
...
@@ -116,13 +136,13 @@ bool profile_conv_tensor_rearrange_impl(int do_verification,
     const auto image_desc =
         ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InputLayout>(
             conv_param);
-    const auto gemm_desc = HostTensorDescriptor({NDoHoWo, CZYX});
+    const auto gemm_desc = create_gemm_desc<InputLayout>(conv_param.G_, NDoHoWo, CZYX);

     std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
     std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
     std::array<ck::index_t, NDimSpatial> output_spatial_lengths{};
     std::array<ck::index_t, NDimSpatial + 3> image_g_n_c_wis_strides{};
-    std::array<ck::index_t, 2> gemm_m_k_strides{};
+    std::array<ck::index_t, 3> gemm_g_m_k_strides{};
     std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
     std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
     std::array<ck::index_t, NDimSpatial> input_left_pads{};
...
@@ -134,7 +154,7 @@ bool profile_conv_tensor_rearrange_impl(int do_verification,
     copy(conv_param.filter_spatial_lengths_, filter_spatial_lengths);
     copy(conv_param.output_spatial_lengths_, output_spatial_lengths);
     copy(image_desc.GetStrides(), image_g_n_c_wis_strides);
-    copy(gemm_desc.GetStrides(), gemm_m_k_strides);
+    copy(gemm_desc.GetStrides(), gemm_g_m_k_strides);
     copy(conv_param.conv_filter_strides_, conv_filter_strides);
     copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
     copy(conv_param.input_left_pads_, input_left_pads);
...
@@ -212,13 +232,14 @@ bool profile_conv_tensor_rearrange_impl(int do_verification,
         auto argument_ptr = op_ptr->MakeArgumentPointer(
             static_cast<InputDataType*>(in_device_buf.GetDeviceBuffer()),
             static_cast<OutputDataType*>(out_device_buf.GetDeviceBuffer()),
+            conv_param.G_,
             conv_param.N_,
             conv_param.C_,
             input_spatial_lengths,
             filter_spatial_lengths,
             output_spatial_lengths,
             image_g_n_c_wis_strides,
-            gemm_m_k_strides,
+            gemm_g_m_k_strides,
             conv_filter_strides,
             conv_filter_dilations,
             input_left_pads,
...
@@ -234,7 +255,7 @@ bool profile_conv_tensor_rearrange_impl(int do_verification,
         float avg_time =
             invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
         std::size_t num_btype =
-            NDoHoWo * CZYX * (sizeof(OutputDataType) + sizeof(InputDataType));
+            conv_param.G_ * NDoHoWo * CZYX * (sizeof(OutputDataType) + sizeof(InputDataType));
         float gb_per_sec = num_btype / 1.E6 / avg_time;
         std::cout << "Perf: " << std::setw(10) << avg_time << " ms, " << gb_per_sec << " GB/s, "
                   << op_name << std::endl;
...
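The behavioural core of the new create_gemm_desc helper is the stride set chosen for the N...GC layouts: the descriptor is logically (G, NDoHoWo, CZYX), but its strides are {CZYX, CZYX * G, 1}, so the group index varies faster than the GEMM row index. The sketch below illustrates the resulting element offsets; the sizes are illustrative, and the packed-stride comment for the GN...C case is an assumption based on the single-argument HostTensorDescriptor constructor being packed.

// Illustrative offset arithmetic only (not code from the commit).
#include <cstdint>
#include <iostream>

// Linear offset of element (g, m, k) given per-dimension strides, matching the
// usual descriptor convention offset = g*stride_g + m*stride_m + k*stride_k.
static std::int64_t offset(std::int64_t g, std::int64_t m, std::int64_t k,
                           std::int64_t stride_g, std::int64_t stride_m, std::int64_t stride_k)
{
    return g * stride_g + m * stride_m + k * stride_k;
}

int main()
{
    const std::int64_t G = 2, CZYX = 8; // illustrative sizes

    // GNWC/GNHWC/GNDHWC: packed (G, NDoHoWo, CZYX) descriptor (assumed strides
    // {NDoHoWo * CZYX, CZYX, 1}).
    // NWGC/NHWGC/NDHWGC: create_gemm_desc sets strides {CZYX, CZYX * G, 1}, so the
    // two groups of the same GEMM row sit CZYX elements apart.
    std::cout << "NHWGC offset of (g=1, m=0, k=0): "
              << offset(1, 0, 0, CZYX, CZYX * G, 1) << std::endl; // 8
    std::cout << "NHWGC offset of (g=0, m=1, k=0): "
              << offset(0, 1, 0, CZYX, CZYX * G, 1) << std::endl; // 16
    return 0;
}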
profiler/include/profiler/profile_gemm_impl.hpp

@@ -76,8 +76,8 @@ int profile_gemm_impl(int do_verification,
         b_k_n.GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5});
         break;
     default:
-        a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 1.0});
-        b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
+        a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 0.1});
+        b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.05, 0.05});
     }

     using AElementOp = ck::tensor_operation::element_wise::PassThrough;
...
profiler/src/profile_conv_tensor_rearrange.cpp

@@ -19,7 +19,8 @@ enum struct RearrangeOp
 enum struct ConvLayout
 {
-    NHWC,  // 0
+    GNHWC, // 0
+    NHWGC, // 1
 };

 enum struct DataType
...
@@ -42,7 +43,8 @@ static void print_helper_msg()
            << " 1: Input fp16, Weight fp16, Output fp16\n"
            << " 2: Input bf16, Weight bf16, Output bf16\n"
            << " 3: Input int8, Weight int8, Output int8)\n"
-           << "arg3: tensor layout (0: Input[N, Hi, Wi, C], Output[N * Ho * Wo, Y * X * C])\n"
+           << "arg3: tensor layout (0: Input[G, N, Hi, Wi, C], Output[G * N * Ho * Wo, Y * X * C],\n"
+           << "                     1: Input[N, Hi, Wi, G, C], Output[N * Ho * Wo * G, Y * X * C])\n"
            << "arg4: verification (0: no, 1: yes)\n"
            << "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
            << "arg6: print tensor value (0: no; 1: yes)\n"
...
@@ -114,11 +116,9 @@ int profile_conv_tensor_rearrange(int argc, char* argv[])
         return pass ? 0 : 1;
     };

-    // Image To Column
     if(rearrange_op == RearrangeOp::ImageToColumn)
     {
-        // NHWC
-        if(layout == ConvLayout::NHWC)
+        if(layout == ConvLayout::GNHWC)
         {
             if(num_dim_spatial == 1)
             {
...
@@ -178,11 +178,70 @@ int profile_conv_tensor_rearrange(int argc, char* argv[])
                 }
             }
         }
+        else if(layout == ConvLayout::NHWGC)
+        {
+            if(num_dim_spatial == 1)
+            {
+                if(data_type == DataType::F32_F32)
+                {
+                    return profile(I1, NWGC{}, F32{}, F32{}, ImageToColumn{});
+                }
+                else if(data_type == DataType::F16_F16)
+                {
+                    return profile(I1, NWGC{}, F16{}, F16{}, ImageToColumn{});
+                }
+                else if(data_type == DataType::BF16_BF16)
+                {
+                    return profile(I1, NWGC{}, BF16{}, BF16{}, ImageToColumn{});
+                }
+                else if(data_type == DataType::INT8_INT8)
+                {
+                    return profile(I1, NWGC{}, INT8{}, INT8{}, ImageToColumn{});
+                }
+            }
+            else if(num_dim_spatial == 2)
+            {
+                if(data_type == DataType::F32_F32)
+                {
+                    return profile(I2, NHWGC{}, F32{}, F32{}, ImageToColumn{});
+                }
+                else if(data_type == DataType::F16_F16)
+                {
+                    return profile(I2, NHWGC{}, F16{}, F16{}, ImageToColumn{});
+                }
+                else if(data_type == DataType::BF16_BF16)
+                {
+                    return profile(I2, NHWGC{}, BF16{}, BF16{}, ImageToColumn{});
+                }
+                else if(data_type == DataType::INT8_INT8)
+                {
+                    return profile(I2, NHWGC{}, INT8{}, INT8{}, ImageToColumn{});
+                }
+            }
+            else if(num_dim_spatial == 3)
+            {
+                if(data_type == DataType::F32_F32)
+                {
+                    return profile(I3, NDHWGC{}, F32{}, F32{}, ImageToColumn{});
+                }
+                else if(data_type == DataType::F16_F16)
+                {
+                    return profile(I3, NDHWGC{}, F16{}, F16{}, ImageToColumn{});
+                }
+                else if(data_type == DataType::BF16_BF16)
+                {
+                    return profile(I3, NDHWGC{}, BF16{}, BF16{}, ImageToColumn{});
+                }
+                else if(data_type == DataType::INT8_INT8)
+                {
+                    return profile(I3, NDHWGC{}, INT8{}, INT8{}, ImageToColumn{});
+                }
+            }
+        }
     }
     else if(rearrange_op == RearrangeOp::ColumnToImage)
     {
-        // NHWC
-        if(layout == ConvLayout::NHWC)
+        if(layout == ConvLayout::GNHWC)
         {
             if(num_dim_spatial == 1)
             {
...
@@ -242,6 +301,66 @@ int profile_conv_tensor_rearrange(int argc, char* argv[])
                 }
             }
         }
+        else if(layout == ConvLayout::NHWGC)
+        {
+            if(num_dim_spatial == 1)
+            {
+                if(data_type == DataType::F32_F32)
+                {
+                    return profile(I1, NWGC{}, F32{}, F32{}, ColumnToImage{});
+                }
+                else if(data_type == DataType::F16_F16)
+                {
+                    return profile(I1, NWGC{}, F16{}, F16{}, ColumnToImage{});
+                }
+                else if(data_type == DataType::BF16_BF16)
+                {
+                    return profile(I1, NWGC{}, BF16{}, BF16{}, ColumnToImage{});
+                }
+                else if(data_type == DataType::INT8_INT8)
+                {
+                    return profile(I1, NWGC{}, INT8{}, INT8{}, ColumnToImage{});
+                }
+            }
+            else if(num_dim_spatial == 2)
+            {
+                if(data_type == DataType::F32_F32)
+                {
+                    return profile(I2, NHWGC{}, F32{}, F32{}, ColumnToImage{});
+                }
+                else if(data_type == DataType::F16_F16)
+                {
+                    return profile(I2, NHWGC{}, F16{}, F16{}, ColumnToImage{});
+                }
+                else if(data_type == DataType::BF16_BF16)
+                {
+                    return profile(I2, NHWGC{}, BF16{}, BF16{}, ColumnToImage{});
+                }
+                else if(data_type == DataType::INT8_INT8)
+                {
+                    return profile(I2, NHWGC{}, INT8{}, INT8{}, ColumnToImage{});
+                }
+            }
+            else if(num_dim_spatial == 3)
+            {
+                if(data_type == DataType::F32_F32)
+                {
+                    return profile(I3, NDHWGC{}, F32{}, F32{}, ColumnToImage{});
+                }
+                else if(data_type == DataType::F16_F16)
+                {
+                    return profile(I3, NDHWGC{}, F16{}, F16{}, ColumnToImage{});
+                }
+                else if(data_type == DataType::BF16_BF16)
+                {
+                    return profile(I3, NDHWGC{}, BF16{}, BF16{}, ColumnToImage{});
+                }
+                else if(data_type == DataType::INT8_INT8)
+                {
+                    return profile(I3, NDHWGC{}, INT8{}, INT8{}, ColumnToImage{});
+                }
+            }
+        }
     }

     std::cout << "this data_type & layout is not implemented" << std::endl;
...