Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
2edac9f1
"megatron/vscode:/vscode.git/clone" did not exist on "c52c9dbf3e928ce7fbeda2b0681ac15792fc0ff5"
Commit
2edac9f1
authored
May 30, 2024
by
Bartlomiej Kocot
Browse files
Integrate universal gemm with conv bwd data
parent
34f3dfdd
Changes
50
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
780 additions
and
97 deletions
+780
-97
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_mem_inter_instance.cpp
...bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_mem_inter_instance.cpp
+51
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_mem_intra_instance.cpp
...bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_mem_intra_instance.cpp
+51
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp
...wd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp
+51
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp
...wd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp
+51
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instance.cpp
...bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instance.cpp
+51
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instance.cpp
...bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instance.cpp
+51
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instance.cpp
...bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instance.cpp
+51
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instance.cpp
...bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instance.cpp
+51
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt
...ation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt
+22
-7
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/comp/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_comp_instance.cpp
..._bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_comp_instance.cpp
+15
-15
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/comp/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_comp_instance.cpp
...d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_comp_instance.cpp
+15
-15
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/comp/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_comp_instance.cpp
...d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_comp_instance.cpp
+15
-15
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/comp/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp
..._bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp
+15
-15
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/comp/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instance.cpp
...d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instance.cpp
+15
-15
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/comp/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instance.cpp
...d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instance.cpp
+15
-15
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_mem_inter_instance.cpp
...data_xdl_gndhwc_gkzyxc_gndhwk_bf16_mem_inter_instance.cpp
+52
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_mem_intra_instance.cpp
...data_xdl_gndhwc_gkzyxc_gndhwk_bf16_mem_intra_instance.cpp
+52
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_mem_inter_instance.cpp
..._data_xdl_gndhwc_gkzyxc_gndhwk_f16_mem_inter_instance.cpp
+52
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_mem_intra_instance.cpp
..._data_xdl_gndhwc_gkzyxc_gndhwk_f16_mem_intra_instance.cpp
+52
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_mem_inter_instance.cpp
..._data_xdl_gndhwc_gkzyxc_gndhwk_f32_mem_inter_instance.cpp
+52
-0
No files found.
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_mem_inter_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Passes the F32 XDL "mem" instance lists for 2D grouped conv bwd data (layout tags
// GNHWK / GKYXC / GNHWC, Interwave template argument) to add_device_operation_instances.
// Original note: out[g, n, hi, wi, c] * wei[g, k, y, x, c] = in[g, n, ho, wo, k]
// NOTE(review): the layout tags (GNHWK first, GNHWC last) suggest the index labels on
// `out` and `in` in that note are swapped - confirm.
void add_device_grouped_conv2d_bwd_data_xdl_gnhwk_gkyxc_gnhwc_f32_mem_inter_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<2,
                                                                  GNHWK,
                                                                  GKYXC,
                                                                  Empty_Tuple,
                                                                  GNHWC,
                                                                  F32,
                                                                  F32,
                                                                  Empty_Tuple,
                                                                  F32,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Instance list selected by ConvBwdDataDefault.
    using DefaultSpecInstances =
        device_grouped_conv_bwd_data_xdl_f32_mem_instances<2,
                                                           GNHWK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           GNHWC,
                                                           ConvBwdDataDefault,
                                                           Interwave>;

    // Instance list selected by ConvBwdDataFilter1x1Stride1Pad0.
    using Filter1x1SpecInstances =
        device_grouped_conv_bwd_data_xdl_f32_mem_instances<2,
                                                           GNHWK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           GNHWC,
                                                           ConvBwdDataFilter1x1Stride1Pad0,
                                                           Interwave>;

    // Same order as before: default specialization first, then Filter1x1Stride1Pad0.
    add_device_operation_instances(instances, DefaultSpecInstances{});
    add_device_operation_instances(instances, Filter1x1SpecInstances{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_mem_intra_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Passes the F32 XDL "mem" instance lists for 2D grouped conv bwd data (layout tags
// GNHWK / GKYXC / GNHWC, Intrawave template argument) to add_device_operation_instances.
// Original note: out[g, n, hi, wi, c] * wei[g, k, y, x, c] = in[g, n, ho, wo, k]
// NOTE(review): the layout tags (GNHWK first, GNHWC last) suggest the index labels on
// `out` and `in` in that note are swapped - confirm.
void add_device_grouped_conv2d_bwd_data_xdl_gnhwk_gkyxc_gnhwc_f32_mem_intra_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<2,
                                                                  GNHWK,
                                                                  GKYXC,
                                                                  Empty_Tuple,
                                                                  GNHWC,
                                                                  F32,
                                                                  F32,
                                                                  Empty_Tuple,
                                                                  F32,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Instance list selected by ConvBwdDataDefault.
    using DefaultSpecInstances =
        device_grouped_conv_bwd_data_xdl_f32_mem_instances<2,
                                                           GNHWK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           GNHWC,
                                                           ConvBwdDataDefault,
                                                           Intrawave>;

    // Instance list selected by ConvBwdDataFilter1x1Stride1Pad0.
    using Filter1x1SpecInstances =
        device_grouped_conv_bwd_data_xdl_f32_mem_instances<2,
                                                           GNHWK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           GNHWC,
                                                           ConvBwdDataFilter1x1Stride1Pad0,
                                                           Intrawave>;

    // Same order as before: default specialization first, then Filter1x1Stride1Pad0.
    add_device_operation_instances(instances, DefaultSpecInstances{});
    add_device_operation_instances(instances, Filter1x1SpecInstances{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Passes the BF16 XDL "mem" instance lists for 2D grouped conv bwd data (layout tags
// NHWGK / GKYXC / NHWGC, Interwave template argument) to add_device_operation_instances.
// Original note: out[n, hi, wi, g, c] * wei[g, k, y, x, c] = in[n, ho, wo, g, k]
// NOTE(review): the layout tags (NHWGK first, NHWGC last) suggest the index labels on
// `out` and `in` in that note are swapped - confirm.
void add_device_grouped_conv2d_bwd_data_xdl_nhwgk_gkyxc_nhwgc_bf16_mem_inter_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<2,
                                                                  NHWGK,
                                                                  GKYXC,
                                                                  Empty_Tuple,
                                                                  NHWGC,
                                                                  BF16,
                                                                  BF16,
                                                                  Empty_Tuple,
                                                                  BF16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Instance list selected by ConvBwdDataDefault.
    using DefaultSpecInstances =
        device_grouped_conv_bwd_data_xdl_bf16_mem_instances<2,
                                                            NHWGK,
                                                            GKYXC,
                                                            Empty_Tuple,
                                                            NHWGC,
                                                            ConvBwdDataDefault,
                                                            Interwave>;

    // Instance list selected by ConvBwdDataFilter1x1Stride1Pad0.
    using Filter1x1SpecInstances =
        device_grouped_conv_bwd_data_xdl_bf16_mem_instances<2,
                                                            NHWGK,
                                                            GKYXC,
                                                            Empty_Tuple,
                                                            NHWGC,
                                                            ConvBwdDataFilter1x1Stride1Pad0,
                                                            Interwave>;

    // Same order as before: default specialization first, then Filter1x1Stride1Pad0.
    add_device_operation_instances(instances, DefaultSpecInstances{});
    add_device_operation_instances(instances, Filter1x1SpecInstances{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Passes the BF16 XDL "mem" instance lists for 2D grouped conv bwd data (layout tags
// NHWGK / GKYXC / NHWGC, Intrawave template argument) to add_device_operation_instances.
// Original note: out[n, hi, wi, g, c] * wei[g, k, y, x, c] = in[n, ho, wo, g, k]
// NOTE(review): the layout tags (NHWGK first, NHWGC last) suggest the index labels on
// `out` and `in` in that note are swapped - confirm.
void add_device_grouped_conv2d_bwd_data_xdl_nhwgk_gkyxc_nhwgc_bf16_mem_intra_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<2,
                                                                  NHWGK,
                                                                  GKYXC,
                                                                  Empty_Tuple,
                                                                  NHWGC,
                                                                  BF16,
                                                                  BF16,
                                                                  Empty_Tuple,
                                                                  BF16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Instance list selected by ConvBwdDataDefault.
    using DefaultSpecInstances =
        device_grouped_conv_bwd_data_xdl_bf16_mem_instances<2,
                                                            NHWGK,
                                                            GKYXC,
                                                            Empty_Tuple,
                                                            NHWGC,
                                                            ConvBwdDataDefault,
                                                            Intrawave>;

    // Instance list selected by ConvBwdDataFilter1x1Stride1Pad0.
    using Filter1x1SpecInstances =
        device_grouped_conv_bwd_data_xdl_bf16_mem_instances<2,
                                                            NHWGK,
                                                            GKYXC,
                                                            Empty_Tuple,
                                                            NHWGC,
                                                            ConvBwdDataFilter1x1Stride1Pad0,
                                                            Intrawave>;

    // Same order as before: default specialization first, then Filter1x1Stride1Pad0.
    add_device_operation_instances(instances, DefaultSpecInstances{});
    add_device_operation_instances(instances, Filter1x1SpecInstances{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Passes the F16 XDL "mem" instance lists for 2D grouped conv bwd data (layout tags
// NHWGK / GKYXC / NHWGC, Interwave template argument) to add_device_operation_instances.
// Original note: out[n, hi, wi, g, c] * wei[g, k, y, x, c] = in[n, ho, wo, g, k]
// NOTE(review): the layout tags (NHWGK first, NHWGC last) suggest the index labels on
// `out` and `in` in that note are swapped - confirm.
void add_device_grouped_conv2d_bwd_data_xdl_nhwgk_gkyxc_nhwgc_f16_mem_inter_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<2,
                                                                  NHWGK,
                                                                  GKYXC,
                                                                  Empty_Tuple,
                                                                  NHWGC,
                                                                  F16,
                                                                  F16,
                                                                  Empty_Tuple,
                                                                  F16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Instance list selected by ConvBwdDataDefault.
    using DefaultSpecInstances =
        device_grouped_conv_bwd_data_xdl_f16_mem_instances<2,
                                                           NHWGK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           NHWGC,
                                                           ConvBwdDataDefault,
                                                           Interwave>;

    // Instance list selected by ConvBwdDataFilter1x1Stride1Pad0.
    using Filter1x1SpecInstances =
        device_grouped_conv_bwd_data_xdl_f16_mem_instances<2,
                                                           NHWGK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           NHWGC,
                                                           ConvBwdDataFilter1x1Stride1Pad0,
                                                           Interwave>;

    // Same order as before: default specialization first, then Filter1x1Stride1Pad0.
    add_device_operation_instances(instances, DefaultSpecInstances{});
    add_device_operation_instances(instances, Filter1x1SpecInstances{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Passes the F16 XDL "mem" instance lists for 2D grouped conv bwd data (layout tags
// NHWGK / GKYXC / NHWGC, Intrawave template argument) to add_device_operation_instances.
// Original note: out[n, hi, wi, g, c] * wei[g, k, y, x, c] = in[n, ho, wo, g, k]
// NOTE(review): the layout tags (NHWGK first, NHWGC last) suggest the index labels on
// `out` and `in` in that note are swapped - confirm.
void add_device_grouped_conv2d_bwd_data_xdl_nhwgk_gkyxc_nhwgc_f16_mem_intra_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<2,
                                                                  NHWGK,
                                                                  GKYXC,
                                                                  Empty_Tuple,
                                                                  NHWGC,
                                                                  F16,
                                                                  F16,
                                                                  Empty_Tuple,
                                                                  F16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Instance list selected by ConvBwdDataDefault.
    using DefaultSpecInstances =
        device_grouped_conv_bwd_data_xdl_f16_mem_instances<2,
                                                           NHWGK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           NHWGC,
                                                           ConvBwdDataDefault,
                                                           Intrawave>;

    // Instance list selected by ConvBwdDataFilter1x1Stride1Pad0.
    using Filter1x1SpecInstances =
        device_grouped_conv_bwd_data_xdl_f16_mem_instances<2,
                                                           NHWGK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           NHWGC,
                                                           ConvBwdDataFilter1x1Stride1Pad0,
                                                           Intrawave>;

    // Same order as before: default specialization first, then Filter1x1Stride1Pad0.
    add_device_operation_instances(instances, DefaultSpecInstances{});
    add_device_operation_instances(instances, Filter1x1SpecInstances{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Passes the F32 XDL "mem" instance lists for 2D grouped conv bwd data (layout tags
// NHWGK / GKYXC / NHWGC, Interwave template argument) to add_device_operation_instances.
// Original note: out[n, hi, wi, g, c] * wei[g, k, y, x, c] = in[n, ho, wo, g, k]
// NOTE(review): the layout tags (NHWGK first, NHWGC last) suggest the index labels on
// `out` and `in` in that note are swapped - confirm.
void add_device_grouped_conv2d_bwd_data_xdl_nhwgk_gkyxc_nhwgc_f32_mem_inter_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<2,
                                                                  NHWGK,
                                                                  GKYXC,
                                                                  Empty_Tuple,
                                                                  NHWGC,
                                                                  F32,
                                                                  F32,
                                                                  Empty_Tuple,
                                                                  F32,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Instance list selected by ConvBwdDataDefault.
    using DefaultSpecInstances =
        device_grouped_conv_bwd_data_xdl_f32_mem_instances<2,
                                                           NHWGK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           NHWGC,
                                                           ConvBwdDataDefault,
                                                           Interwave>;

    // Instance list selected by ConvBwdDataFilter1x1Stride1Pad0.
    using Filter1x1SpecInstances =
        device_grouped_conv_bwd_data_xdl_f32_mem_instances<2,
                                                           NHWGK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           NHWGC,
                                                           ConvBwdDataFilter1x1Stride1Pad0,
                                                           Interwave>;

    // Same order as before: default specialization first, then Filter1x1Stride1Pad0.
    add_device_operation_instances(instances, DefaultSpecInstances{});
    add_device_operation_instances(instances, Filter1x1SpecInstances{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/mem/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Passes the F32 XDL "mem" instance lists for 2D grouped conv bwd data (layout tags
// NHWGK / GKYXC / NHWGC, Intrawave template argument) to add_device_operation_instances.
// Original note: out[n, hi, wi, g, c] * wei[g, k, y, x, c] = in[n, ho, wo, g, k]
// NOTE(review): the layout tags (NHWGK first, NHWGC last) suggest the index labels on
// `out` and `in` in that note are swapped - confirm.
void add_device_grouped_conv2d_bwd_data_xdl_nhwgk_gkyxc_nhwgc_f32_mem_intra_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<2,
                                                                  NHWGK,
                                                                  GKYXC,
                                                                  Empty_Tuple,
                                                                  NHWGC,
                                                                  F32,
                                                                  F32,
                                                                  Empty_Tuple,
                                                                  F32,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Instance list selected by ConvBwdDataDefault.
    using DefaultSpecInstances =
        device_grouped_conv_bwd_data_xdl_f32_mem_instances<2,
                                                           NHWGK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           NHWGC,
                                                           ConvBwdDataDefault,
                                                           Intrawave>;

    // Instance list selected by ConvBwdDataFilter1x1Stride1Pad0.
    using Filter1x1SpecInstances =
        device_grouped_conv_bwd_data_xdl_f32_mem_instances<2,
                                                           NHWGK,
                                                           GKYXC,
                                                           Empty_Tuple,
                                                           NHWGC,
                                                           ConvBwdDataFilter1x1Stride1Pad0,
                                                           Intrawave>;

    // Same order as before: default specialization first, then Filter1x1Stride1Pad0.
    add_device_operation_instances(instances, DefaultSpecInstances{});
    add_device_operation_instances(instances, Filter1x1SpecInstances{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt
View file @
2edac9f1
# ONLY XDL_AND_WMMA_KERNELS
set
(
GROUPED_CONV3D_BWD_DATA
xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp
xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp
xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp
xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
xdl/comp/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_comp_instance.cpp
xdl/comp/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_comp_instance.cpp
xdl/comp/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_comp_instance.cpp
xdl/comp/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instance.cpp
xdl/comp/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp
xdl/comp/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_mem_intra_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instance.cpp
wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp
wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp
...
...
@@ -17,7 +32,7 @@ set(GROUPED_CONV3D_BWD_DATA
if
((
DTYPES MATCHES
"fp8"
AND DTYPES MATCHES
"bf8"
AND DTYPES MATCHES
"fp16"
)
OR NOT DEFINED DTYPES
)
list
(
APPEND GROUPED_CONV3D_BWD_DATA
xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp
)
xdl/
mem/
device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_
mem_
instance.cpp
)
endif
()
# Hands the source list held in GROUPED_CONV3D_BWD_DATA to add_instance_library under the
# name device_grouped_conv3d_bwd_data_instance.
# NOTE(review): add_instance_library is defined elsewhere; presumably it creates the
# library target from these sources - confirm.
add_instance_library
(
device_grouped_conv3d_bwd_data_instance
${
GROUPED_CONV3D_BWD_DATA
}
)
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp
→
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/
comp/
device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_
comp_
instance.cpp
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_
comp_
instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -10,7 +10,7 @@ namespace device {
namespace
instance
{
// Compilation parameters for out[g, n, di, hi, wi, c] * wei[g, k, z, y, x, c] = in[g, n, do, ho,
// wo, k]
void
add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_bf16_instances
(
void
add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_bf16_
comp_
instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvBwdDataMultipleD
<
3
,
GNDHWK
,
GKZYXC
,
...
...
@@ -27,21 +27,21 @@ void add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_bf16_instances(
// 1. Default
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_bf16_instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataDefault
>
{});
device_grouped_conv_bwd_data_xdl_bf16_
comp_
instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataDefault
>
{});
// 2. Filter1x1Stride1Pad0
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_bf16_instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
device_grouped_conv_bwd_data_xdl_bf16_
comp_
instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp
→
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/
comp/
device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_
comp_
instance.cpp
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_
comp_
instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -10,7 +10,7 @@ namespace device {
namespace
instance
{
// Compilation parameters for out[g, n, di, hi, wi, c] * wei[g, k, z, y, x, c] = in[g, n, do, ho,
// wo, k]
void
add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_f16_instances
(
void
add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_f16_
comp_
instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvBwdDataMultipleD
<
3
,
GNDHWK
,
GKZYXC
,
...
...
@@ -27,21 +27,21 @@ void add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_f16_instances(
// 1. Default
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_f16_instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataDefault
>
{});
device_grouped_conv_bwd_data_xdl_f16_
comp_
instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataDefault
>
{});
// 2. Filter1x1Stride1Pad0
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_f16_instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
device_grouped_conv_bwd_data_xdl_f16_
comp_
instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp
→
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/
comp/
device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_
comp_
instance.cpp
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_
comp_
instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -10,7 +10,7 @@ namespace device {
namespace
instance
{
// Compilation parameters for out[g, n, di, hi, wi, c] * wei[g, k, z, y, x, c] = in[g, n, do, ho,
// wo, k]
void
add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_f32_instances
(
void
add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_f32_
comp_
instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvBwdDataMultipleD
<
3
,
GNDHWK
,
GKZYXC
,
...
...
@@ -27,21 +27,21 @@ void add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_f32_instances(
// 1. Default
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_f32_instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataDefault
>
{});
device_grouped_conv_bwd_data_xdl_f32_
comp_
instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataDefault
>
{});
// 2. Filter1x1Stride1Pad0
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_f32_instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
device_grouped_conv_bwd_data_xdl_f32_
comp_
instances
<
3
,
GNDHWK
,
GKZYXC
,
Empty_Tuple
,
GNDHWC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
→
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/
comp/
device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_
comp_
instance.cpp
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_
comp_
instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -10,7 +10,7 @@ namespace device {
namespace
instance
{
// Compilation parameters for out[n, di, hi, wi, g, c] * wei[g, k, z, y, x, c] = in[n, do, ho, wo,
// g, k]
void
add_device_grouped_conv3d_bwd_data_xdl_ndhwgk_gkzyxc_ndhwgc_bf16_instances
(
void
add_device_grouped_conv3d_bwd_data_xdl_ndhwgk_gkzyxc_ndhwgc_bf16_
comp_
instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvBwdDataMultipleD
<
3
,
NDHWGK
,
GKZYXC
,
...
...
@@ -27,21 +27,21 @@ void add_device_grouped_conv3d_bwd_data_xdl_ndhwgk_gkzyxc_ndhwgc_bf16_instances(
// 1. Default
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_bf16_instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataDefault
>
{});
device_grouped_conv_bwd_data_xdl_bf16_
comp_
instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataDefault
>
{});
// 2. Filter1x1Stride1Pad0
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_bf16_instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
device_grouped_conv_bwd_data_xdl_bf16_
comp_
instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
→
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/
comp/
device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_
comp_
instance.cpp
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_
comp_
instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -10,7 +10,7 @@ namespace device {
namespace
instance
{
// Compilation parameters for out[n, di, hi, wi, g, c] * wei[g, k, z, y, x, c] = in[n, do, ho, wo,
// g, k]
void
add_device_grouped_conv3d_bwd_data_xdl_ndhwgk_gkzyxc_ndhwgc_f16_instances
(
void
add_device_grouped_conv3d_bwd_data_xdl_ndhwgk_gkzyxc_ndhwgc_f16_
comp_
instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvBwdDataMultipleD
<
3
,
NDHWGK
,
GKZYXC
,
...
...
@@ -27,21 +27,21 @@ void add_device_grouped_conv3d_bwd_data_xdl_ndhwgk_gkzyxc_ndhwgc_f16_instances(
// 1. Default
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_f16_instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataDefault
>
{});
device_grouped_conv_bwd_data_xdl_f16_
comp_
instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataDefault
>
{});
// 2. Filter1x1Stride1Pad0
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_f16_instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
device_grouped_conv_bwd_data_xdl_f16_
comp_
instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
→
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/
comp/
device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_
comp_
instance.cpp
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_
comp_
instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -10,7 +10,7 @@ namespace device {
namespace
instance
{
// Compilation parameters for out[n, di, hi, wi, g, c] * wei[g, k, z, y, x, c] = in[n, do, ho, wo,
// g, k]
void
add_device_grouped_conv3d_bwd_data_xdl_ndhwgk_gkzyxc_ndhwgc_f32_instances
(
void
add_device_grouped_conv3d_bwd_data_xdl_ndhwgk_gkzyxc_ndhwgc_f32_
comp_
instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvBwdDataMultipleD
<
3
,
NDHWGK
,
GKZYXC
,
...
...
@@ -27,21 +27,21 @@ void add_device_grouped_conv3d_bwd_data_xdl_ndhwgk_gkzyxc_ndhwgc_f32_instances(
// 1. Default
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_f32_instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataDefault
>
{});
device_grouped_conv_bwd_data_xdl_f32_
comp_
instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataDefault
>
{});
// 2. Filter1x1Stride1Pad0
add_device_operation_instances
(
instances
,
device_grouped_conv_bwd_data_xdl_f32_instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
device_grouped_conv_bwd_data_xdl_f32_
comp_
instances
<
3
,
NDHWGK
,
GKZYXC
,
Empty_Tuple
,
NDHWGC
,
ConvBwdDataFilter1x1Stride1Pad0
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_mem_inter_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for out[g, n, do, ho, wo, k] * wei[g, k, z, y, x, c] = in[g, n, di, hi,
// wi, c]
// Hands the BF16 "mem" XDL instance lists for 3D grouped convolution backward data
// (layouts GNDHWK / GKZYXC / GNDHWC, `Interwave` variant) to
// add_device_operation_instances together with the caller's `instances` vector.
// One list is passed per convolution specialization.
void add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_bf16_mem_inter_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  GNDHWK,
                                                                  GKZYXC,
                                                                  Empty_Tuple,
                                                                  GNDHWC,
                                                                  BF16,
                                                                  BF16,
                                                                  Empty_Tuple,
                                                                  BF16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Name the two instance lists up front so the calls below stay short.
    using DefaultInstances =
        device_grouped_conv_bwd_data_xdl_bf16_mem_instances<3,
                                                            GNDHWK,
                                                            GKZYXC,
                                                            Empty_Tuple,
                                                            GNDHWC,
                                                            ConvBwdDataDefault,
                                                            Interwave>;
    using Filter1x1Stride1Pad0Instances =
        device_grouped_conv_bwd_data_xdl_bf16_mem_instances<3,
                                                            GNDHWK,
                                                            GKZYXC,
                                                            Empty_Tuple,
                                                            GNDHWC,
                                                            ConvBwdDataFilter1x1Stride1Pad0,
                                                            Interwave>;

    // 1. Default
    add_device_operation_instances(instances, DefaultInstances{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(instances, Filter1x1Stride1Pad0Instances{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_mem_intra_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for out[g, n, do, ho, wo, k] * wei[g, k, z, y, x, c] = in[g, n, di, hi,
// wi, c]
// Hands the BF16 "mem" XDL instance lists for 3D grouped convolution backward data
// (layouts GNDHWK / GKZYXC / GNDHWC, `Intrawave` variant) to
// add_device_operation_instances together with the caller's `instances` vector.
// One list is passed per convolution specialization.
void add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_bf16_mem_intra_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  GNDHWK,
                                                                  GKZYXC,
                                                                  Empty_Tuple,
                                                                  GNDHWC,
                                                                  BF16,
                                                                  BF16,
                                                                  Empty_Tuple,
                                                                  BF16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Name the two instance lists up front so the calls below stay short.
    using DefaultInstances =
        device_grouped_conv_bwd_data_xdl_bf16_mem_instances<3,
                                                            GNDHWK,
                                                            GKZYXC,
                                                            Empty_Tuple,
                                                            GNDHWC,
                                                            ConvBwdDataDefault,
                                                            Intrawave>;
    using Filter1x1Stride1Pad0Instances =
        device_grouped_conv_bwd_data_xdl_bf16_mem_instances<3,
                                                            GNDHWK,
                                                            GKZYXC,
                                                            Empty_Tuple,
                                                            GNDHWC,
                                                            ConvBwdDataFilter1x1Stride1Pad0,
                                                            Intrawave>;

    // 1. Default
    add_device_operation_instances(instances, DefaultInstances{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(instances, Filter1x1Stride1Pad0Instances{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_mem_inter_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for out[g, n, do, ho, wo, k] * wei[g, k, z, y, x, c] = in[g, n, di, hi,
// wi, c]
// Hands the F16 "mem" XDL instance lists for 3D grouped convolution backward data
// (layouts GNDHWK / GKZYXC / GNDHWC, `Interwave` variant) to
// add_device_operation_instances together with the caller's `instances` vector.
// One list is passed per convolution specialization.
void add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_f16_mem_inter_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  GNDHWK,
                                                                  GKZYXC,
                                                                  Empty_Tuple,
                                                                  GNDHWC,
                                                                  F16,
                                                                  F16,
                                                                  Empty_Tuple,
                                                                  F16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Name the two instance lists up front so the calls below stay short.
    using DefaultInstances =
        device_grouped_conv_bwd_data_xdl_f16_mem_instances<3,
                                                           GNDHWK,
                                                           GKZYXC,
                                                           Empty_Tuple,
                                                           GNDHWC,
                                                           ConvBwdDataDefault,
                                                           Interwave>;
    using Filter1x1Stride1Pad0Instances =
        device_grouped_conv_bwd_data_xdl_f16_mem_instances<3,
                                                           GNDHWK,
                                                           GKZYXC,
                                                           Empty_Tuple,
                                                           GNDHWC,
                                                           ConvBwdDataFilter1x1Stride1Pad0,
                                                           Interwave>;

    // 1. Default
    add_device_operation_instances(instances, DefaultInstances{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(instances, Filter1x1Stride1Pad0Instances{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_mem_intra_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for out[g, n, do, ho, wo, k] * wei[g, k, z, y, x, c] = in[g, n, di, hi,
// wi, c]
// Hands the F16 "mem" XDL instance lists for 3D grouped convolution backward data
// (layouts GNDHWK / GKZYXC / GNDHWC, `Intrawave` variant) to
// add_device_operation_instances together with the caller's `instances` vector.
// One list is passed per convolution specialization.
void add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_f16_mem_intra_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  GNDHWK,
                                                                  GKZYXC,
                                                                  Empty_Tuple,
                                                                  GNDHWC,
                                                                  F16,
                                                                  F16,
                                                                  Empty_Tuple,
                                                                  F16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Name the two instance lists up front so the calls below stay short.
    using DefaultInstances =
        device_grouped_conv_bwd_data_xdl_f16_mem_instances<3,
                                                           GNDHWK,
                                                           GKZYXC,
                                                           Empty_Tuple,
                                                           GNDHWC,
                                                           ConvBwdDataDefault,
                                                           Intrawave>;
    using Filter1x1Stride1Pad0Instances =
        device_grouped_conv_bwd_data_xdl_f16_mem_instances<3,
                                                           GNDHWK,
                                                           GKZYXC,
                                                           Empty_Tuple,
                                                           GNDHWC,
                                                           ConvBwdDataFilter1x1Stride1Pad0,
                                                           Intrawave>;

    // 1. Default
    add_device_operation_instances(instances, DefaultInstances{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(instances, Filter1x1Stride1Pad0Instances{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/mem/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_mem_inter_instance.cpp
0 → 100644
View file @
2edac9f1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_mem_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for out[g, n, do, ho, wo, k] * wei[g, k, z, y, x, c] = in[g, n, di, hi,
// wi, c]
// Hands the F32 "mem" XDL instance lists for 3D grouped convolution backward data
// (layouts GNDHWK / GKZYXC / GNDHWC, `Interwave` variant) to
// add_device_operation_instances together with the caller's `instances` vector.
// One list is passed per convolution specialization.
void add_device_grouped_conv3d_bwd_data_xdl_gndhwk_gkzyxc_gndhwc_f32_mem_inter_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  GNDHWK,
                                                                  GKZYXC,
                                                                  Empty_Tuple,
                                                                  GNDHWC,
                                                                  F32,
                                                                  F32,
                                                                  Empty_Tuple,
                                                                  F32,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  PassThrough>>>& instances)
{
    // Name the two instance lists up front so the calls below stay short.
    using DefaultInstances =
        device_grouped_conv_bwd_data_xdl_f32_mem_instances<3,
                                                           GNDHWK,
                                                           GKZYXC,
                                                           Empty_Tuple,
                                                           GNDHWC,
                                                           ConvBwdDataDefault,
                                                           Interwave>;
    using Filter1x1Stride1Pad0Instances =
        device_grouped_conv_bwd_data_xdl_f32_mem_instances<3,
                                                           GNDHWK,
                                                           GKZYXC,
                                                           Empty_Tuple,
                                                           GNDHWC,
                                                           ConvBwdDataFilter1x1Stride1Pad0,
                                                           Interwave>;

    // 1. Default
    add_device_operation_instances(instances, DefaultInstances{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(instances, Filter1x1Stride1Pad0Instances{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment