gaoqiong / composable_kernel · Commit 612539c7

Support NHWGC conv2d_bwd_weight

Authored Jun 22, 2023 by Bartlomiej Kocot
Parent: 3b18f1e3

Changes: 32 files in total; this page shows 12 changed files with 621 additions and 106 deletions (+621 -106).
- library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp (+45 -0)
- library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp (+47 -0)
- library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp (+47 -0)
- library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp (+35 -35)
- library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp (+35 -35)
- library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp (+35 -35)
- profiler/README.md (+43 -0)
- profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp (+6 -0)
- profiler/src/profile_grouped_conv_bwd_weight.cpp (+21 -0)
- test/grouped_convnd_bwd_weight/CMakeLists.txt (+3 -1)
- test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp (+124 -0)
- test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface.cpp (+180 -0)
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "device_grouped_conv2d_bwd_weight_xdl_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_f32_bf16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
                                                           NHWGC,
                                                           GKYXC,
                                                           NHWGK,
                                                           BF16,
                                                           F32,
                                                           BF16,
                                                           PassThrough,
                                                           PassThrough,
                                                           PassThrough>>>& instances)
{
    // 1. Default
    add_device_operation_instances(
        instances,
        device_grouped_conv2d_bwd_weight_xdl_c_shuffle_bf16_instances<NHWGC,
                                                                      GKYXC,
                                                                      NHWGK,
                                                                      ConvBwdWeightDefault>{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(
        instances,
        device_grouped_conv2d_bwd_weight_xdl_c_shuffle_bf16_instances<
            NHWGC,
            GKYXC,
            NHWGK,
            ConvBwdWeightFilter1x1Stride1Pad0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
```
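For context on how these registration functions are consumed: CK's instance library exposes the registered kernels through an operation-instance factory, and clients filter them per problem. Below is a minimal sketch of that pattern, assuming the usual `DeviceOperationInstanceFactory` client API from CK's examples; the include paths and the exact factory specialization are assumptions on my part, not part of this commit.

```cpp
// Minimal sketch (assumptions noted above): enumerate the bf16 NHWGC
// backward-weight instances added by this commit and print their names.
#include <iostream>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp"

using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvBwdWeight<
    2, // NDimSpatial
    ck::tensor_layout::convolution::NHWGC,
    ck::tensor_layout::convolution::GKYXC,
    ck::tensor_layout::convolution::NHWGK,
    ck::bhalf_t, // InDataType
    float,       // WeiDataType: fp32 accumulation buffer for bf16
    ck::bhalf_t, // OutDataType
    ck::tensor_operation::element_wise::PassThrough,
    ck::tensor_operation::element_wise::PassThrough,
    ck::tensor_operation::element_wise::PassThrough>;

int main()
{
    const auto op_ptrs = ck::tensor_operation::device::instance::
        DeviceOperationInstanceFactory<DeviceOp>::GetInstances();

    // Each pointer owns one tuned kernel configuration. A real client builds
    // an argument, keeps only instances where IsSupportedArgument() is true,
    // and picks the fastest survivor (this is what ckProfiler does).
    for(const auto& op : op_ptrs)
        std::cout << op->GetTypeString() << '\n';
    return 0;
}
```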
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "device_grouped_conv2d_bwd_weight_xdl_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
                                                           NHWGC,
                                                           GKYXC,
                                                           NHWGK,
                                                           F16,
                                                           F16,
                                                           F16,
                                                           PassThrough,
                                                           PassThrough,
                                                           PassThrough>>>& instances)
{
    // 1. Default
    add_device_operation_instances(
        instances,
        device_grouped_conv2d_bwd_weight_xdl_c_shuffle_f16_default_instances<
            NHWGC,
            GKYXC,
            NHWGK,
            ConvBwdWeightDefault>{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(
        instances,
        device_grouped_conv2d_bwd_weight_xdl_c_shuffle_f16_default_instances<
            NHWGC,
            GKYXC,
            NHWGK,
            ConvBwdWeightFilter1x1Stride1Pad0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
```
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "device_grouped_conv2d_bwd_weight_xdl_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
                                                           NHWGC,
                                                           GKYXC,
                                                           NHWGK,
                                                           F32,
                                                           F32,
                                                           F32,
                                                           PassThrough,
                                                           PassThrough,
                                                           PassThrough>>>& instances)
{
    // 1. Default
    add_device_operation_instances(
        instances,
        device_grouped_conv2d_bwd_weight_xdl_c_shuffle_f32_default_instances<
            NHWGC,
            GKYXC,
            NHWGK,
            ConvBwdWeightDefault>{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(
        instances,
        device_grouped_conv2d_bwd_weight_xdl_c_shuffle_f32_default_instances<
            NHWGC,
            GKYXC,
            NHWGK,
            ConvBwdWeightFilter1x1Stride1Pad0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
```
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp (+35 -35; diff collapsed in the original view)

library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp (+35 -35; diff collapsed in the original view)

library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp (+35 -35; diff collapsed in the original view)
profiler/README.md

@@ -141,3 +141,46 @@
avg_time: 0.768321
tflops: 86.6679
GB/s: 127.947
```
## Profile grouped convolution backward weight kernels
```bash
# arg1: tensor operation (grouped_conv_bwd_weight: Grouped Convolution Backward Weight)
# arg2: data type (0: Input fp32, Weight fp32, Output fp32
#                  1: Input fp16, Weight fp16, Output fp16
#                  2: Input bf16, Weight fp32, Output bf16)
# arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, N, K, Ho, Wo]
#                      1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, N, Ho, Wo, K]
#                      2: Input[N, Hi, Wi, G, C], Weight[G, K, Y, X, C], Output[N, Ho, Wo, G, K])
# arg4: verification (0: no, 1: yes)
# arg5: initialization (0: no init, 1: integer value, 2: decimal value)
# arg6: print tensor value (0: no; 1: yes)
# arg7: time kernel (0: no, 1: yes)
# Following arguments (depending on number of spatial dims):
#  Number of spatial dimensions (1=Conv1d, 2=Conv2d, 3=Conv3d)
#  G, N, K, C,
#  <filter spatial dimensions>, (ie Y, X for 2D)
#  <input image spatial dimensions>, (ie Hi, Wi for 2D)
#  <strides>, (ie Sy, Sx for 2D)
#  <dilations>, (ie Dy, Dx for 2D)
#  <left padding>, (ie LeftPy, LeftPx for 2D)
#  <right padding>, (ie RightPy, RightPx for 2D)
#  SplitK
################ op datatype layout verify init log time Ndims G N K C Y X Hi Wi Sy Sx Dy Dx LeftPy LeftPx RightPy RightPx SplitK
./bin/ckProfiler grouped_conv_bwd_weight 1 0 1 1 0 1 2 32 256 256 512 3 3 28 28 1 1 1 1 1 0 0 0 1
```
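As a quick sanity check on the spatial arguments, the output size follows the standard convolution relation Ho = (Hi + LeftPy + RightPy - Dy * (Y - 1) - 1) / Sy + 1. A small stand-alone illustration (mine, not README content):

```cpp
#include <cstdio>

// Standard convolution output-length relation. For the sample result below
// (Hi = 28, Y = 3, stride 1, dilation 1, no padding) this yields Ho = 26,
// matching the 26 x 26 output lengths reported there.
int conv_out_len(int in_len, int filter_len, int stride, int dilation, int left_pad, int right_pad)
{
    return (in_len + left_pad + right_pad - dilation * (filter_len - 1) - 1) / stride + 1;
}

int main()
{
    std::printf("Ho = %d\n", conv_out_len(28, 3, 1, 1, 0, 0)); // prints Ho = 26
    return 0;
}
```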
Result (MI100, FP16, GNHWC_GKYXC_GNHWK)
```
input: dim 5, lengths {32, 512, 1024, 28, 28}, strides {411041792, 802816, 1, 28672, 1024}
weight: dim 5, lengths {32, 512, 1024, 3, 3}, strides {4718592, 9216, 1, 3072, 1024}
output: dim 5, lengths {32, 512, 512, 26, 26}, strides {177209344, 346112, 1, 13312, 512}
....
Best configuration parameters:
name: DeviceGroupedConvBwdWeight_Xdl_CShuffle<256, 256, 128, 4, Default, 8, 4, 2, 8, 4, 8, 2, 1, 1, 8>
avg_time: 68.5216
tflops: 95.337
GB/s: 69.2301
```
Note: this kernel uses atomic add, so each launch accumulates into the weight output buffer. When CK's own timer re-runs the kernel, the output is accumulated multiple times and verification fails. To work around this, do not enable CK's timer and verification in the same run: verify first with `arg4=1 arg7=0`, then time with `arg4=0 arg7=1`.
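To see why timing and verification conflict, here is a toy model (an illustration of the failure mode, not CK code) of a split-K backward-weight kernel: every launch atomically accumulates partial products into the weight buffer, so a timing loop that re-launches the kernel without re-zeroing the buffer multiplies the result.

```cpp
#include <cstdio>
#include <vector>

// Toy model: a split-K backward-weight "kernel" where each workgroup owns one
// slice of the reduction dimension and accumulates into the weight buffer.
// On the GPU the += below is an atomic add; the kernel never zeroes wei[].
void launch_splitk_kernel(std::vector<float>& wei, const std::vector<float>& partials)
{
    for(float p : partials)
        wei[0] += p;
}

int main()
{
    std::vector<float> wei(1, 0.f);
    const std::vector<float> partials{1.f, 2.f, 3.f};

    launch_splitk_kernel(wei, partials); // verification run: wei[0] == 6 (correct)
    launch_splitk_kernel(wei, partials); // timing re-run: wei[0] == 12, but the
                                         // host reference is still 6 -> mismatch
    std::printf("wei[0] = %f\n", wei[0]);
    return 0;
}
```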
profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp

```diff
@@ -139,6 +139,8 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
     std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
     std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
     std::array<ck::index_t, NDimSpatial> output_spatial_lengths{};
+    std::array<ck::index_t, NDimSpatial + 3> input_strides{};
+    std::array<ck::index_t, NDimSpatial + 3> output_strides{};
     std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
     std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
     std::array<ck::index_t, NDimSpatial> input_left_pads{};
@@ -149,6 +151,8 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
     range_copy(conv_param.input_spatial_lengths_, begin(input_spatial_lengths));
     range_copy(conv_param.filter_spatial_lengths_, begin(filter_spatial_lengths));
     range_copy(conv_param.output_spatial_lengths_, begin(output_spatial_lengths));
+    range_copy(in_g_n_c_wis_desc.GetStrides(), begin(input_strides));
+    range_copy(out_g_n_k_wos_desc.GetStrides(), begin(output_strides));
     range_copy(conv_param.conv_filter_strides_, begin(conv_filter_strides));
     range_copy(conv_param.conv_filter_dilations_, begin(conv_filter_dilations));
     range_copy(conv_param.input_left_pads_, begin(input_left_pads));
@@ -167,6 +171,8 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
         input_spatial_lengths,
         filter_spatial_lengths,
         output_spatial_lengths,
+        input_strides,
+        output_strides,
         conv_filter_strides,
         conv_filter_dilations,
         input_left_pads,
```
profiler/src/profile_grouped_conv_bwd_weight.cpp

```diff
@@ -15,6 +15,7 @@ enum struct ConvLayout
 {
     GNCHW_GKCYX_GNKHW, // 0
     GNHWC_GKYXC_GNHWK, // 1
+    NHWGC_GKYXC_NHWGK, // 2
 };
 
 enum struct ConvDataType
@@ -37,6 +38,8 @@ static void print_helper_msg()
            "N, K, Ho, Wo]\n"
         << " 1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, "
            "N, Ho, Wo, K]\n"
+        << " 2: Input[N, Hi, Wi, G, C], Weight[G, K, Y, X, C], Output[N, "
+           "Ho, Wo, G, K]\n"
         << "arg4: verification (0: no, 1: yes)\n"
         << "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
         << "arg6: print tensor value (0: no; 1: yes)\n"
@@ -82,6 +85,7 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
     using GNWC   = ck::tensor_layout::convolution::GNWC;
     using GNHWC  = ck::tensor_layout::convolution::GNHWC;
+    using NHWGC  = ck::tensor_layout::convolution::NHWGC;
     using GNDHWC = ck::tensor_layout::convolution::GNDHWC;
 
     using GKXC   = ck::tensor_layout::convolution::GKXC;
@@ -90,6 +94,7 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
     using GNWK   = ck::tensor_layout::convolution::GNWK;
     using GNHWK  = ck::tensor_layout::convolution::GNHWK;
+    using NHWGK  = ck::tensor_layout::convolution::NHWGK;
     using GNDHWK = ck::tensor_layout::convolution::GNDHWK;
 
     constexpr auto I1 = ck::Number<1>{};
@@ -157,6 +162,22 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
             return profile(I2, GNHWC{}, GKYXC{}, GNHWK{}, BF16{}, F32{}, BF16{});
         }
     }
+    else if(num_dim_spatial == 2 && layout == ConvLayout::NHWGC_GKYXC_NHWGK)
+    {
+        if(data_type == ConvDataType::F32_F32_F32)
+        {
+            return profile(I2, NHWGC{}, GKYXC{}, NHWGK{}, F32{}, F32{}, F32{});
+        }
+        else if(data_type == ConvDataType::F16_F16_F16)
+        {
+            return profile(I2, NHWGC{}, GKYXC{}, NHWGK{}, F16{}, F16{}, F16{});
+        }
+        else if(data_type == ConvDataType::BF16_F32_BF16)
+        {
+            // fp32 atomic add is used for weight tensor in bf16 kernel
+            return profile(I2, NHWGC{}, GKYXC{}, NHWGK{}, BF16{}, F32{}, BF16{});
+        }
+    }
     else if(num_dim_spatial == 3 && layout == ConvLayout::GNHWC_GKYXC_GNHWK)
     {
         if(data_type == ConvDataType::F32_F32_F32)
```
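The `BF16_F32_BF16` branch above reflects the in-source comment that the bf16 kernel accumulates the weight gradient in fp32 via atomic add. A small stand-alone illustration of why fp32 accumulation matters (the bf16 rounding helper is my emulation for demonstration, not CK's `bhalf_t`):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Crude bf16 round-to-nearest-even emulation: keep the top 16 bits of a float.
static float to_bf16(float x)
{
    uint32_t u;
    std::memcpy(&u, &x, sizeof(u));
    u = (u + 0x7FFF + ((u >> 16) & 1)) & 0xFFFF0000u;
    std::memcpy(&x, &u, sizeof(x));
    return x;
}

int main()
{
    // Accumulating many small contributions: bf16's 8-bit mantissa makes the
    // running sum stall once it dwarfs the addend (here at 64), while fp32
    // accumulation reaches the exact 1024. Hence the fp32 accumulation buffer
    // for the bf16 backward-weight kernels.
    float acc_bf16 = 0.f, acc_f32 = 0.f;
    for(int i = 0; i < 4096; ++i)
    {
        acc_bf16 = to_bf16(acc_bf16 + to_bf16(0.25f));
        acc_f32 += 0.25f;
    }
    std::printf("bf16 accumulation: %f, fp32 accumulation: %f\n", acc_bf16, acc_f32);
    return 0;
}
```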
test/grouped_convnd_bwd_weight/CMakeLists.txt

```diff
@@ -2,8 +2,10 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
 set(target 0)
 foreach(gpu IN LISTS GPU_TARGETS)
     if(gpu IN_LIST gpu_list AND target EQUAL 0)
-        add_gtest_executable(test_grouped_convnd_bwd_weight grouped_convnd_bwd_weight.cpp)
+        add_gtest_executable(test_grouped_convnd_bwd_weight test_grouped_convnd_bwd_weight.cpp)
         target_link_libraries(test_grouped_convnd_bwd_weight PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance)
+        add_gtest_executable(test_grouped_convnd_bwd_weight_interface test_grouped_convnd_bwd_weight_interface.cpp)
+        target_link_libraries(test_grouped_convnd_bwd_weight_interface PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance)
         set(target 1)
     endif()
 endforeach()
\ No newline at end of file
```
test/grouped_convnd_bwd_weight/grouped_convnd_bwd_weight.cpp → test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp (renamed)
```diff
@@ -9,55 +9,88 @@
 #include <gtest/gtest.h>
 
 #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
 #include "profiler/profile_grouped_conv_bwd_weight_impl.hpp"
 
 template <typename Tuple>
 class TestGroupedConvndBwdWeight : public ::testing::Test
 {
     protected:
-    using DataType = std::tuple_element_t<0, Tuple>;
+    using InDataType  = std::tuple_element_t<0, Tuple>;
+    using WeiDataType = std::tuple_element_t<1, Tuple>;
+    using OutDataType = std::tuple_element_t<2, Tuple>;
+    using InLayout    = std::tuple_element_t<3, Tuple>;
+    using WeiLayout   = std::tuple_element_t<4, Tuple>;
+    using OutLayout   = std::tuple_element_t<5, Tuple>;
 
     std::vector<ck::utils::conv::ConvParam> conv_params;
     ck::index_t split_k{2};
 
     template <ck::index_t NDimSpatial>
     void Run()
     {
+        EXPECT_FALSE(conv_params.empty());
+        bool pass = true;
         for(auto& param : conv_params)
         {
-            bool pass;
-            EXPECT_FALSE(conv_params.empty());
-            pass = ck::profiler::profile_grouped_conv_bwd_weight_impl<
-                NDimSpatial,
-                ck::tuple_element_t<NDimSpatial - 1,
-                                    ck::Tuple<ck::tensor_layout::convolution::GNWC,
-                                              ck::tensor_layout::convolution::GNHWC,
-                                              ck::tensor_layout::convolution::GNDHWC>>,
-                ck::tuple_element_t<NDimSpatial - 1,
-                                    ck::Tuple<ck::tensor_layout::convolution::GKXC,
-                                              ck::tensor_layout::convolution::GKYXC,
-                                              ck::tensor_layout::convolution::GKZYXC>>,
-                ck::tuple_element_t<NDimSpatial - 1,
-                                    ck::Tuple<ck::tensor_layout::convolution::GNWK,
-                                              ck::tensor_layout::convolution::GNHWK,
-                                              ck::tensor_layout::convolution::GNDHWK>>,
-                DataType,
-                DataType,
-                DataType>(true,  // do_verification
-                          1,     // init_method: integer value
-                          false, // do_log
-                          false, // time_kernel
-                          param,
-                          split_k);
-            EXPECT_TRUE(pass);
+            pass = pass && ck::profiler::profile_grouped_conv_bwd_weight_impl<NDimSpatial,
+                                                                              InLayout,
+                                                                              WeiLayout,
+                                                                              OutLayout,
+                                                                              InDataType,
+                                                                              WeiDataType,
+                                                                              OutDataType>(
+                true,  // do_verification
+                1,     // init_method: integer value
+                false, // do_log
+                false, // time_kernel
+                param,
+                split_k);
         }
+        EXPECT_TRUE(pass);
     }
 };
 
-using KernelTypes =
-    ::testing::Types<std::tuple<float>, std::tuple<ck::half_t>, std::tuple<ck::bhalf_t>>;
-
-TYPED_TEST_SUITE(TestGroupedConvndBwdWeight, KernelTypes);
+template <typename Tuple>
+class TestGroupedConvndBwdWeight1d : public TestGroupedConvndBwdWeight<Tuple>
+{
+};
+
+template <typename Tuple>
+class TestGroupedConvndBwdWeight2d : public TestGroupedConvndBwdWeight<Tuple>
+{
+};
+
+template <typename Tuple>
+class TestGroupedConvndBwdWeight3d : public TestGroupedConvndBwdWeight<Tuple>
+{
+};
+
+using namespace ck::tensor_layout::convolution;
+
+using KernelTypes1d =
+    ::testing::Types<std::tuple<float, float, float, GNWC, GKXC, GNWK>,
+                     std::tuple<ck::half_t, ck::half_t, ck::half_t, GNWC, GKXC, GNWK>,
+                     std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNWC, GKXC, GNWK>>;
+
+using KernelTypes2d =
+    ::testing::Types<std::tuple<float, float, float, GNHWC, GKYXC, GNHWK>,
+                     std::tuple<ck::half_t, ck::half_t, ck::half_t, GNHWC, GKYXC, GNHWK>,
+                     std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNHWC, GKYXC, GNHWK>,
+                     std::tuple<float, float, float, NHWGC, GKYXC, NHWGK>,
+                     std::tuple<ck::half_t, ck::half_t, ck::half_t, NHWGC, GKYXC, NHWGK>,
+                     std::tuple<ck::bhalf_t, float, ck::bhalf_t, NHWGC, GKYXC, NHWGK>>;
+
+using KernelTypes3d =
+    ::testing::Types<std::tuple<float, float, float, GNDHWC, GKZYXC, GNDHWK>,
+                     std::tuple<ck::half_t, ck::half_t, ck::half_t, GNDHWC, GKZYXC, GNDHWK>,
+                     std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNDHWC, GKZYXC, GNDHWK>>;
+
+TYPED_TEST_SUITE(TestGroupedConvndBwdWeight1d, KernelTypes1d);
+TYPED_TEST_SUITE(TestGroupedConvndBwdWeight2d, KernelTypes2d);
+TYPED_TEST_SUITE(TestGroupedConvndBwdWeight3d, KernelTypes3d);
 
-TYPED_TEST(TestGroupedConvndBwdWeight, Test1D)
+TYPED_TEST(TestGroupedConvndBwdWeight1d, Test1D)
 {
     this->conv_params.clear();
     this->conv_params.push_back({1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
...
@@ -66,7 +99,7 @@ TYPED_TEST(TestGroupedConvndBwdWeight, Test1D)
     this->template Run<1>();
 }
 
-TYPED_TEST(TestGroupedConvndBwdWeight, Test2D)
+TYPED_TEST(TestGroupedConvndBwdWeight2d, Test2D)
 {
     this->conv_params.clear();
     this->conv_params.push_back(
...
@@ -78,7 +111,7 @@ TYPED_TEST(TestGroupedConvndBwdWeight, Test2D)
     this->template Run<2>();
 }
 
-TYPED_TEST(TestGroupedConvndBwdWeight, Test3D)
+TYPED_TEST(TestGroupedConvndBwdWeight3d, Test3D)
 {
     this->conv_params.clear();
     this->conv_params.push_back(
...
```
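For reference, the brace initializers these tests pass to `conv_params.push_back` pack the whole problem description into one `ConvParam`. The field order below is inferred from the labeled cases in the interface test (next file) and from the `range_copy` sequence in the profiler implementation; the annotations are mine, not CK documentation:

```cpp
#include "ck/library/utility/convolution_parameter.hpp"

// Inferred field order of ck::utils::conv::ConvParam's initializer
// (annotations are my reading of the test cases, not CK's own docs):
const ck::utils::conv::ConvParam param{2,        // number of spatial dimensions
                                       2,        // G: groups
                                       4,        // N: batch
                                       192,      // K: output channels per group
                                       192,      // C: input channels per group
                                       {3, 3},   // filter lengths Y, X
                                       {28, 28}, // input lengths Hi, Wi
                                       {1, 1},   // filter strides Sy, Sx
                                       {1, 1},   // filter dilations Dy, Dx
                                       {0, 0},   // input left pads
                                       {0, 0}};  // input right pads
```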
test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp"

#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"

#include <gtest/gtest.h>

using F16 = ck::half_t;
using F32 = float;

using PassThrough = ck::tensor_operation::element_wise::PassThrough;

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;

using ConvolutionBackwardWeightSpecialization =
    ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization;

static constexpr auto ConvBwdWeightDefault = ConvolutionBackwardWeightSpecialization::Default;
static constexpr auto Filter1x1Stride1Pad0 =
    ConvolutionBackwardWeightSpecialization::Filter1x1Stride1Pad0;

template <typename Tuple, ConvolutionBackwardWeightSpecialization ConvSpec>
class TestGroupedConvndBwdWeight : public ::testing::Test
{
    protected:
    static constexpr ck::index_t NDimSpatial = 2;

    using InLayout  = std::tuple_element_t<2, Tuple>;
    using WeiLayout = std::tuple_element_t<1, Tuple>;
    using OutLayout = std::tuple_element_t<0, Tuple>;

    // clang-format off
    using GroupedConvBwdWeightDeviceInstance = ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Xdl_CShuffle
        //##########| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| AccData| In| Wei| Out| ConvBackward| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransfer| CBlockTransfer|
        //##########| Dim|         |          |          |   Type|    Type|    Type|    Type| Elementwise| Elementwise| Elementwise| Weight| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| ClusterLengths| ScalarPerVector|
        //##########| Spatial|     |          |          |       |        |        |        | Operation| Operation| Operation| Specialization| | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| MBlock_MPerBlock| NWaveNPerXdl|
        //##########|        |     |          |          |       |        |        |        |          |          |          |               | | | | | | | |     |     |                |             | | |          |             | |                 |             | | |          |             | |           |           | NBlock_NPerBlock|             |
        <NDimSpatial, InLayout, WeiLayout, OutLayout, F16, F16, F16, F32, PassThrough, PassThrough, PassThrough, ConvSpec, 128, 32, 128, 4, 8, 32, 32, 1, 2, S<1, 4, 4, 8>, S<0, 3, 1, 2>, S<0, 2, 1, 3>, 2, 8, 1, true, S<1, 4, 16, 2>, S<0, 3, 1, 2>, S<0, 2, 1, 3>, 2, 8, 4, true, 1, 1, S<1, 32, 1, 4>, 8>;
    // clang-format on

    ck::utils::conv::ConvParam conv_param;
    ck::index_t split_k{2};

    template <ck::index_t NDimSpatial>
    bool Run()
    {
        const auto in_g_n_c_wis_desc =
            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
                conv_param);
        const auto wei_g_k_c_xs_desc =
            ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(
                conv_param);
        const auto out_g_n_k_wos_desc =
            ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(
                conv_param);

        std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
        std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
        std::array<ck::index_t, NDimSpatial> output_spatial_lengths{};
        std::array<ck::index_t, NDimSpatial + 3> input_strides{};
        std::array<ck::index_t, NDimSpatial + 3> output_strides{};
        std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
        std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
        std::array<ck::index_t, NDimSpatial> input_left_pads{};
        std::array<ck::index_t, NDimSpatial> input_right_pads{};

        auto range_copy = [](const auto& from, auto to) { std::copy(begin(from), end(from), to); };

        range_copy(conv_param.input_spatial_lengths_, begin(input_spatial_lengths));
        range_copy(conv_param.filter_spatial_lengths_, begin(filter_spatial_lengths));
        range_copy(conv_param.output_spatial_lengths_, begin(output_spatial_lengths));
        range_copy(in_g_n_c_wis_desc.GetStrides(), begin(input_strides));
        range_copy(out_g_n_k_wos_desc.GetStrides(), begin(output_strides));
        range_copy(conv_param.conv_filter_strides_, begin(conv_filter_strides));
        range_copy(conv_param.conv_filter_dilations_, begin(conv_filter_dilations));
        range_copy(conv_param.input_left_pads_, begin(input_left_pads));
        range_copy(conv_param.input_right_pads_, begin(input_right_pads));

        auto conv = GroupedConvBwdWeightDeviceInstance{};

        auto argument = conv.MakeArgument(nullptr,
                                          nullptr,
                                          nullptr,
                                          conv_param.G_,
                                          conv_param.N_,
                                          conv_param.K_,
                                          conv_param.C_,
                                          input_spatial_lengths,
                                          filter_spatial_lengths,
                                          output_spatial_lengths,
                                          input_strides,
                                          output_strides,
                                          conv_filter_strides,
                                          conv_filter_dilations,
                                          input_left_pads,
                                          input_right_pads,
                                          PassThrough{},
                                          PassThrough{},
                                          PassThrough{},
                                          split_k);
        return conv.IsSupportedArgument(argument);
    }
};

using GNHWC = ck::tensor_layout::convolution::GNHWC;
using NHWGC = ck::tensor_layout::convolution::NHWGC;

using GKYXC = ck::tensor_layout::convolution::GKYXC;

using GNHWK = ck::tensor_layout::convolution::GNHWK;
using NHWGK = ck::tensor_layout::convolution::NHWGK;

using KernelTypes =
    ::testing::Types<std::tuple<GNHWK, GKYXC, GNHWC>, std::tuple<NHWGK, GKYXC, NHWGC>>;

template <typename Tuple>
class TestGroupedConvndBwdWeightDefault
    : public TestGroupedConvndBwdWeight<Tuple, ConvBwdWeightDefault>
{
};

template <typename Tuple>
class TestGroupedConvndBwdWeightFilter1x1
    : public TestGroupedConvndBwdWeight<Tuple, Filter1x1Stride1Pad0>
{
};

TYPED_TEST_SUITE(TestGroupedConvndBwdWeightDefault, KernelTypes);
TYPED_TEST_SUITE(TestGroupedConvndBwdWeightFilter1x1, KernelTypes);

TYPED_TEST(TestGroupedConvndBwdWeightFilter1x1, SpecializationCheck)
{
    // Check filter 3,3 instead of 1,1
    this->conv_param = {2, 2, 4, 192, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
    bool is_supported = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // Check strides 2,2 instead of 1,1
    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    is_supported = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // Check with pad
    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}};
    is_supported = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // Supported version
    this->conv_param = {2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
    is_supported = this->template Run<2>();
    EXPECT_TRUE(is_supported);
}

TYPED_TEST(TestGroupedConvndBwdWeightDefault, VectorLoadCheck)
{
    // vector load for A
    this->conv_param = {2, 2, 128, 129, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    bool is_supported = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // vector load for B, E, Ds
    this->conv_param = {2, 2, 128, 128, 257, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    is_supported = this->template Run<2>();
    EXPECT_FALSE(is_supported);
}
```