gaoqiong / composable_kernel · Commits · 8c4897d1

Unverified commit 8c4897d1, authored Aug 30, 2023 by Rostyslav Geyyer; committed by GitHub, Aug 30, 2023.

    Merge branch 'develop' into lwpck-756

Parents: 9ba9ebec, 9e86ebd6

Changes: 542 changed files in the commit; this page shows 20 changed files with 450 additions and 280 deletions (+450 −280).
Changed files on this page:

test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp                          +1    −1
test/gemm_layernorm/CMakeLists.txt                                           +2    −0
test/gemm_reduce/CMakeLists.txt                                              +5    −3
test/grouped_convnd_bwd_data/CMakeLists.txt                                  +1    −1
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp                +47   −13
test/grouped_convnd_bwd_weight/CMakeLists.txt                                +3    −1
test/grouped_convnd_bwd_weight/grouped_convnd_bwd_weight.cpp                 +0    −91
test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp            +140  −0
test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface.cpp  +179  −0
test/grouped_convnd_fwd/grouped_convnd_fwd.cpp                               +15   −1
test/grouped_gemm/CMakeLists.txt                                             +2    −0
test/grouped_gemm/test_grouped_gemm_interface.cpp                            +1    −1
test/grouped_gemm/test_grouped_gemm_util.hpp                                 +2    −2
test/normalization/CMakeLists.txt                                            +19   −16
test/pool_fwd/CMakeLists.txt                                                 +2    −8
test/pool_fwd/test_avg_pool2d_fwd.cpp                                        +0    −56
test/pool_fwd/test_avg_pool3d_fwd.cpp                                        +12   −6
test/pool_fwd/test_max_pool2d_fwd.cpp                                        +0    −75
test/pool_fwd/test_max_pool3d_fwd.cpp                                        +16   −5
test/pool_fwd/test_pool_fwd_common.hpp                                       +3    −0
test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp  (view file @ 8c4897d1)

@@ -5,7 +5,7 @@
 #include "ck/ck.hpp"
 #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
-#include "ck/tensor_operation/gpu/device/device_gemm_xdl_waveletmodel_cshuffle.hpp"
+#include "ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp"
 #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
 #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
 #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
test/gemm_layernorm/CMakeLists.txt  (view file @ 8c4897d1)

@@ -2,10 +2,12 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
 set(target 0)
 foreach(gpu IN LISTS GPU_TARGETS)
     if(gpu IN_LIST gpu_list AND target EQUAL 0)
+        if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
         add_custom_target(test_gemm_layernorm)
         add_gtest_executable(test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16.cpp)
         target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance)
         add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16)
         set(target 1)
+        endif()
     endif()
 endforeach()
test/gemm_reduce/CMakeLists.txt  (view file @ 8c4897d1)

-add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
-target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility)
-target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance)
+if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
+    add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
+    target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility)
+    target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance)
+endif()
\ No newline at end of file
test/grouped_convnd_bwd_data/CMakeLists.txt  (view file @ 8c4897d1)

 if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940")
     add_gtest_executable(test_grouped_convnd_bwd_data test_grouped_convnd_bwd_data.cpp)
-    target_link_libraries(test_grouped_convnd_bwd_data PRIVATE utility device_grouped_conv2d_bwd_data_instance)
+    target_link_libraries(test_grouped_convnd_bwd_data PRIVATE utility device_grouped_conv2d_bwd_data_instance device_grouped_conv3d_bwd_data_instance)
     add_gtest_executable(test_grouped_convnd_bwd_data_interface test_grouped_convnd_bwd_data_interface.cpp)
     target_link_libraries(test_grouped_convnd_bwd_data_interface PRIVATE utility device_grouped_conv2d_bwd_data_instance)
 endif()
\ No newline at end of file
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp  (view file @ 8c4897d1)

@@ -46,23 +46,36 @@ class TestGroupedConvndBwdData : public ::testing::Test
     }
 };

-using GNHWC = ck::tensor_layout::convolution::GNHWC;
-using NHWGC = ck::tensor_layout::convolution::NHWGC;
-using GKYXC = ck::tensor_layout::convolution::GKYXC;
-using GNHWK = ck::tensor_layout::convolution::GNHWK;
-using NHWGK = ck::tensor_layout::convolution::NHWGK;
+using namespace ck::tensor_layout::convolution;

-using KernelTypes = ::testing::Types<std::tuple<float, GNHWK, GKYXC, GNHWC>,
-                                     std::tuple<ck::half_t, GNHWK, GKYXC, GNHWC>,
-                                     std::tuple<ck::bhalf_t, GNHWK, GKYXC, GNHWC>,
-                                     std::tuple<float, NHWGK, GKYXC, NHWGC>,
-                                     std::tuple<ck::half_t, NHWGK, GKYXC, NHWGC>,
-                                     std::tuple<ck::bhalf_t, NHWGK, GKYXC, NHWGC>>;
+using KernelTypes2d = ::testing::Types<std::tuple<float, GNHWK, GKYXC, GNHWC>,
+                                       std::tuple<ck::half_t, GNHWK, GKYXC, GNHWC>,
+                                       std::tuple<ck::bhalf_t, GNHWK, GKYXC, GNHWC>,
+                                       std::tuple<float, NHWGK, GKYXC, NHWGC>,
+                                       std::tuple<ck::half_t, NHWGK, GKYXC, NHWGC>,
+                                       std::tuple<ck::bhalf_t, NHWGK, GKYXC, NHWGC>>;
+using KernelTypes3d = ::testing::Types<std::tuple<float, GNDHWK, GKZYXC, GNDHWC>,
+                                       std::tuple<ck::half_t, GNDHWK, GKZYXC, GNDHWC>,
+                                       std::tuple<ck::bhalf_t, GNDHWK, GKZYXC, GNDHWC>,
+                                       std::tuple<float, NDHWGK, GKZYXC, NDHWGC>,
+                                       std::tuple<ck::half_t, NDHWGK, GKZYXC, NDHWGC>,
+                                       std::tuple<ck::bhalf_t, NDHWGK, GKZYXC, NDHWGC>>;

-TYPED_TEST_SUITE(TestGroupedConvndBwdData, KernelTypes);
+template <typename Tuple>
+class TestGroupedConvndBwdData2d : public TestGroupedConvndBwdData<Tuple>
+{
+};
+
+template <typename Tuple>
+class TestGroupedConvndBwdData3d : public TestGroupedConvndBwdData<Tuple>
+{
+};
+
+TYPED_TEST_SUITE(TestGroupedConvndBwdData2d, KernelTypes2d);
+TYPED_TEST_SUITE(TestGroupedConvndBwdData3d, KernelTypes3d);

-TYPED_TEST(TestGroupedConvndBwdData, Test2D)
+TYPED_TEST(TestGroupedConvndBwdData2d, Test2D)
 {
     this->conv_params.clear();
@@ -74,5 +87,26 @@ TYPED_TEST(TestGroupedConvndBwdData, Test2D)
         {2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
     this->conv_params.push_back(
         {2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
+    this->conv_params.push_back({2, 1, 1, 1, 32, {8, 8}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    this->conv_params.push_back({2, 1, 1, 64, 3, {8, 8}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    this->conv_params.push_back({2, 1, 1, 1, 1, {8, 8}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
     this->template Run<2>();
 }
+
+TYPED_TEST(TestGroupedConvndBwdData3d, Test3D)
+{
+    this->conv_params.clear();
+    this->conv_params.push_back(
+        {3, 2, 16, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
+    this->conv_params.push_back(
+        {3, 2, 2, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    this->conv_params.push_back(
+        {3, 2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
+    this->conv_params.push_back(
+        {3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    this->conv_params.push_back(
+        {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    this->conv_params.push_back(
+        {3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    this->template Run<3>();
+}
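
A note on the brace-initializers above: each push_back packs an entire convolution problem into one list. The struct below is a hypothetical mirror of the field order of ck::utils::conv::ConvParam, inferred from the values in this diff; the names are illustrative assumptions, not the library's. It shows how {2, 1, 1, 1, 32, {8, 8}, {32, 32}, ...} reads as a 2D, single-group, N=1, K=1, C=32 problem with an 8x8 filter over a 32x32 image.

#include <vector>

// Hypothetical mirror of the ConvParam field order used in the tests above;
// inferred from the diff, so treat the names as an assumption.
struct ConvParamSketch
{
    int ndim_spatial;                        // 1, 2, or 3
    int group_count;                         // G
    int N;                                   // batch
    int K;                                   // output channels
    int C;                                   // input channels
    std::vector<int> filter_spatial_lengths; // e.g. {8, 8}
    std::vector<int> input_spatial_lengths;  // e.g. {32, 32}
    std::vector<int> conv_strides;
    std::vector<int> conv_dilations;
    std::vector<int> input_left_pads;
    std::vector<int> input_right_pads;
};

int main()
{
    // Same shape as conv_params.push_back({2, 1, 1, 1, 32, {8, 8}, {32, 32},
    //                                      {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    ConvParamSketch p{2, 1, 1, 1, 32, {8, 8}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}};
    return (p.ndim_spatial == 2 && p.C == 32) ? 0 : 1;
}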
test/grouped_convnd_bwd_weight/CMakeLists.txt  (view file @ 8c4897d1)

@@ -2,8 +2,10 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
 set(target 0)
 foreach(gpu IN LISTS GPU_TARGETS)
     if(gpu IN_LIST gpu_list AND target EQUAL 0)
-        add_gtest_executable(test_grouped_convnd_bwd_weight grouped_convnd_bwd_weight.cpp)
+        add_gtest_executable(test_grouped_convnd_bwd_weight test_grouped_convnd_bwd_weight.cpp)
         target_link_libraries(test_grouped_convnd_bwd_weight PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance)
+        add_gtest_executable(test_grouped_convnd_bwd_weight_interface test_grouped_convnd_bwd_weight_interface.cpp)
+        target_link_libraries(test_grouped_convnd_bwd_weight_interface PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance)
         set(target 1)
     endif()
 endforeach()
\ No newline at end of file
test/grouped_convnd_bwd_weight/grouped_convnd_bwd_weight.cpp  (deleted, 100644 → 0; view file @ 9ba9ebec)

The entire file was removed:

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>

#include <gtest/gtest.h>

#include "profiler/profile_grouped_conv_bwd_weight_impl.hpp"

template <typename Tuple>
class TestGroupedConvndBwdWeight : public ::testing::Test
{
    protected:
    using DataType = std::tuple_element_t<0, Tuple>;
    std::vector<ck::utils::conv::ConvParam> conv_params;
    ck::index_t split_k{2};

    template <ck::index_t NDimSpatial>
    void Run()
    {
        for(auto& param : conv_params)
        {
            bool pass;
            EXPECT_FALSE(conv_params.empty());
            pass = ck::profiler::profile_grouped_conv_bwd_weight_impl<
                NDimSpatial,
                ck::tuple_element_t<NDimSpatial - 1,
                                    ck::Tuple<ck::tensor_layout::convolution::GNWC,
                                              ck::tensor_layout::convolution::GNHWC,
                                              ck::tensor_layout::convolution::GNDHWC>>,
                ck::tuple_element_t<NDimSpatial - 1,
                                    ck::Tuple<ck::tensor_layout::convolution::GKXC,
                                              ck::tensor_layout::convolution::GKYXC,
                                              ck::tensor_layout::convolution::GKZYXC>>,
                ck::tuple_element_t<NDimSpatial - 1,
                                    ck::Tuple<ck::tensor_layout::convolution::GNWK,
                                              ck::tensor_layout::convolution::GNHWK,
                                              ck::tensor_layout::convolution::GNDHWK>>,
                DataType,
                DataType,
                DataType>(true,  // do_verification
                          1,     // init_method: integer value
                          false, // do_log
                          false, // time_kernel
                          param,
                          split_k);
            EXPECT_TRUE(pass);
        }
    }
};

using KernelTypes =
    ::testing::Types<std::tuple<float>, std::tuple<ck::half_t>, std::tuple<ck::bhalf_t>>;

TYPED_TEST_SUITE(TestGroupedConvndBwdWeight, KernelTypes);

TYPED_TEST(TestGroupedConvndBwdWeight, Test1D)
{
    this->conv_params.clear();
    this->conv_params.push_back({1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
    this->conv_params.push_back({1, 2, 32, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
    this->conv_params.push_back({1, 2, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
    this->template Run<1>();
}

TYPED_TEST(TestGroupedConvndBwdWeight, Test2D)
{
    this->conv_params.clear();
    this->conv_params.push_back(
        {2, 2, 64, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
    this->conv_params.push_back(
        {2, 2, 4, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->conv_params.push_back(
        {2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
    this->template Run<2>();
}

TYPED_TEST(TestGroupedConvndBwdWeight, Test3D)
{
    this->conv_params.clear();
    this->conv_params.push_back(
        {3, 2, 16, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    this->conv_params.push_back(
        {3, 2, 2, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->conv_params.push_back(
        {3, 2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    this->template Run<3>();
}
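
The deleted file's layout selection — ck::tuple_element_t<NDimSpatial - 1, ck::Tuple<GNWC, GNHWC, GNDHWC>> — is a compile-time table lookup: the spatial dimension indexes a list of layout tags. A minimal standard-library sketch of the same idiom (the layout tags here are empty stand-ins, not the ck types):

#include <tuple>
#include <type_traits>

// Stand-ins for the ck layout tags (hypothetical).
struct GNWC {};
struct GNHWC {};
struct GNDHWC {};

// Pick the input layout for a given spatial dimensionality at compile time,
// exactly the lookup the deleted test performed with ck::Tuple.
template <int NDimSpatial>
using InLayoutFor = std::tuple_element_t<NDimSpatial - 1, std::tuple<GNWC, GNHWC, GNDHWC>>;

static_assert(std::is_same_v<InLayoutFor<2>, GNHWC>, "2D convolutions use GNHWC input");

int main() { return 0; }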
test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp  (new file, 0 → 100644; view file @ 8c4897d1)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>

#include <gtest/gtest.h>

#include "ck/utility/common_header.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "profiler/profile_grouped_conv_bwd_weight_impl.hpp"

template <typename Tuple>
class TestGroupedConvndBwdWeight : public ::testing::Test
{
    protected:
    using InDataType  = std::tuple_element_t<0, Tuple>;
    using WeiDataType = std::tuple_element_t<1, Tuple>;
    using OutDataType = std::tuple_element_t<2, Tuple>;
    using InLayout    = std::tuple_element_t<3, Tuple>;
    using WeiLayout   = std::tuple_element_t<4, Tuple>;
    using OutLayout   = std::tuple_element_t<5, Tuple>;
    using NDimSpatial = std::tuple_element_t<6, Tuple>;

    std::vector<ck::utils::conv::ConvParam> conv_params;
    ck::index_t split_k{2};

    void Run()
    {
        EXPECT_FALSE(conv_params.empty());
        bool pass = true;
        for(auto& param : conv_params)
        {
            pass = pass &&
                   ck::profiler::profile_grouped_conv_bwd_weight_impl<NDimSpatial{},
                                                                      InLayout,
                                                                      WeiLayout,
                                                                      OutLayout,
                                                                      InDataType,
                                                                      WeiDataType,
                                                                      OutDataType>(
                       true,  // do_verification
                       1,     // init_method: integer value
                       false, // do_log
                       false, // time_kernel
                       param,
                       split_k);
        }
        EXPECT_TRUE(pass);
    }
};

template <typename Tuple>
class TestGroupedConvndBwdWeight1d : public TestGroupedConvndBwdWeight<Tuple>
{
};

template <typename Tuple>
class TestGroupedConvndBwdWeight2d : public TestGroupedConvndBwdWeight<Tuple>
{
};

template <typename Tuple>
class TestGroupedConvndBwdWeight3d : public TestGroupedConvndBwdWeight<Tuple>
{
};

using namespace ck::tensor_layout::convolution;

using KernelTypes1d = ::testing::Types<
    std::tuple<float, float, float, GNWC, GKXC, GNWK, ck::Number<1>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, GNWC, GKXC, GNWK, ck::Number<1>>,
    std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNWC, GKXC, GNWK, ck::Number<1>>>;

using KernelTypes2d = ::testing::Types<
    std::tuple<float, float, float, GNHWC, GKYXC, GNHWK, ck::Number<2>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, GNHWC, GKYXC, GNHWK, ck::Number<2>>,
    std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNHWC, GKYXC, GNHWK, ck::Number<2>>,
    std::tuple<float, float, float, NHWGC, GKYXC, NHWGK, ck::Number<2>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, NHWGC, GKYXC, NHWGK, ck::Number<2>>,
    std::tuple<ck::bhalf_t, float, ck::bhalf_t, NHWGC, GKYXC, NHWGK, ck::Number<2>>>;

using KernelTypes3d = ::testing::Types<
    std::tuple<float, float, float, GNDHWC, GKZYXC, GNDHWK, ck::Number<3>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, GNDHWC, GKZYXC, GNDHWK, ck::Number<3>>,
    std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNDHWC, GKZYXC, GNDHWK, ck::Number<3>>,
    std::tuple<float, float, float, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>,
    std::tuple<ck::bhalf_t, float, ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>>;

TYPED_TEST_SUITE(TestGroupedConvndBwdWeight1d, KernelTypes1d);
TYPED_TEST_SUITE(TestGroupedConvndBwdWeight2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndBwdWeight3d, KernelTypes3d);

TYPED_TEST(TestGroupedConvndBwdWeight1d, Test1D)
{
    this->conv_params.clear();
    this->conv_params.push_back({1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
    this->conv_params.push_back({1, 2, 32, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
    this->conv_params.push_back({1, 2, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
    this->conv_params.push_back({1, 1, 1, 1, 32, {3}, {32}, {1}, {1}, {1}, {1}});
    this->conv_params.push_back({1, 1, 1, 64, 3, {3}, {32}, {1}, {1}, {1}, {1}});
    this->conv_params.push_back({1, 1, 1, 1, 1, {3}, {32}, {1}, {1}, {1}, {1}});
    this->Run();
}

TYPED_TEST(TestGroupedConvndBwdWeight2d, Test2D)
{
    this->conv_params.clear();
    this->conv_params.push_back(
        {2, 2, 64, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
    this->conv_params.push_back(
        {2, 2, 4, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->conv_params.push_back(
        {2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
    this->conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->Run();
}

TYPED_TEST(TestGroupedConvndBwdWeight3d, Test3D)
{
    this->conv_params.clear();
    this->conv_params.push_back(
        {3, 2, 16, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    this->conv_params.push_back(
        {3, 2, 2, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->conv_params.push_back(
        {3, 2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    this->conv_params.push_back(
        {3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->conv_params.push_back(
        {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->conv_params.push_back(
        {3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->Run();
}
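
gtest's ::testing::Types<> can only carry types, which is why the new test smuggles the spatial dimension in as a type (ck::Number<N>) and recovers the value with NDimSpatial{}. The sketch below shows the same trick with std::integral_constant as a stand-in for ck::Number; the struct and tag names are illustrative, not from the library.

#include <tuple>
#include <type_traits>

template <int N>
using Number = std::integral_constant<int, N>; // stand-in for ck::Number<N>

template <typename Tuple>
struct TestCaseSketch
{
    using NDimSpatial = std::tuple_element_t<2, Tuple>;
    static constexpr int ndim = NDimSpatial{}; // value recovered from the type
};

struct GNHWC {};
struct GNHWK {};
static_assert(TestCaseSketch<std::tuple<GNHWC, GNHWK, Number<2>>>::ndim == 2,
              "the dimension travels through the type list as a type");

int main() { return 0; }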
test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface.cpp  (new file, 0 → 100644; view file @ 8c4897d1)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp"

#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"

#include <gtest/gtest.h>

using F16 = ck::half_t;
using F32 = float;

using PassThrough = ck::tensor_operation::element_wise::PassThrough;

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;

using ConvolutionBackwardWeightSpecialization =
    ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization;

static constexpr auto ConvBwdWeightDefault = ConvolutionBackwardWeightSpecialization::Default;
static constexpr auto Filter1x1Stride1Pad0 =
    ConvolutionBackwardWeightSpecialization::Filter1x1Stride1Pad0;

template <typename Tuple, ConvolutionBackwardWeightSpecialization ConvSpec>
class TestGroupedConvndBwdWeight : public ::testing::Test
{
    protected:
    static constexpr ck::index_t NDimSpatial = 2;

    using InLayout  = std::tuple_element_t<2, Tuple>;
    using WeiLayout = std::tuple_element_t<1, Tuple>;
    using OutLayout = std::tuple_element_t<0, Tuple>;

    // clang-format off
    using GroupedConvBwdWeightDeviceInstance =
        ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Xdl_CShuffle
        // (the source carries a four-line column-annotation comment here naming each
        //  template parameter: NumDimSpatial | In/Wei/Out layouts | In/Wei/Out/Acc data
        //  types | elementwise operations | ConvBackwardWeight specialization | block and
        //  XDL tile sizes | A/B block-transfer descriptors | CShuffle and C block-transfer
        //  parameters)
        <NDimSpatial, InLayout, WeiLayout, OutLayout, F16, F16, F16, F32,
         PassThrough, PassThrough, PassThrough, ConvSpec,
         128, 32, 128, 4, 8, 32, 32, 1, 2,
         S<1, 4, 4, 8>, S<0, 3, 1, 2>, S<0, 2, 1, 3>, 2, 8, 1, true,
         S<1, 4, 16, 2>, S<0, 3, 1, 2>, S<0, 2, 1, 3>, 2, 8, 4, true,
         1, 1, S<1, 32, 1, 4>, 8>;
    // clang-format on

    ck::utils::conv::ConvParam conv_param;
    ck::index_t split_k{2};

    template <ck::index_t NDimSpatial>
    bool Run()
    {
        const auto in_g_n_c_wis_desc =
            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
                conv_param);
        const auto wei_g_k_c_xs_desc =
            ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(
                conv_param);
        const auto out_g_n_k_wos_desc =
            ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(
                conv_param);

        std::array<ck::index_t, NDimSpatial + 3> input_lengths{};
        std::array<ck::index_t, NDimSpatial + 3> filter_lengths{};
        std::array<ck::index_t, NDimSpatial + 3> output_lengths{};
        std::array<ck::index_t, NDimSpatial + 3> input_strides{};
        std::array<ck::index_t, NDimSpatial + 3> weights_strides{};
        std::array<ck::index_t, NDimSpatial + 3> output_strides{};
        std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
        std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
        std::array<ck::index_t, NDimSpatial> input_left_pads{};
        std::array<ck::index_t, NDimSpatial> input_right_pads{};

        auto range_copy = [](const auto& from, auto to) {
            std::copy(begin(from), end(from), to);
        };

        range_copy(in_g_n_c_wis_desc.GetLengths(), begin(input_lengths));
        range_copy(in_g_n_c_wis_desc.GetStrides(), begin(input_strides));
        range_copy(wei_g_k_c_xs_desc.GetLengths(), begin(filter_lengths));
        range_copy(wei_g_k_c_xs_desc.GetStrides(), begin(weights_strides));
        range_copy(out_g_n_k_wos_desc.GetLengths(), begin(output_lengths));
        range_copy(out_g_n_k_wos_desc.GetStrides(), begin(output_strides));
        range_copy(conv_param.conv_filter_strides_, begin(conv_filter_strides));
        range_copy(conv_param.conv_filter_dilations_, begin(conv_filter_dilations));
        range_copy(conv_param.input_left_pads_, begin(input_left_pads));
        range_copy(conv_param.input_right_pads_, begin(input_right_pads));

        auto conv     = GroupedConvBwdWeightDeviceInstance{};
        auto argument = conv.MakeArgument(nullptr,
                                          nullptr,
                                          nullptr,
                                          input_lengths,
                                          input_strides,
                                          filter_lengths,
                                          weights_strides,
                                          output_lengths,
                                          output_strides,
                                          conv_filter_strides,
                                          conv_filter_dilations,
                                          input_left_pads,
                                          input_right_pads,
                                          PassThrough{},
                                          PassThrough{},
                                          PassThrough{},
                                          split_k);
        return conv.IsSupportedArgument(argument);
    }
};

using GNHWC = ck::tensor_layout::convolution::GNHWC;
using NHWGC = ck::tensor_layout::convolution::NHWGC;
using GKYXC = ck::tensor_layout::convolution::GKYXC;
using GNHWK = ck::tensor_layout::convolution::GNHWK;
using NHWGK = ck::tensor_layout::convolution::NHWGK;

using KernelTypes =
    ::testing::Types<std::tuple<GNHWK, GKYXC, GNHWC>, std::tuple<NHWGK, GKYXC, NHWGC>>;

template <typename Tuple>
class TestGroupedConvndBwdWeightDefault
    : public TestGroupedConvndBwdWeight<Tuple, ConvBwdWeightDefault>
{
};

template <typename Tuple>
class TestGroupedConvndBwdWeightFilter1x1
    : public TestGroupedConvndBwdWeight<Tuple, Filter1x1Stride1Pad0>
{
};

TYPED_TEST_SUITE(TestGroupedConvndBwdWeightDefault, KernelTypes);
TYPED_TEST_SUITE(TestGroupedConvndBwdWeightFilter1x1, KernelTypes);

TYPED_TEST(TestGroupedConvndBwdWeightFilter1x1, SpecializationCheck)
{
    // Check filter 3,3 instead of 1,1
    this->conv_param = {2, 2, 4, 192, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
    bool is_supported = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // Check strides 2,2 instead of 1,1
    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    is_supported     = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // Check with pad
    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}};
    is_supported     = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // Supported version
    this->conv_param = {2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
    is_supported     = this->template Run<2>();
    EXPECT_TRUE(is_supported);
}

TYPED_TEST(TestGroupedConvndBwdWeightDefault, VectorLoadCheck)
{
    // vector load for A
    this->conv_param = {2, 2, 128, 129, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    bool is_supported = this->template Run<2>();
    EXPECT_FALSE(is_supported);
    // vector load for B, E, Ds
    this->conv_param = {2, 2, 128, 128, 257, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    is_supported      = this->template Run<2>();
    EXPECT_FALSE(is_supported);
}
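
The interface test above relies on a "dry-run" support check: build an argument with null device pointers, then ask the instance whether it could run it — no memory is allocated and no kernel is launched. The sketch below illustrates the pattern; MakeArgument and IsSupportedArgument mirror the composable_kernel idiom, but this class and its rejection rule are hypothetical stand-ins.

#include <cstdio>

struct ArgumentSketch
{
    int filter_w;
    int stride_w;
};

struct DeviceOpSketch
{
    // Null data pointers are fine: only the problem description matters here.
    ArgumentSketch MakeArgument(const void*, const void*, void*, int filter_w, int stride_w)
    {
        return ArgumentSketch{filter_w, stride_w};
    }
    // A 1x1/stride-1 specialization rejects anything else at argument-check time,
    // like the Filter1x1Stride1Pad0 cases above.
    bool IsSupportedArgument(const ArgumentSketch& arg)
    {
        return arg.filter_w == 1 && arg.stride_w == 1;
    }
};

int main()
{
    DeviceOpSketch op;
    std::printf("3x3 supported: %d\n",
                op.IsSupportedArgument(op.MakeArgument(nullptr, nullptr, nullptr, 3, 1))); // 0
    std::printf("1x1 supported: %d\n",
                op.IsSupportedArgument(op.MakeArgument(nullptr, nullptr, nullptr, 1, 1))); // 1
    return 0;
}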
test/grouped_convnd_fwd/grouped_convnd_fwd.cpp  (view file @ 8c4897d1)

@@ -22,6 +22,8 @@ TEST_F(TestGroupedConvNdFwd, GroupedConv1dFwdGNWC)
     conv_params.push_back({1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
     conv_params.push_back({1, 2, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
     conv_params.push_back({1, 2, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
+    conv_params.push_back({1, 1, 1, 1, 32, {3}, {32}, {1}, {1}, {1}, {1}});
+    conv_params.push_back({1, 1, 1, 64, 3, {3}, {32}, {1}, {1}, {1}, {1}});
     for(auto& param : conv_params)
     {
@@ -96,6 +98,9 @@ TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdGNHWC)
     conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
     conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
     conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
+    conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
     for(auto& param : conv_params)
     {
@@ -173,6 +178,12 @@ TEST_F(TestGroupedConvNdFwd, GroupedConv3dFwdGNDHWC)
         {3, 2, 128, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
     conv_params.push_back(
         {3, 2, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
+    conv_params.push_back(
+        {3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    this->conv_params.push_back(
+        {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    conv_params.push_back(
+        {3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
     for(auto& param : conv_params)
     {
@@ -247,6 +258,9 @@ TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdNHWGC)
     conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
     conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
     conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
+    conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
     for(auto& param : conv_params)
     {
@@ -255,7 +269,7 @@ TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdNHWGC)
         // fp16
         pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
                                                            ck::tensor_layout::convolution::NHWGC,
-                                                           ck::tensor_layout::convolution::KYXGC,
+                                                           ck::tensor_layout::convolution::GKYXC,
                                                            ck::tensor_layout::convolution::NHWGK,
                                                            ck::half_t,
                                                            ck::half_t,
test/grouped_gemm/CMakeLists.txt  (view file @ 8c4897d1)

+if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
 list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
 set(target 0)
 foreach(gpu IN LISTS GPU_TARGETS)
@@ -12,3 +13,4 @@ foreach(gpu IN LISTS GPU_TARGETS)
         set(target 1)
     endif()
 endforeach()
+endif()
test/grouped_gemm/test_grouped_gemm_interface.cpp  (view file @ 8c4897d1)

@@ -108,7 +108,7 @@ TEST_F(TestGGemmSplitKInterface_MKNKMN, KLoops)
     // kloops % 2
     Ks = std::vector<int>{256, 512, 320, 768};
-    EXPECT_FALSE(
+    EXPECT_TRUE(
         DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs, kbatch));
     // Not all gemms have same value for main_k0_block_loop!
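
For context on the "main_k0_block_loop" comment: with split-K grouped GEMM, every group in one launch must agree on the number of main K-loop iterations. The sketch below is a hypothetical illustration of that uniformity check — the loop formula and the KPerBlock value are illustrative assumptions, not the library's actual logic.

#include <vector>

// Hypothetical sketch: do all groups take the same number of main K-loop
// iterations for a given per-block K tile and split-K factor?
bool same_main_k_block_loop(const std::vector<int>& Ks, int k_per_block, int kbatch)
{
    int first_loops = -1;
    for(int K : Ks)
    {
        const int loops = K / (k_per_block * kbatch); // iterations of the main K loop
        if(first_loops < 0) first_loops = loops;
        if(loops != first_loops) return false;
    }
    return true;
}

int main()
{
    // Ks = {256, 512, 320, 768} from the test above: the per-group loop counts
    // differ, which is exactly the situation the comment warns about.
    return same_main_k_block_loop({256, 512, 320, 768}, 32, 2) ? 1 : 0;
}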
test/grouped_gemm/test_grouped_gemm_util.hpp  (view file @ 8c4897d1)

@@ -147,14 +147,14 @@ struct DeviceGroupedGemmSplitkInstanceWrapper
             32, 4, 2,
-            S<1, 4, 32, 1>,
+            S<1, 4, 16, 1>,
             ABlockTransferThreadClusterArrageOrder,
             ABlockTransferSrcAccessOrder,
             ABlockTransferSrcVectorDim::value,
             ABlockTransferSrcScalarPerVector,
             ABlockTransferDstScalarPerVector_K1::value,
             ABlockLdsAddExtraM::value,
-            S<1, 4, 32, 1>,
+            S<1, 4, 16, 1>,
             BBlockTransferThreadClusterArrageOrder,
             BBlockTransferSrcAccessOrder,
             BBlockTransferSrcVectorDim::value,
test/normalization/CMakeLists.txt  (view file @ 8c4897d1)

-add_custom_target(test_normalization)
-add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
-add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
-add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
-add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
-target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
-target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
-target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
-target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
-add_dependencies(test_normalization test_layernorm2d_fp32)
-add_dependencies(test_normalization test_layernorm2d_fp16)
-add_dependencies(test_normalization test_groupnorm_fp16)
-add_dependencies(test_normalization test_groupnorm_fp32)
+if(DTYPES MATCHES "fp16" OR DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
+    add_custom_target(test_normalization)
+endif()
+if(DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
+    add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
+    add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
+    target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
+    target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
+    add_dependencies(test_normalization test_layernorm2d_fp32)
+    add_dependencies(test_normalization test_groupnorm_fp32)
+endif()
+if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
+    add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
+    add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
+    target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
+    target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
+    add_dependencies(test_normalization test_layernorm2d_fp16)
+    add_dependencies(test_normalization test_groupnorm_fp16)
+endif()
test/pool_fwd/CMakeLists.txt  (view file @ 8c4897d1)

 add_custom_target(test_pool_fwd)
-add_gtest_executable(test_avg_pool2d_fwd test_avg_pool2d_fwd.cpp)
 add_gtest_executable(test_avg_pool3d_fwd test_avg_pool3d_fwd.cpp)
-add_gtest_executable(test_max_pool2d_fwd test_max_pool2d_fwd.cpp)
 add_gtest_executable(test_max_pool3d_fwd test_max_pool3d_fwd.cpp)
-target_link_libraries(test_avg_pool2d_fwd PRIVATE utility device_pool_fwd_instance)
-target_link_libraries(test_avg_pool3d_fwd PRIVATE utility device_pool_fwd_instance)
-target_link_libraries(test_max_pool2d_fwd PRIVATE utility device_pool_fwd_instance)
-target_link_libraries(test_max_pool3d_fwd PRIVATE utility device_pool_fwd_instance)
+target_link_libraries(test_avg_pool3d_fwd PRIVATE utility device_pool3d_fwd_instance)
+target_link_libraries(test_max_pool3d_fwd PRIVATE utility device_pool3d_fwd_instance)
-add_dependencies(test_pool_fwd test_avg_pool2d_fwd)
 add_dependencies(test_pool_fwd test_avg_pool3d_fwd)
-add_dependencies(test_pool_fwd test_max_pool2d_fwd)
 add_dependencies(test_pool_fwd test_max_pool3d_fwd)
test/pool_fwd/test_avg_pool2d_fwd.cpp  (deleted, 100644 → 0; view file @ 9ba9ebec)

The entire file was removed:

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include "gtest/gtest.h"
#include "profiler/profile_pool2d_fwd_impl.hpp"
#include "test_pool_fwd_common.hpp"

template <typename Tuple>
class TestAvgPool2dFwd : public ::testing::Test
{
    protected:
    using InDataType      = std::tuple_element_t<0, Tuple>;
    using OutDataType     = std::tuple_element_t<1, Tuple>;
    using ComputeDataType = std::tuple_element_t<2, Tuple>;
    using IndexDataType   = std::tuple_element_t<3, Tuple>;

    std::vector<PoolingParam> params;

    void Run()
    {
        for(auto param : params)
        {
            bool success =
                ck::profiler::profile_pool2d_fwd_impl<InDataType,
                                                      OutDataType,
                                                      ComputeDataType,
                                                      IndexDataType,
                                                      ck::ReduceTensorOp::AVG,
                                                      false,
                                                      false>(true,
                                                             2,
                                                             false,
                                                             false,
                                                             param.length_,
                                                             param.window_spatial_lengths_,
                                                             param.window_strides_,
                                                             param.input_left_pads_,
                                                             param.input_right_pads_);
            EXPECT_TRUE(success);
        }
    }
};

using KernelTypes =
    ::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;

TYPED_TEST_SUITE(TestAvgPool2dFwd, KernelTypes);

TYPED_TEST(TestAvgPool2dFwd, Test_Pool)
{
    // length, window_length, window_stride, left_pad, right_pad
    this->params = {{{1, 1, 1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
                    {{2, 16, 64, 64}, {64, 64}, {1, 1}, {0, 0}, {0, 0}},
                    {{2, 32, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}}};
    this->Run();
}
test/pool_fwd/test_avg_pool3d_fwd.cpp  (view file @ 8c4897d1)

@@ -25,6 +25,8 @@ class TestAvgPool3dFwd : public ::testing::Test
                                                   OutDataType,
                                                   ComputeDataType,
                                                   IndexDataType,
+                                                  ck::tensor_layout::convolution::NDHWC,
+                                                  ck::tensor_layout::convolution::NDHWC,
                                                   ck::ReduceTensorOp::AVG,
                                                   false,
                                                   false>(true,
@@ -34,23 +36,27 @@ class TestAvgPool3dFwd : public ::testing::Test
                                                          param.length_,
                                                          param.window_spatial_lengths_,
                                                          param.window_strides_,
+                                                         param.window_dilations_,
                                                          param.input_left_pads_,
                                                          param.input_right_pads_);
             EXPECT_TRUE(success);
         }
     }
 };

+#ifdef CK_ENABLE_FP16
 using KernelTypes =
     ::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
+#else
+using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
+#endif

 TYPED_TEST_SUITE(TestAvgPool3dFwd, KernelTypes);

 TYPED_TEST(TestAvgPool3dFwd, Test_Pool)
 {
-    // length, window_length, window_stride, left_pad, right_pad
-    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}}};
+    // length, window_length, window_stride, window_dilation, left_pad, right_pad
+    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {4, 4, 4}, {4, 4, 4}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
     this->Run();
 }
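
The new dilation parameter changes the pooling arithmetic: a window of w taps spaced d apart covers d*(w-1)+1 input elements. The standard output-length formula below (a sketch with a hypothetical helper name, but the arithmetic itself is the usual convolution/pooling relation) shows why the new {4,4,4}-window, {2,2,2}-dilation case over a 64-wide input yields 15 output positions per axis.

#include <cstdio>

// Output length along one axis for a pooling window with dilation,
// using the standard (in + pads - effective_window) / stride + 1 relation.
int pooled_output_length(int in_len, int window, int stride, int dilation,
                         int left_pad, int right_pad)
{
    const int effective_window = dilation * (window - 1) + 1; // 4 taps, 2 apart -> 7
    return (in_len + left_pad + right_pad - effective_window) / stride + 1;
}

int main()
{
    // From the test case {{2, 16, 64, 64, 64}, {4, 4, 4}, {4, 4, 4}, {2, 2, 2}, ...}:
    std::printf("%d\n", pooled_output_length(64, 4, 4, 2, 0, 0)); // prints 15
    return 0;
}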
test/pool_fwd/test_max_pool2d_fwd.cpp  (deleted, 100644 → 0; view file @ 9ba9ebec)

The entire file was removed:

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include "gtest/gtest.h"
#include "profiler/profile_pool2d_fwd_impl.hpp"
#include "test_pool_fwd_common.hpp"

template <typename Tuple>
class TestMaxPool2dFwd : public ::testing::Test
{
    protected:
    using InDataType      = std::tuple_element_t<0, Tuple>;
    using OutDataType     = std::tuple_element_t<1, Tuple>;
    using ComputeDataType = std::tuple_element_t<2, Tuple>;
    using IndexDataType   = std::tuple_element_t<3, Tuple>;

    std::vector<PoolingParam> params;

    void Run()
    {
        for(auto param : params)
        {
            // max pool
            bool success =
                ck::profiler::profile_pool2d_fwd_impl<InDataType,
                                                      OutDataType,
                                                      ComputeDataType,
                                                      IndexDataType,
                                                      ck::ReduceTensorOp::MAX,
                                                      false,
                                                      false>(true,
                                                             2,
                                                             false,
                                                             false,
                                                             param.length_,
                                                             param.window_spatial_lengths_,
                                                             param.window_strides_,
                                                             param.input_left_pads_,
                                                             param.input_right_pads_);
            EXPECT_TRUE(success);

            // max pool + index
            success =
                ck::profiler::profile_pool2d_fwd_impl<InDataType,
                                                      OutDataType,
                                                      ComputeDataType,
                                                      IndexDataType,
                                                      ck::ReduceTensorOp::MAX,
                                                      false,
                                                      true>(true,
                                                            2,
                                                            false,
                                                            false,
                                                            param.length_,
                                                            param.window_spatial_lengths_,
                                                            param.window_strides_,
                                                            param.input_left_pads_,
                                                            param.input_right_pads_);
            EXPECT_TRUE(success);
        }
    }
};

using KernelTypes =
    ::testing::Types<std::tuple<F16, F16, F16, I32>, std::tuple<F32, F32, F32, I32>>;

TYPED_TEST_SUITE(TestMaxPool2dFwd, KernelTypes);

TYPED_TEST(TestMaxPool2dFwd, Test_Pool)
{
    // length, window_length, window_stride, left_pad, right_pad
    this->params = {{{1, 1, 1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
                    {{2, 16, 64, 64}, {64, 64}, {1, 1}, {0, 0}, {0, 0}},
                    {{2, 32, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}}};
    this->Run();
}
test/pool_fwd/test_max_pool3d_fwd.cpp  (view file @ 8c4897d1)

@@ -26,6 +26,8 @@ class TestMaxPool3dFwd : public ::testing::Test
                                                   OutDataType,
                                                   ComputeDataType,
                                                   IndexDataType,
+                                                  ck::tensor_layout::convolution::NDHWC,
+                                                  ck::tensor_layout::convolution::NDHWC,
                                                   ck::ReduceTensorOp::MAX,
                                                   false,
                                                   false>(true,
@@ -35,6 +37,7 @@ class TestMaxPool3dFwd : public ::testing::Test
                                                          param.length_,
                                                          param.window_spatial_lengths_,
                                                          param.window_strides_,
+                                                         param.window_dilations_,
                                                          param.input_left_pads_,
                                                          param.input_right_pads_);
             EXPECT_TRUE(success);
@@ -44,6 +47,8 @@ class TestMaxPool3dFwd : public ::testing::Test
                                                   OutDataType,
                                                   ComputeDataType,
                                                   IndexDataType,
+                                                  ck::tensor_layout::convolution::NDHWC,
+                                                  ck::tensor_layout::convolution::NDHWC,
                                                   ck::ReduceTensorOp::MAX,
                                                   false,
                                                   true>(true,
@@ -53,6 +58,7 @@ class TestMaxPool3dFwd : public ::testing::Test
                                                         param.length_,
                                                         param.window_spatial_lengths_,
                                                         param.window_strides_,
+                                                        param.window_dilations_,
                                                         param.input_left_pads_,
                                                         param.input_right_pads_);
             EXPECT_TRUE(success);
@@ -60,16 +66,21 @@ class TestMaxPool3dFwd : public ::testing::Test
     }
 };

+#ifdef CK_ENABLE_FP16
 using KernelTypes =
-    ::testing::Types<std::tuple<F16, F16, F16, I32>, std::tuple<F32, F32, F32, I32>>;
+    ::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
+#else
+using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
+#endif

 TYPED_TEST_SUITE(TestMaxPool3dFwd, KernelTypes);

 TYPED_TEST(TestMaxPool3dFwd, Test_Pool)
 {
-    // length, window_length, window_stride, left_pad, right_pad
-    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}}};
+    // length, window_length, window_stride, window_dilation, left_pad, right_pad
+    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {4, 4, 4}, {4, 4, 4}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
     this->Run();
 }
test/pool_fwd/test_pool_fwd_common.hpp  (view file @ 8c4897d1)

@@ -14,11 +14,13 @@ struct PoolingParam
     PoolingParam(const std::vector<index_t>& length,
                  const std::vector<index_t>& window_spatial_lengths,
                  const std::vector<index_t>& window_strides,
+                 const std::vector<index_t>& window_dilations,
                  const std::vector<index_t>& input_left_pads,
                  const std::vector<index_t>& input_right_pads)
         : length_(length),
           window_spatial_lengths_(window_spatial_lengths),
           window_strides_(window_strides),
+          window_dilations_(window_dilations),
           input_left_pads_(input_left_pads),
           input_right_pads_(input_right_pads)
     {
@@ -26,6 +28,7 @@ struct PoolingParam
     std::vector<index_t> length_;
     std::vector<index_t> window_spatial_lengths_;
    std::vector<index_t> window_strides_;
+    std::vector<index_t> window_dilations_;
     std::vector<index_t> input_left_pads_;
     std::vector<index_t> input_right_pads_;
 };
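
With the new field, every PoolingParam call site must now pass six vectors. A minimal usage sketch, assuming the header above is on the include path (values taken from the updated 3D tests):

#include <vector>
#include "test_pool_fwd_common.hpp" // PoolingParam, as changed above

int main()
{
    // The new window_dilations vector slots in between window_strides and the pads.
    PoolingParam p({2, 16, 64, 64, 64}, // input lengths (N, C, D, H, W)
                   {4, 4, 4},           // window spatial lengths
                   {4, 4, 4},           // window strides
                   {2, 2, 2},           // window dilations — the new argument
                   {0, 0, 0},           // input left pads
                   {0, 0, 0});          // input right pads
    return p.window_dilations_.size() == 3 ? 0 : 1;
}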