Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4100d1d8
Commit
4100d1d8
authored
Aug 23, 2023
by
Alan Turner
Browse files
Merge remote-tracking branch 'origin/develop' into migx-flash-attn
parents
48717006
c8a8385f
Changes
609
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
812 additions
and
134 deletions
+812
-134
test/contraction/test_contraction_interface.cpp
test/contraction/test_contraction_interface.cpp
+1
-1
test/data_type/CMakeLists.txt
test/data_type/CMakeLists.txt
+3
-0
test/data_type/fp8.cpp
test/data_type/fp8.cpp
+123
-0
test/elementwise_normalization/CMakeLists.txt
test/elementwise_normalization/CMakeLists.txt
+6
-7
test/gemm/CMakeLists.txt
test/gemm/CMakeLists.txt
+14
-10
test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp
test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp
+1
-1
test/gemm_layernorm/CMakeLists.txt
test/gemm_layernorm/CMakeLists.txt
+2
-0
test/gemm_reduce/CMakeLists.txt
test/gemm_reduce/CMakeLists.txt
+5
-3
test/grouped_convnd_bwd_data/CMakeLists.txt
test/grouped_convnd_bwd_data/CMakeLists.txt
+6
-0
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp
.../grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp
+112
-0
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface.cpp
...onvnd_bwd_data/test_grouped_convnd_bwd_data_interface.cpp
+178
-0
test/grouped_convnd_bwd_weight/CMakeLists.txt
test/grouped_convnd_bwd_weight/CMakeLists.txt
+3
-1
test/grouped_convnd_bwd_weight/grouped_convnd_bwd_weight.cpp
test/grouped_convnd_bwd_weight/grouped_convnd_bwd_weight.cpp
+0
-91
test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp
...uped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp
+140
-0
test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface.cpp
...d_bwd_weight/test_grouped_convnd_bwd_weight_interface.cpp
+179
-0
test/grouped_convnd_fwd/grouped_convnd_fwd.cpp
test/grouped_convnd_fwd/grouped_convnd_fwd.cpp
+15
-1
test/grouped_gemm/CMakeLists.txt
test/grouped_gemm/CMakeLists.txt
+2
-0
test/grouped_gemm/test_grouped_gemm_interface.cpp
test/grouped_gemm/test_grouped_gemm_interface.cpp
+1
-1
test/grouped_gemm/test_grouped_gemm_util.hpp
test/grouped_gemm/test_grouped_gemm_util.hpp
+2
-2
test/normalization/CMakeLists.txt
test/normalization/CMakeLists.txt
+19
-16
No files found.
test/contraction/test_contraction_interface.cpp
View file @
4100d1d8
...
...
@@ -38,7 +38,7 @@ class ContractionInstanceWrapper
//#####################################| | | | Type| Type| Type| DataType| Type| Type| Elementwise| Elementwise| Elementwise| Spacialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector|
//#####################################| | | | | | | | | | Operation| Operation| Operation| | Stage| | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerXdl| _NWaveNPerXdl|
//#####################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceContractionMultipleD_Xdl_CShuffle
<
NumDim
,
NumDim
,
NumDim
,
F32
,
F32
,
F32
,
F32
,
ck
::
Tuple
<
F32
>
,
F32
,
Pass
,
Pass
,
Bilinear
,
GemmSpec
,
1
,
256
,
256
,
128
,
16
,
4
,
4
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
ABlockTransferSrcVectorDim
,
4
,
4
,
1
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
BBlockTransferSrcVectorDim
,
4
,
4
,
1
,
1
,
1
,
S
<
1
,
16
,
1
,
16
>
,
CDEBlockTransferScalarPerVector
>
;
DeviceContractionMultipleD_Xdl_CShuffle
<
NumDim
,
NumDim
,
NumDim
,
F32
,
F32
,
F32
,
F32
,
ck
::
Tuple
<
F32
>
,
F32
,
Pass
,
Pass
,
Bilinear
,
GemmSpec
,
1
,
256
,
256
,
128
,
16
,
4
,
4
,
32
,
32
,
4
,
2
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
ABlockTransferSrcVectorDim
,
4
,
4
,
1
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
BBlockTransferSrcVectorDim
,
4
,
4
,
1
,
1
,
1
,
S
<
1
,
16
,
1
,
16
>
,
CDEBlockTransferScalarPerVector
>
;
// clang-format on
bool
isSupported
(
std
::
vector
<
ck
::
index_t
>&
ADims
,
...
...
test/data_type/CMakeLists.txt
View file @
4100d1d8
...
...
@@ -2,3 +2,6 @@ if (USE_BITINT_EXTENSION_INT4)
add_gtest_executable
(
test_int4 int4.cpp
)
target_link_libraries
(
test_int4 PRIVATE utility
)
endif
()
add_gtest_executable
(
test_fp8 fp8.cpp
)
target_link_libraries
(
test_fp8 PRIVATE utility
)
test/data_type/fp8.cpp
0 → 100644
View file @
4100d1d8
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <limits>

#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using
ck
::
f8_convert_sr
;
using
ck
::
f8_t
;
using
ck
::
half_t
;
using
ck
::
type_convert
;
// Pins the raw 8-bit values reported by ck::NumericLimits<f8_t>.
TEST(FP8, NumericLimits)
{
    using F8Limits = ck::NumericLimits<f8_t>;

    EXPECT_EQ(F8Limits::Min(), 0x08);
    EXPECT_EQ(F8Limits::Max(), 0x77);
    EXPECT_EQ(F8Limits::Lowest(), 0xF7);
    EXPECT_EQ(F8Limits::QuietNaN(), 0x80);
}
// float -> f8_t -> float round trips through type_convert, each compared
// against the expected float within one shared absolute tolerance.
TEST(FP8, ConvertFP32Nearest)
{
    // absolute tolerance used by every comparison below
    float abs_tol = 1e-6;

    // helper: convert a float to f8_t and straight back to float
    const auto round_trip = [](float value) {
        return type_convert<float>(type_convert<f8_t>(value));
    };

    // zero must come back within tolerance of zero
    ASSERT_NEAR(0.0f, round_trip(0.0f), abs_tol);

    // the smallest normal float must come back within tolerance of itself
    const float min_float = std::numeric_limits<float>::min();
    ASSERT_NEAR(min_float, round_trip(min_float), abs_tol);

    // 240.0 (the value the original test treats as the f8_t maximum) is preserved
    ASSERT_NEAR(240.0f, round_trip(240.0f), abs_tol);

    // the largest float is expected to be clipped to 240.0
    ASSERT_NEAR(240.0f, round_trip(std::numeric_limits<float>::max()), abs_tol);

    // infinity is expected to become the quiet-NaN pattern 0x80 (no conversion back)
    const f8_t inf_as_f8 = type_convert<f8_t>(std::numeric_limits<float>::infinity());
    ASSERT_NEAR(0x80, inf_as_f8, abs_tol);

    // a small positive value survives the round trip
    const float pos_float = 0.0078125f;
    ASSERT_NEAR(pos_float, round_trip(pos_float), abs_tol);

    // a small negative value survives the round trip
    const float neg_float = -0.0156250f;
    ASSERT_NEAR(neg_float, round_trip(neg_float), abs_tol);
}
// Same float round-trip checks as the nearest-rounding test, but the
// float -> f8_t step goes through f8_convert_sr.
TEST(FP8, ConvertFP32Stochastic)
{
    // absolute tolerance used by every comparison below
    float abs_tol = 1e-6;

    // zero must come back within tolerance of zero
    const f8_t f8_from_zero = f8_convert_sr<f8_t>(0.0f);
    ASSERT_NEAR(0.0f, type_convert<float>(f8_from_zero), abs_tol);

    // the smallest normal float must come back within tolerance of itself
    const float min_float = std::numeric_limits<float>::min();
    const f8_t f8_from_min = f8_convert_sr<f8_t>(min_float);
    ASSERT_NEAR(min_float, type_convert<float>(f8_from_min), abs_tol);

    // 240.0 (the value the original test treats as the f8_t maximum) is preserved
    const f8_t f8_from_240 = f8_convert_sr<f8_t>(240.0f);
    ASSERT_NEAR(240.0f, type_convert<float>(f8_from_240), abs_tol);

    // the largest float is expected to be clipped to 240.0
    const f8_t f8_from_max = f8_convert_sr<f8_t>(std::numeric_limits<float>::max());
    ASSERT_NEAR(240.0f, type_convert<float>(f8_from_max), abs_tol);

    // infinity is expected to become the quiet-NaN pattern 0x80
    const f8_t f8_from_inf = f8_convert_sr<f8_t>(std::numeric_limits<float>::infinity());
    ASSERT_NEAR(0x80, f8_from_inf, abs_tol);

    // a small positive value survives the round trip
    const float pos_float = 0.0078125f;
    const f8_t f8_from_pos = f8_convert_sr<f8_t>(pos_float);
    ASSERT_NEAR(pos_float, type_convert<float>(f8_from_pos), abs_tol);

    // a small negative value survives the round trip
    const float neg_float = -0.0156250f;
    const f8_t f8_from_neg = f8_convert_sr<f8_t>(neg_float);
    ASSERT_NEAR(neg_float, type_convert<float>(f8_from_neg), abs_tol);
}
// half_t -> f8_t -> half_t round trips through type_convert, each compared
// against the expected half_t within one shared absolute tolerance.
TEST(FP8, ConvertFP16Nearest)
{
    // absolute tolerance used by every comparison below
    float abs_tol = 1e-3;

    // helper: convert a half_t to f8_t and straight back to half_t
    const auto round_trip = [](half_t value) {
        return type_convert<half_t>(type_convert<f8_t>(value));
    };

    // zero must come back within tolerance of zero
    ASSERT_NEAR(half_t{0.0}, round_trip(half_t{0.0}), abs_tol);

    // NumericLimits<half_t>::Min() must come back within tolerance of itself
    const half_t min_half = ck::NumericLimits<half_t>::Min();
    ASSERT_NEAR(min_half, round_trip(min_half), abs_tol);

    // 240.0 (the value the original test treats as the f8_t maximum) is preserved
    ASSERT_NEAR(half_t{240.0}, round_trip(half_t{240.0}), abs_tol);

    // NumericLimits<half_t>::Max() is expected to be clipped to 240.0
    ASSERT_NEAR(half_t{240.0}, round_trip(ck::NumericLimits<half_t>::Max()), abs_tol);

    // a half_t quiet NaN is expected to become the f8_t quiet-NaN pattern 0x80
    const f8_t nan_as_f8 = type_convert<f8_t>(ck::NumericLimits<half_t>::QuietNaN());
    ASSERT_NEAR(0x80, nan_as_f8, abs_tol);

    // a small positive value survives the round trip
    const half_t pos_half = half_t{0.0078125};
    ASSERT_NEAR(pos_half, round_trip(pos_half), abs_tol);

    // a small negative value survives the round trip
    const half_t neg_half = half_t{-0.0156250};
    ASSERT_NEAR(neg_half, round_trip(neg_half), abs_tol);
}
// Same half_t round-trip checks as the nearest-rounding test, but the
// half_t -> f8_t step goes through f8_convert_sr.
TEST(FP8, ConvertFP16Stochastic)
{
    // absolute tolerance used by every comparison below
    float abs_tol = 1e-3;

    // zero must come back within tolerance of zero
    const f8_t f8_from_zero = f8_convert_sr<f8_t>(half_t{0.0});
    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(f8_from_zero), abs_tol);

    // NumericLimits<half_t>::Min() must come back within tolerance of itself
    const half_t min_half = ck::NumericLimits<half_t>::Min();
    const f8_t f8_from_min = f8_convert_sr<f8_t>(min_half);
    ASSERT_NEAR(min_half, type_convert<half_t>(f8_from_min), abs_tol);

    // 240.0 (the value the original test treats as the f8_t maximum) is preserved
    const f8_t f8_from_240 = f8_convert_sr<f8_t>(half_t{240.0});
    ASSERT_NEAR(half_t{240.0}, type_convert<half_t>(f8_from_240), abs_tol);

    // NumericLimits<half_t>::Max() is expected to be clipped to 240.0
    const f8_t f8_from_max = f8_convert_sr<f8_t>(ck::NumericLimits<half_t>::Max());
    ASSERT_NEAR(half_t{240.0}, type_convert<half_t>(f8_from_max), abs_tol);

    // a half_t quiet NaN is expected to become the f8_t quiet-NaN pattern 0x80
    const f8_t f8_from_nan = f8_convert_sr<f8_t>(ck::NumericLimits<half_t>::QuietNaN());
    ASSERT_NEAR(0x80, f8_from_nan, abs_tol);

    // a small positive value survives the round trip
    const half_t pos_half = half_t{0.0078125};
    const f8_t f8_from_pos = f8_convert_sr<f8_t>(pos_half);
    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_from_pos), abs_tol);

    // a small negative value survives the round trip
    const half_t neg_half = half_t{-0.0156250};
    const f8_t f8_from_neg = f8_convert_sr<f8_t>(neg_half);
    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_from_neg), abs_tol);
}
test/elementwise_normalization/CMakeLists.txt
View file @
4100d1d8
add_custom_target
(
test_elementwise_normalization
)
add_gtest_executable
(
test_elementwise_layernorm_fp16 test_elementwise_layernorm_fp16.cpp
)
target_link_libraries
(
test_elementwise_layernorm_fp16 PRIVATE utility device_elementwise_normalization_instance
)
add_dependencies
(
test_elementwise_normalization test_elementwise_layernorm_fp16
)
if
(
DTYPES MATCHES
"fp16"
OR NOT DEFINED DTYPES
)
add_custom_target
(
test_elementwise_normalization
)
add_gtest_executable
(
test_elementwise_layernorm_fp16 test_elementwise_layernorm_fp16.cpp
)
target_link_libraries
(
test_elementwise_layernorm_fp16 PRIVATE utility device_elementwise_normalization_instance
)
add_dependencies
(
test_elementwise_normalization test_elementwise_layernorm_fp16
)
endif
()
\ No newline at end of file
test/gemm/CMakeLists.txt
View file @
4100d1d8
if
(
DTYPES MATCHES
"fp32"
OR NOT DEFINED DTYPES
)
add_test_executable
(
test_gemm_fp32 gemm_fp32.cpp
)
target_link_libraries
(
test_gemm_fp32 PRIVATE utility
)
target_link_libraries
(
test_gemm_fp32 PRIVATE device_gemm_instance
)
endif
()
if
(
DTYPES MATCHES
"fp16"
OR NOT DEFINED DTYPES
)
add_test_executable
(
test_gemm_fp16 gemm_fp16.cpp
)
target_link_libraries
(
test_gemm_fp16 PRIVATE utility
)
target_link_libraries
(
test_gemm_fp16 PRIVATE device_gemm_instance
)
add_test_executable
(
test_gemm_bf16 gemm_bf16.cpp
)
target_link_libraries
(
test_gemm_bf16 PRIVATE utility
)
target_link_libraries
(
test_gemm_bf16 PRIVATE device_gemm_instance
)
add_test_executable
(
test_gemm_int8 gemm_int8.cpp
)
target_link_libraries
(
test_gemm_int8 PRIVATE utility
)
target_link_libraries
(
test_gemm_int8 PRIVATE device_gemm_instance
)
add_library
(
gemm_standalone_xdl_fp16_instances STATIC
instance/gemm_f16_nn_instance.cpp
instance/gemm_f16_nt_instance.cpp
...
...
@@ -24,3 +17,14 @@ add_library(gemm_standalone_xdl_fp16_instances STATIC
add_test_executable
(
test_gemm_standalone_xdl_fp16 gemm_standalone_xdl_fp16.cpp
)
target_link_libraries
(
test_gemm_standalone_xdl_fp16 PRIVATE gemm_standalone_xdl_fp16_instances utility
)
target_include_directories
(
test_gemm_standalone_xdl_fp16 PRIVATE instance/
)
endif
()
if
(
DTYPES MATCHES
"bf16"
OR NOT DEFINED DTYPES
)
add_test_executable
(
test_gemm_bf16 gemm_bf16.cpp
)
target_link_libraries
(
test_gemm_bf16 PRIVATE utility
)
target_link_libraries
(
test_gemm_bf16 PRIVATE device_gemm_instance
)
endif
()
if
(
DTYPES MATCHES
"int8"
OR NOT DEFINED DTYPES
)
add_test_executable
(
test_gemm_int8 gemm_int8.cpp
)
target_link_libraries
(
test_gemm_int8 PRIVATE utility
)
target_link_libraries
(
test_gemm_int8 PRIVATE device_gemm_instance
)
endif
()
\ No newline at end of file
test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp
View file @
4100d1d8
...
...
@@ -5,7 +5,7 @@
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_waveletmodel_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_gemm_xdl_waveletmodel_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
...
...
test/gemm_layernorm/CMakeLists.txt
View file @
4100d1d8
...
...
@@ -2,10 +2,12 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
DTYPES MATCHES
"fp16"
OR NOT DEFINED DTYPES
)
add_custom_target
(
test_gemm_layernorm
)
add_gtest_executable
(
test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16.cpp
)
target_link_libraries
(
test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance
)
add_dependencies
(
test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16
)
set
(
target 1
)
endif
()
endif
()
endforeach
()
test/gemm_reduce/CMakeLists.txt
View file @
4100d1d8
add_test_executable
(
test_gemm_reduce_fp16 gemm_reduce_fp16.cpp
)
target_link_libraries
(
test_gemm_reduce_fp16 PRIVATE utility
)
target_link_libraries
(
test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance
)
if
(
DTYPES MATCHES
"fp16"
OR NOT DEFINED DTYPES
)
add_test_executable
(
test_gemm_reduce_fp16 gemm_reduce_fp16.cpp
)
target_link_libraries
(
test_gemm_reduce_fp16 PRIVATE utility
)
target_link_libraries
(
test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance
)
endif
()
\ No newline at end of file
test/grouped_convnd_bwd_data/CMakeLists.txt
0 → 100644
View file @
4100d1d8
# Build the grouped N-d backward-data convolution tests only when GPU_TARGETS
# mentions gfx908, gfx90a or gfx940.
# NOTE(review): other test directories gate on a list that also contains
# gfx941 and gfx942 -- confirm whether those targets should be enabled here too.
if(GPU_TARGETS MATCHES "gfx908|gfx90a|gfx940")
    # functional test: links both the 2D and 3D backward-data instance libraries
    add_gtest_executable(test_grouped_convnd_bwd_data test_grouped_convnd_bwd_data.cpp)
    target_link_libraries(test_grouped_convnd_bwd_data PRIVATE utility device_grouped_conv2d_bwd_data_instance device_grouped_conv3d_bwd_data_instance)

    # interface test: links the 2D instance library only
    add_gtest_executable(test_grouped_convnd_bwd_data_interface test_grouped_convnd_bwd_data_interface.cpp)
    target_link_libraries(test_grouped_convnd_bwd_data_interface PRIVATE utility device_grouped_conv2d_bwd_data_instance)
endif()
\ No newline at end of file
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp
0 → 100644
View file @
4100d1d8
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_grouped_conv_bwd_data_impl.hpp"
// Typed fixture that feeds a list of convolution problems to
// ck::profiler::profile_grouped_conv_bwd_data_impl.
//
// Tuple is <DataType, OutLayout, WeiLayout, InLayout>; the same DataType is
// passed for all three data-type template arguments of the profiler.
template <typename Tuple>
class TestGroupedConvndBwdData : public ::testing::Test
{
    protected:
    using DataType  = std::tuple_element_t<0, Tuple>;
    using OutLayout = std::tuple_element_t<1, Tuple>;
    using WeiLayout = std::tuple_element_t<2, Tuple>;
    using InLayout  = std::tuple_element_t<3, Tuple>;

    // Problems to run; each test fills this before calling Run().
    std::vector<ck::utils::conv::ConvParam> conv_params;

    // Runs every problem in conv_params for NDimSpatial spatial dimensions.
    // Fails if the list is empty or if any profiler call returns false; once
    // one call has failed, the remaining problems are not executed.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());

        bool all_passed = true;
        for(auto& conv_param : conv_params)
        {
            if(!all_passed)
            {
                break; // mirrors short-circuit evaluation: skip work after a failure
            }

            all_passed = ck::profiler::profile_grouped_conv_bwd_data_impl<NDimSpatial,
                                                                          OutLayout,
                                                                          WeiLayout,
                                                                          InLayout,
                                                                          DataType,
                                                                          DataType,
                                                                          DataType>(
                true,  // do_verification
                1,     // init_method: integer value
                false, // do_log
                false, // time_kernel
                conv_param);
        }
        EXPECT_TRUE(all_passed);
    }
};
using namespace ck::tensor_layout::convolution;

// ---- 2D: <data type, output layout, weight layout, input layout> ----
using KernelTypes2d = ::testing::Types<std::tuple<float, GNHWK, GKYXC, GNHWC>,
                                       std::tuple<ck::half_t, GNHWK, GKYXC, GNHWC>,
                                       std::tuple<ck::bhalf_t, GNHWK, GKYXC, GNHWC>,
                                       std::tuple<float, NHWGK, GKYXC, NHWGC>,
                                       std::tuple<ck::half_t, NHWGK, GKYXC, NHWGC>,
                                       std::tuple<ck::bhalf_t, NHWGK, GKYXC, NHWGC>>;

// Distinct fixture type so the 2D cases get their own typed test suite.
template <typename Tuple>
class TestGroupedConvndBwdData2d : public TestGroupedConvndBwdData<Tuple>
{
};

TYPED_TEST_SUITE(TestGroupedConvndBwdData2d, KernelTypes2d);

// ---- 3D: <data type, output layout, weight layout, input layout> ----
using KernelTypes3d = ::testing::Types<std::tuple<float, GNDHWK, GKZYXC, GNDHWC>,
                                       std::tuple<ck::half_t, GNDHWK, GKZYXC, GNDHWC>,
                                       std::tuple<ck::bhalf_t, GNDHWK, GKZYXC, GNDHWC>,
                                       std::tuple<float, NDHWGK, GKZYXC, NDHWGC>,
                                       std::tuple<ck::half_t, NDHWGK, GKZYXC, NDHWGC>,
                                       std::tuple<ck::bhalf_t, NDHWGK, GKZYXC, NDHWGC>>;

// Distinct fixture type so the 3D cases get their own typed test suite.
template <typename Tuple>
class TestGroupedConvndBwdData3d : public TestGroupedConvndBwdData<Tuple>
{
};

TYPED_TEST_SUITE(TestGroupedConvndBwdData3d, KernelTypes3d);
// Runs the 2D backward-data profiler over seven problem configurations.
// Each entry is one ck::utils::conv::ConvParam initializer: five scalars
// followed by six two-element lists.
TYPED_TEST(TestGroupedConvndBwdData2d, Test2D)
{
    // Replace any previous contents with this test's problem list.
    this->conv_params = {
        {2, 2, 4, 192, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}},
        {2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
        {2, 1, 1, 1, 32, {8, 8}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {2, 1, 1, 64, 3, {8, 8}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
        {2, 1, 1, 1, 1, {8, 8}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}},
    };

    this->template Run<2>();
}
// Runs the 3D backward-data profiler over six problem configurations.
// Each entry is one ck::utils::conv::ConvParam initializer: five scalars
// followed by six three-element lists.
TYPED_TEST(TestGroupedConvndBwdData3d, Test3D)
{
    // Replace any previous contents with this test's problem list.
    this->conv_params = {
        {3, 2, 16, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 2, 2, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {3, 2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
        {3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
        {3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
    };

    this->template Run<3>();
}
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface.cpp
0 → 100644
View file @
4100d1d8
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include <gtest/gtest.h>
using
DataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
using
Pass
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
ConvBackwardDataSpecialization
=
ck
::
tensor_operation
::
device
::
ConvolutionBackwardDataSpecialization
;
static
constexpr
auto
ConvBwdDataDefault
=
ConvBackwardDataSpecialization
::
Default
;
static
constexpr
auto
Filter1x1Stride1Pad0
=
ConvBackwardDataSpecialization
::
Filter1x1Stride1Pad0
;
// Fixture for interface-level checks of the grouped backward-data convolution
// device operation: it builds an argument for a fixed 2D device instance and
// reports whether the instance accepts it. No kernel is launched.
//
// Tuple    : <OutLayout, WeiLayout, InLayout>
// ConvSpec : backward-data specialization compiled into the device instance
template <typename Tuple, ConvBackwardDataSpecialization ConvSpec>
class TestGroupedConvndBwdData : public ::testing::Test
{
    protected:
    // The device instance below is always built for two spatial dimensions.
    static constexpr ck::index_t NDimSpatial = 2;

    using OutLayout = std::tuple_element_t<0, Tuple>;
    using WeiLayout = std::tuple_element_t<1, Tuple>;
    using InLayout  = std::tuple_element_t<2, Tuple>;

    // clang-format off
    using GroupedConvBwdDataDeviceInstance = ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<
        NDimSpatial,                        // NDimSpatial
        OutLayout, WeiLayout,               // ALayout, BLayout
        ck::Tuple<>,                        // DsLayout
        InLayout,                           // ELayout
        DataType, DataType,                 // ADataType, BDataType
        AccDataType,                        // AccDataType
        DataType,                           // CShuffleDataType
        ck::Tuple<>,                        // DsDataType
        DataType,                           // EDataType
        Pass, Pass, Pass,                   // A / B / CDE elementwise operations
        ConvSpec,                           // ConvolutionBackwardDataSpecialization
        true, true,                         // DoPadGemmM, DoPadGemmN
        1,                                  // NumGemmKPrefetchStage
        256,                                // BlockSize
        128, 256, 32,                       // MPerBlock, NPerBlock, KPerBlock
        8, 2,                               // AK1, BK1
        32, 32,                             // MPerXDL, NPerXDL
        2, 4,                               // MXdlPerWave, NXdlPerWave
        S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, // ABlockTransfer: ThreadClusterLengths_AK0_M_AK1, ThreadClusterArrangeOrder, SrcAccessOrder
        2, 8, 8,                            // ABlockTransfer: SrcVectorDim, SrcScalarPerVector, DstScalarPerVector_AK1
        1,                                  // ABlockLdsExtraM
        S<4, 64, 1>, S<0, 2, 1>, S<0, 2, 1>, // BBlockTransfer: ThreadClusterLengths_BK0_N_BK1, ThreadClusterArrangeOrder, SrcAccessOrder
        1, 4, 2,                            // BBlockTransfer: SrcVectorDim, SrcScalarPerVector, DstScalarPerVector_BK1
        0,                                  // BBlockLdsExtraN
        1, 1,                               // CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle
        S<1, 32, 1, 8>,                     // CDEBlockTransfer cluster lengths (_MBlock_MPerBlock_NBlock_NPerBlock)
        8>;                                 // CDEBlockTransferScalarPerVector_NPerBlock
    // clang-format on

    // Problem description; each test assigns this before calling Run().
    ck::utils::conv::ConvParam conv_param;

    // Builds a device argument from conv_param and returns whether the device
    // instance reports it as supported.
    //
    // NDim must equal the class-level NDimSpatial, because the device instance
    // is compiled for that dimensionality. The parameter previously shared the
    // class constant's name and silently hid it inside this function.
    template <ck::index_t NDim>
    bool Run()
    {
        static_assert(NDim == NDimSpatial,
                      "Run<N>() must match the dimensionality of the device instance");

        using TensorDims  = std::array<ck::index_t, NDimSpatial + 3>;
        using SpatialDims = std::array<ck::index_t, NDimSpatial>;

        const auto out_g_n_k_wos_desc =
            ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(
                conv_param);
        const auto wei_g_k_c_xs_desc =
            ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(
                conv_param);
        const auto in_g_n_c_wis_desc =
            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
                conv_param);

        TensorDims out_lengths{};
        TensorDims out_strides{};
        TensorDims wei_lengths{};
        TensorDims wei_strides{};
        TensorDims in_lengths{};
        TensorDims in_strides{};
        SpatialDims conv_filter_strides{};
        SpatialDims conv_filter_dilations{};
        SpatialDims input_left_pads{};
        SpatialDims input_right_pads{};

        // Copies a source range into the front of a fixed-size array.
        auto copy = [](const auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };

        copy(out_g_n_k_wos_desc.GetLengths(), out_lengths);
        copy(out_g_n_k_wos_desc.GetStrides(), out_strides);
        copy(wei_g_k_c_xs_desc.GetLengths(), wei_lengths);
        copy(wei_g_k_c_xs_desc.GetStrides(), wei_strides);
        copy(in_g_n_c_wis_desc.GetLengths(), in_lengths);
        copy(in_g_n_c_wis_desc.GetStrides(), in_strides);
        copy(conv_param.conv_filter_strides_, conv_filter_strides);
        copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
        copy(conv_param.input_left_pads_, input_left_pads);
        copy(conv_param.input_right_pads_, input_right_pads);

        auto conv = GroupedConvBwdDataDeviceInstance{};

        // Data pointers are null: the argument is only inspected, never run.
        auto argument = conv.MakeArgument(nullptr,
                                          nullptr,
                                          std::array<const void*, 0>{},
                                          nullptr,
                                          out_lengths,
                                          out_strides,
                                          wei_lengths,
                                          wei_strides,
                                          {},
                                          {},
                                          in_lengths,
                                          in_strides,
                                          conv_filter_strides,
                                          conv_filter_dilations,
                                          input_left_pads,
                                          input_right_pads,
                                          Pass{},
                                          Pass{},
                                          Pass{});

        return conv.IsSupportedArgument(argument);
    }
};
using
GNHWC
=
ck
::
tensor_layout
::
convolution
::
GNHWC
;
using
NHWGC
=
ck
::
tensor_layout
::
convolution
::
NHWGC
;
using
GKYXC
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
GNHWK
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
NHWGK
=
ck
::
tensor_layout
::
convolution
::
NHWGK
;
using
KernelTypes
=
::
testing
::
Types
<
std
::
tuple
<
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
NHWGK
,
GKYXC
,
NHWGC
>>
;
template
<
typename
Tuple
>
class
TestGroupedConvndBwdDataDefault
:
public
TestGroupedConvndBwdData
<
Tuple
,
ConvBwdDataDefault
>
{
};
template
<
typename
Tuple
>
class
TestGroupedConvndBwdDataFilter1x1
:
public
TestGroupedConvndBwdData
<
Tuple
,
Filter1x1Stride1Pad0
>
{
};
TYPED_TEST_SUITE
(
TestGroupedConvndBwdDataDefault
,
KernelTypes
);
TYPED_TEST_SUITE
(
TestGroupedConvndBwdDataFilter1x1
,
KernelTypes
);
// The Filter1x1Stride1Pad0 instance must reject problems with a 3x3 filter,
// stride 2 or non-zero padding, and accept the 1x1 / stride 1 / pad 0 problem.
TYPED_TEST(TestGroupedConvndBwdDataFilter1x1, SpecializationCheck)
{
    // filter 3,3 instead of 1,1 -> rejected
    this->conv_param = {2, 2, 4, 192, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
    EXPECT_FALSE(this->template Run<2>());

    // strides 2,2 instead of 1,1 -> rejected
    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    EXPECT_FALSE(this->template Run<2>());

    // padding 1,1 on both sides -> rejected
    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}};
    EXPECT_FALSE(this->template Run<2>());

    // filter 1,1, stride 1,1, no padding -> accepted
    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
    EXPECT_TRUE(this->template Run<2>());
}
// The Default instance must reject problems whose fourth or fifth scalar
// (129 and 257 here, instead of 128 and 256) does not fit the vector widths
// compiled into the instance.
TYPED_TEST(TestGroupedConvndBwdDataDefault, VectorLoadCheck)
{
    // vector load for A: fourth scalar is 129
    this->conv_param = {2, 2, 128, 129, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    EXPECT_FALSE(this->template Run<2>());

    // vector load for B, E, Ds: fifth scalar is 257
    this->conv_param = {2, 2, 128, 128, 257, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    EXPECT_FALSE(this->template Run<2>());
}
test/grouped_convnd_bwd_weight/CMakeLists.txt
View file @
4100d1d8
...
...
@@ -2,8 +2,10 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
add_gtest_executable
(
test_grouped_convnd_bwd_weight grouped_convnd_bwd_weight.cpp
)
add_gtest_executable
(
test_grouped_convnd_bwd_weight
test_
grouped_convnd_bwd_weight.cpp
)
target_link_libraries
(
test_grouped_convnd_bwd_weight PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance
)
add_gtest_executable
(
test_grouped_convnd_bwd_weight_interface test_grouped_convnd_bwd_weight_interface.cpp
)
target_link_libraries
(
test_grouped_convnd_bwd_weight_interface PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance
)
set
(
target 1
)
endif
()
endforeach
()
\ No newline at end of file
test/grouped_convnd_bwd_weight/grouped_convnd_bwd_weight.cpp
deleted
100644 → 0
View file @
48717006
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_grouped_conv_bwd_weight_impl.hpp"
// Typed fixture that feeds a list of convolution problems to
// ck::profiler::profile_grouped_conv_bwd_weight_impl.
//
// Tuple is <DataType>; the same type is passed for all three data-type
// template arguments of the profiler.
template <typename Tuple>
class TestGroupedConvndBwdWeight : public ::testing::Test
{
    protected:
    using DataType = std::tuple_element_t<0, Tuple>;

    // Problems to run; each test fills this before calling Run().
    std::vector<ck::utils::conv::ConvParam> conv_params;
    // split_k value forwarded to the profiler.
    ck::index_t split_k{2};

    // Runs every problem in conv_params for NDimSpatial spatial dimensions
    // (1, 2 or 3; the value indexes the layout tuples below) and expects each
    // profiler call to return true.
    template <ck::index_t NDimSpatial>
    void Run()
    {
        namespace layout = ck::tensor_layout::convolution;

        // Pick the 1D / 2D / 3D layout that matches NDimSpatial.
        using InLayout =
            ck::tuple_element_t<NDimSpatial - 1,
                                ck::Tuple<layout::GNWC, layout::GNHWC, layout::GNDHWC>>;
        using WeiLayout =
            ck::tuple_element_t<NDimSpatial - 1,
                                ck::Tuple<layout::GKXC, layout::GKYXC, layout::GKZYXC>>;
        using OutLayout =
            ck::tuple_element_t<NDimSpatial - 1,
                                ck::Tuple<layout::GNWK, layout::GNHWK, layout::GNDHWK>>;

        // Checked before the loop: inside the loop this guard could never
        // fire, so an empty problem list used to pass without running anything.
        EXPECT_FALSE(conv_params.empty());

        for(auto& param : conv_params)
        {
            const bool pass =
                ck::profiler::profile_grouped_conv_bwd_weight_impl<NDimSpatial,
                                                                   InLayout,
                                                                   WeiLayout,
                                                                   OutLayout,
                                                                   DataType,
                                                                   DataType,
                                                                   DataType>(
                    true,  // do_verification
                    1,     // init_method: integer value
                    false, // do_log
                    false, // time_kernel
                    param,
                    split_k);
            EXPECT_TRUE(pass);
        }
    }
};
// Data types the backward-weight suite is instantiated for; each tuple
// carries the single DataType read by the fixture.
using KernelTypes = ::testing::Types<std::tuple<float>,
                                     std::tuple<ck::half_t>,
                                     std::tuple<ck::bhalf_t>>;

TYPED_TEST_SUITE(TestGroupedConvndBwdWeight, KernelTypes);
// Runs the 1D backward-weight profiler over three problem configurations.
// Each entry is one ck::utils::conv::ConvParam initializer: five scalars
// followed by six one-element lists.
TYPED_TEST(TestGroupedConvndBwdWeight, Test1D)
{
    // Replace any previous contents with this test's problem list.
    this->conv_params = {
        {1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}},
        {1, 2, 32, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}},
        {1, 2, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}},
    };

    this->template Run<1>();
}
// 2D backward-weight problems, run through Run<2>().
TYPED_TEST(TestGroupedConvndBwdWeight, Test2D)
{
    auto& params = this->conv_params;
    params.clear();
    params.push_back({2, 2, 64, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
    params.push_back({2, 2, 4, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
    this->template Run<2>();
}
// 3D backward-weight problems, run through Run<3>().
TYPED_TEST(TestGroupedConvndBwdWeight, Test3D)
{
    auto& params = this->conv_params;
    params.clear();
    params.push_back(
        {3, 2, 16, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    params.push_back(
        {3, 2, 2, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    params.push_back(
        {3, 2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    this->template Run<3>();
}
test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp
0 → 100644
View file @
4100d1d8
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/common_header.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "profiler/profile_grouped_conv_bwd_weight_impl.hpp"
/// Typed GoogleTest fixture for the grouped N-d convolution backward-weight profiler.
///
/// @tparam Tuple  std::tuple laid out as
///                <InDataType, WeiDataType, OutDataType,
///                 InLayout, WeiLayout, OutLayout, NDimSpatial>,
///                where the last element is a type whose value-initialised
///                instance is used as the spatial-dimension template argument.
template <typename Tuple>
class TestGroupedConvndBwdWeight : public ::testing::Test
{
    protected:
    using InDataType  = std::tuple_element_t<0, Tuple>;
    using WeiDataType = std::tuple_element_t<1, Tuple>;
    using OutDataType = std::tuple_element_t<2, Tuple>;
    using InLayout    = std::tuple_element_t<3, Tuple>;
    using WeiLayout   = std::tuple_element_t<4, Tuple>;
    using OutLayout   = std::tuple_element_t<5, Tuple>;
    using NDimSpatial = std::tuple_element_t<6, Tuple>;

    // Convolution problems to profile; populated by each test body.
    std::vector<ck::utils::conv::ConvParam> conv_params;
    // Forwarded as the last argument (split_k) of the profiler call.
    ck::index_t split_k{2};

    /// Profiles the entries of `conv_params` in order and expects all of them
    /// to pass. Profiling stops at the first failing entry.
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());

        bool pass = true;
        for(auto& param : conv_params)
        {
            // Once one problem has failed the remaining ones are not profiled.
            if(!pass)
            {
                break;
            }

            pass = ck::profiler::profile_grouped_conv_bwd_weight_impl<NDimSpatial{},
                                                                      InLayout,
                                                                      WeiLayout,
                                                                      OutLayout,
                                                                      InDataType,
                                                                      WeiDataType,
                                                                      OutDataType>(
                true,  // do_verification
                1,     // init_method: integer value
                false, // do_log
                false, // time_kernel
                param,
                split_k);
        }
        EXPECT_TRUE(pass);
    }
};
// Distinct fixture name so the 1D type list gets its own typed suite.
template <typename Tuple>
class TestGroupedConvndBwdWeight1d : public TestGroupedConvndBwdWeight<Tuple>
{
};
// Distinct fixture name so the 2D type list gets its own typed suite.
template <typename Tuple>
class TestGroupedConvndBwdWeight2d : public TestGroupedConvndBwdWeight<Tuple>
{
};
// Distinct fixture name so the 3D type list gets its own typed suite.
template <typename Tuple>
class TestGroupedConvndBwdWeight3d : public TestGroupedConvndBwdWeight<Tuple>
{
};
// Brings the layout tags (GNWC, GKXC, ...) used by the type lists into scope.
using namespace ck::tensor_layout::convolution;

// 1D configurations: <InData, WeiData, OutData, InLayout, WeiLayout, OutLayout, NDimSpatial>.
using KernelTypes1d = ::testing::Types<
    std::tuple<float, float, float, GNWC, GKXC, GNWK, ck::Number<1>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, GNWC, GKXC, GNWK, ck::Number<1>>,
    std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNWC, GKXC, GNWK, ck::Number<1>>>;
// 2D configurations: <InData, WeiData, OutData, InLayout, WeiLayout, OutLayout, NDimSpatial>.
using KernelTypes2d = ::testing::Types<
    // GNHWC / GKYXC / GNHWK layouts
    std::tuple<float, float, float, GNHWC, GKYXC, GNHWK, ck::Number<2>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, GNHWC, GKYXC, GNHWK, ck::Number<2>>,
    std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNHWC, GKYXC, GNHWK, ck::Number<2>>,
    // NHWGC / GKYXC / NHWGK layouts
    std::tuple<float, float, float, NHWGC, GKYXC, NHWGK, ck::Number<2>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, NHWGC, GKYXC, NHWGK, ck::Number<2>>,
    std::tuple<ck::bhalf_t, float, ck::bhalf_t, NHWGC, GKYXC, NHWGK, ck::Number<2>>>;
// 3D configurations: <InData, WeiData, OutData, InLayout, WeiLayout, OutLayout, NDimSpatial>.
using KernelTypes3d = ::testing::Types<
    // GNDHWC / GKZYXC / GNDHWK layouts
    std::tuple<float, float, float, GNDHWC, GKZYXC, GNDHWK, ck::Number<3>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, GNDHWC, GKZYXC, GNDHWK, ck::Number<3>>,
    std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNDHWC, GKZYXC, GNDHWK, ck::Number<3>>,
    // NDHWGC / GKZYXC / NDHWGK layouts
    std::tuple<float, float, float, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>,
    std::tuple<ck::bhalf_t, float, ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>>;
// Bind each per-dimension fixture to its type list.
TYPED_TEST_SUITE(TestGroupedConvndBwdWeight1d, KernelTypes1d);
TYPED_TEST_SUITE(TestGroupedConvndBwdWeight2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndBwdWeight3d, KernelTypes3d);
// 1D backward-weight problems; the last three use very small leading sizes.
TYPED_TEST(TestGroupedConvndBwdWeight1d, Test1D)
{
    auto& params = this->conv_params;
    params.clear();
    params.push_back({1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
    params.push_back({1, 2, 32, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
    params.push_back({1, 2, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
    params.push_back({1, 1, 1, 1, 32, {3}, {32}, {1}, {1}, {1}, {1}});
    params.push_back({1, 1, 1, 64, 3, {3}, {32}, {1}, {1}, {1}, {1}});
    params.push_back({1, 1, 1, 1, 1, {3}, {32}, {1}, {1}, {1}, {1}});
    this->Run();
}
// 2D backward-weight problems; the last three use very small leading sizes.
TYPED_TEST(TestGroupedConvndBwdWeight2d, Test2D)
{
    auto& params = this->conv_params;
    params.clear();
    params.push_back({2, 2, 64, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
    params.push_back({2, 2, 4, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
    params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->Run();
}
// 3D backward-weight problems; the last three use very small leading sizes.
TYPED_TEST(TestGroupedConvndBwdWeight3d, Test3D)
{
    auto& params = this->conv_params;
    params.clear();
    params.push_back(
        {3, 2, 16, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    params.push_back(
        {3, 2, 2, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    params.push_back(
        {3, 2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    params.push_back(
        {3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    params.push_back(
        {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    params.push_back(
        {3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->Run();
}
test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface.cpp
0 → 100644
View file @
4100d1d8
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include <gtest/gtest.h>
// Short names for the element types used by the device instance below.
using F16 = ck::half_t;
using F32 = float;

// Element-wise operation passed for the input, weight and output tensors.
using PassThrough = ck::tensor_operation::element_wise::PassThrough;

// Compile-time integer sequence shorthand.
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;

using ConvolutionBackwardWeightSpecialization =
    ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization;

// The two specializations exercised by the fixtures in this file.
static constexpr auto ConvBwdWeightDefault = ConvolutionBackwardWeightSpecialization::Default;
static constexpr auto Filter1x1Stride1Pad0 =
    ConvolutionBackwardWeightSpecialization::Filter1x1Stride1Pad0;
/// Fixture that checks whether one fixed 2D XDL CShuffle backward-weight device
/// instance reports a convolution problem as supported.
///
/// @tparam Tuple     std::tuple of layouts ordered <OutLayout, WeiLayout, InLayout>
///                   (note the reversed order: index 0 is the output layout).
/// @tparam ConvSpec  backward-weight specialization forwarded to the instance.
template <typename Tuple, ConvolutionBackwardWeightSpecialization ConvSpec>
class TestGroupedConvndBwdWeight : public ::testing::Test
{
    protected:
    static constexpr ck::index_t NDimSpatial = 2;

    using InLayout  = std::tuple_element_t<2, Tuple>;
    using WeiLayout = std::tuple_element_t<1, Tuple>;
    using OutLayout = std::tuple_element_t<0, Tuple>;

    // clang-format off
    using GroupedConvBwdWeightDeviceInstance =
        ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Xdl_CShuffle<
            // NumDimSpatial | InLayout | WeiLayout | OutLayout
            NDimSpatial, InLayout, WeiLayout, OutLayout,
            // InDataType | WeiDataType | OutDataType | AccDataType
            F16, F16, F16, F32,
            // In / Wei / Out elementwise operation
            PassThrough, PassThrough, PassThrough,
            // ConvBackwardWeightSpecialization
            ConvSpec,
            // BlockSize | MPerBlock | NPerBlock | K0PerBlock | K1
            128, 32, 128, 4, 8,
            // MPerXDL | NPerXDL | MXdlPerWave | NXdlPerWave
            32, 32, 1, 2,
            // ABlockTransfer: ThreadClusterLengths_K0_M_K1 | ThreadClusterArrangeOrder | SrcAccessOrder |
            //                 SrcVectorDim | SrcScalarPerVector | DstScalarPerVector_K1 | ABlockLdsAddExtraM
            S<1, 4, 4, 8>, S<0, 3, 1, 2>, S<0, 2, 1, 3>, 2, 8, 1, true,
            // BBlockTransfer: ThreadClusterLengths_K0_N_K1 | ThreadClusterArrangeOrder | SrcAccessOrder |
            //                 SrcVectorDim | SrcScalarPerVector | DstScalarPerVector_K1 | BBlockLdsAddExtraN
            S<1, 4, 16, 2>, S<0, 3, 1, 2>, S<0, 2, 1, 3>, 2, 8, 4, true,
            // CShuffleMXdlPerWavePerShuffle | CShuffleNXdlPerWavePerShuffle
            1, 1,
            // CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock | CBlockTransferScalarPerVector_NWaveNPerXdl
            S<1, 32, 1, 4>, 8>;
    // clang-format on

    // Problem under test; assigned by each test case before calling Run().
    ck::utils::conv::ConvParam conv_param;
    // Forwarded as the last argument of MakeArgument().
    ck::index_t split_k{2};

    /// Builds an argument for the device instance from `conv_param` (with null
    /// tensor pointers) and returns what IsSupportedArgument() reports for it.
    ///
    /// @tparam NDimSpatial  number of spatial dimensions; sizes the local
    ///                      length/stride arrays. Within this function the
    ///                      name refers to this parameter, not to the class
    ///                      constant of the same name.
    template <ck::index_t NDimSpatial>
    bool Run()
    {
        // Copies a whole source range into the destination iterator.
        auto copy_into = [](const auto& src, auto dst) { std::copy(begin(src), end(src), dst); };

        // Input tensor: lengths and strides.
        const auto in_g_n_c_wis_desc =
            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
                conv_param);
        std::array<ck::index_t, NDimSpatial + 3> input_lengths{};
        std::array<ck::index_t, NDimSpatial + 3> input_strides{};
        copy_into(in_g_n_c_wis_desc.GetLengths(), begin(input_lengths));
        copy_into(in_g_n_c_wis_desc.GetStrides(), begin(input_strides));

        // Weight tensor: lengths and strides.
        const auto wei_g_k_c_xs_desc =
            ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(
                conv_param);
        std::array<ck::index_t, NDimSpatial + 3> filter_lengths{};
        std::array<ck::index_t, NDimSpatial + 3> weights_strides{};
        copy_into(wei_g_k_c_xs_desc.GetLengths(), begin(filter_lengths));
        copy_into(wei_g_k_c_xs_desc.GetStrides(), begin(weights_strides));

        // Output tensor: lengths and strides.
        const auto out_g_n_k_wos_desc =
            ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(
                conv_param);
        std::array<ck::index_t, NDimSpatial + 3> output_lengths{};
        std::array<ck::index_t, NDimSpatial + 3> output_strides{};
        copy_into(out_g_n_k_wos_desc.GetLengths(), begin(output_lengths));
        copy_into(out_g_n_k_wos_desc.GetStrides(), begin(output_strides));

        // Per-spatial-dimension convolution settings taken from conv_param.
        std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
        std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
        std::array<ck::index_t, NDimSpatial> input_left_pads{};
        std::array<ck::index_t, NDimSpatial> input_right_pads{};
        copy_into(conv_param.conv_filter_strides_, begin(conv_filter_strides));
        copy_into(conv_param.conv_filter_dilations_, begin(conv_filter_dilations));
        copy_into(conv_param.input_left_pads_, begin(input_left_pads));
        copy_into(conv_param.input_right_pads_, begin(input_right_pads));

        auto conv = GroupedConvBwdWeightDeviceInstance{};

        // The three leading pointer arguments are passed as nullptr.
        auto argument = conv.MakeArgument(nullptr,
                                          nullptr,
                                          nullptr,
                                          input_lengths,
                                          input_strides,
                                          filter_lengths,
                                          weights_strides,
                                          output_lengths,
                                          output_strides,
                                          conv_filter_strides,
                                          conv_filter_dilations,
                                          input_left_pads,
                                          input_right_pads,
                                          PassThrough{},
                                          PassThrough{},
                                          PassThrough{},
                                          split_k);
        return conv.IsSupportedArgument(argument);
    }
};
// 2D layout tags used by the type list below.
using GNHWC = ck::tensor_layout::convolution::GNHWC;
using NHWGC = ck::tensor_layout::convolution::NHWGC;

using GKYXC = ck::tensor_layout::convolution::GKYXC;

using GNHWK = ck::tensor_layout::convolution::GNHWK;
using NHWGK = ck::tensor_layout::convolution::NHWGK;
// Layout combinations; each tuple is ordered <OutLayout, WeiLayout, InLayout>,
// matching the indices the fixture reads (0 = out, 1 = wei, 2 = in).
using KernelTypes =
    ::testing::Types<std::tuple<GNHWK, GKYXC, GNHWC>, std::tuple<NHWGK, GKYXC, NHWGC>>;
// Fixture bound to the Default backward-weight specialization.
template <typename Tuple>
class TestGroupedConvndBwdWeightDefault
    : public TestGroupedConvndBwdWeight<Tuple, ConvBwdWeightDefault>
{
};
// Fixture bound to the Filter1x1Stride1Pad0 backward-weight specialization.
template <typename Tuple>
class TestGroupedConvndBwdWeightFilter1x1
    : public TestGroupedConvndBwdWeight<Tuple, Filter1x1Stride1Pad0>
{
};
// Both specialization fixtures run over the same layout combinations.
TYPED_TEST_SUITE(TestGroupedConvndBwdWeightDefault, KernelTypes);
TYPED_TEST_SUITE(TestGroupedConvndBwdWeightFilter1x1, KernelTypes);
// The Filter1x1Stride1Pad0 instance must reject problems that break any of its
// three constraints and accept one that satisfies all of them.
TYPED_TEST(TestGroupedConvndBwdWeightFilter1x1, SpecializationCheck)
{
    // Rejected: filter is 3,3 instead of 1,1
    this->conv_param = {2, 2, 4, 192, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
    bool is_supported = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // Rejected: strides are 2,2 instead of 1,1
    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    is_supported     = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // Rejected: non-zero padding
    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}};
    is_supported     = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // Accepted: 1,1 filter, unit strides, no padding
    this->conv_param = {2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
    is_supported     = this->template Run<2>();
    EXPECT_TRUE(is_supported);
}
// The Default instance must reject problems with an odd size (129 or 257) in one
// of the two fields after the third ConvParam entry.
// NOTE(review): these are presumably the output/input channel counts, which would
// make them indivisible by the instance's vector widths - confirm against ConvParam.
TYPED_TEST(TestGroupedConvndBwdWeightDefault, VectorLoadCheck)
{
    // vector load for A
    this->conv_param = {2, 2, 128, 129, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    bool is_supported = this->template Run<2>();
    EXPECT_FALSE(is_supported);

    // vector load for B, E, Ds
    this->conv_param = {2, 2, 128, 128, 257, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    is_supported     = this->template Run<2>();
    EXPECT_FALSE(is_supported);
}
test/grouped_convnd_fwd/grouped_convnd_fwd.cpp
View file @
4100d1d8
...
...
@@ -22,6 +22,8 @@ TEST_F(TestGroupedConvNdFwd, GroupedConv1dFwdGNWC)
conv_params
.
push_back
({
1
,
2
,
128
,
128
,
256
,
{
1
},
{
14
},
{
2
},
{
1
},
{
0
},
{
0
}});
conv_params
.
push_back
({
1
,
2
,
128
,
128
,
256
,
{
3
},
{
28
},
{
1
},
{
1
},
{
1
},
{
1
}});
conv_params
.
push_back
({
1
,
2
,
128
,
128
,
256
,
{
1
},
{
3
},
{
1
},
{
1
},
{
0
},
{
0
}});
conv_params
.
push_back
({
1
,
1
,
1
,
1
,
32
,
{
3
},
{
32
},
{
1
},
{
1
},
{
1
},
{
1
}});
conv_params
.
push_back
({
1
,
1
,
1
,
64
,
3
,
{
3
},
{
32
},
{
1
},
{
1
},
{
1
},
{
1
}});
for
(
auto
&
param
:
conv_params
)
{
...
...
@@ -96,6 +98,9 @@ TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdGNHWC)
conv_params
.
push_back
({
2
,
2
,
128
,
128
,
256
,
{
1
,
1
},
{
7
,
7
},
{
2
,
2
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
conv_params
.
push_back
({
2
,
2
,
128
,
128
,
256
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
conv_params
.
push_back
({
2
,
2
,
128
,
128
,
256
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
conv_params
.
push_back
({
2
,
1
,
1
,
1
,
32
,
{
3
,
3
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
conv_params
.
push_back
({
2
,
1
,
1
,
64
,
3
,
{
3
,
3
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
conv_params
.
push_back
({
2
,
1
,
1
,
1
,
1
,
{
3
,
3
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
for
(
auto
&
param
:
conv_params
)
{
...
...
@@ -173,6 +178,12 @@ TEST_F(TestGroupedConvNdFwd, GroupedConv3dFwdGNDHWC)
{
3
,
2
,
128
,
128
,
256
,
{
3
,
3
,
3
},
{
14
,
14
,
3
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
conv_params
.
push_back
(
{
3
,
2
,
128
,
128
,
256
,
{
1
,
1
,
1
},
{
3
,
3
,
3
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
conv_params
.
push_back
(
{
3
,
1
,
1
,
1
,
32
,
{
3
,
3
,
3
},
{
32
,
32
,
32
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
this
->
conv_params
.
push_back
(
{
3
,
1
,
1
,
64
,
3
,
{
3
,
3
,
3
},
{
32
,
32
,
32
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
conv_params
.
push_back
(
{
3
,
1
,
1
,
1
,
1
,
{
3
,
3
,
3
},
{
32
,
32
,
32
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
for
(
auto
&
param
:
conv_params
)
{
...
...
@@ -247,6 +258,9 @@ TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdNHWGC)
conv_params
.
push_back
({
2
,
2
,
128
,
128
,
256
,
{
1
,
1
},
{
7
,
7
},
{
2
,
2
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
conv_params
.
push_back
({
2
,
2
,
128
,
128
,
256
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
conv_params
.
push_back
({
2
,
2
,
128
,
128
,
256
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
conv_params
.
push_back
({
2
,
1
,
1
,
1
,
32
,
{
3
,
3
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
conv_params
.
push_back
({
2
,
1
,
1
,
64
,
3
,
{
3
,
3
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
conv_params
.
push_back
({
2
,
1
,
1
,
1
,
1
,
{
3
,
3
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
for
(
auto
&
param
:
conv_params
)
{
...
...
@@ -255,7 +269,7 @@ TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdNHWGC)
// fp16
pass
=
ck
::
profiler
::
profile_grouped_conv_fwd_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWGC
,
ck
::
tensor_layout
::
convolution
::
KYX
G
C
,
ck
::
tensor_layout
::
convolution
::
G
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWGK
,
ck
::
half_t
,
ck
::
half_t
,
...
...
test/grouped_gemm/CMakeLists.txt
View file @
4100d1d8
if
(
DTYPES MATCHES
"fp16"
OR NOT DEFINED DTYPES
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
...
...
@@ -12,3 +13,4 @@ foreach(gpu IN LISTS GPU_TARGETS)
set
(
target 1
)
endif
()
endforeach
()
endif
()
test/grouped_gemm/test_grouped_gemm_interface.cpp
View file @
4100d1d8
...
...
@@ -108,7 +108,7 @@ TEST_F(TestGGemmSplitKInterface_MKNKMN, KLoops)
// kloops % 2
Ks
=
std
::
vector
<
int
>
{
256
,
512
,
320
,
768
};
EXPECT_
FALS
E
(
EXPECT_
TRU
E
(
DefaultGGemmInstance
{}.
IsSupported
(
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
,
kbatch
));
// Not all gemms have same value for main_k0_block_loop!
...
...
test/grouped_gemm/test_grouped_gemm_util.hpp
View file @
4100d1d8
...
...
@@ -147,14 +147,14 @@ struct DeviceGroupedGemmSplitkInstanceWrapper
32
,
4
,
2
,
S
<
1
,
4
,
32
,
1
>
,
S
<
1
,
4
,
16
,
1
>
,
ABlockTransferThreadClusterArrageOrder
,
ABlockTransferSrcAccessOrder
,
ABlockTransferSrcVectorDim
::
value
,
ABlockTransferSrcScalarPerVector
,
ABlockTransferDstScalarPerVector_K1
::
value
,
ABlockLdsAddExtraM
::
value
,
S
<
1
,
4
,
32
,
1
>
,
S
<
1
,
4
,
16
,
1
>
,
BBlockTransferThreadClusterArrageOrder
,
BBlockTransferSrcAccessOrder
,
BBlockTransferSrcVectorDim
::
value
,
...
...
test/normalization/CMakeLists.txt
View file @
4100d1d8
add_custom_target
(
test_normalization
)
add_gtest_executable
(
test_layernorm2d_fp32 test_layernorm2d_fp32.cpp
)
add_gtest_executable
(
test_layernorm2d_fp16 test_layernorm2d_fp16.cpp
)
add_gtest_executable
(
test_groupnorm_fp16 test_groupnorm_fp16.cpp
)
add_gtest_executable
(
test_groupnorm_fp32 test_groupnorm_fp32.cpp
)
target_link_libraries
(
test_layernorm2d_fp32 PRIVATE utility device_normalization_instance
)
target_link_libraries
(
test_layernorm2d_fp16 PRIVATE utility device_normalization_instance
)
target_link_libraries
(
test_groupnorm_fp16 PRIVATE utility device_normalization_instance
)
target_link_libraries
(
test_groupnorm_fp32 PRIVATE utility device_normalization_instance
)
add_dependencies
(
test_normalization test_layernorm2d_fp32
)
add_dependencies
(
test_normalization test_layernorm2d_fp16
)
add_dependencies
(
test_normalization test_groupnorm_fp16
)
add_dependencies
(
test_normalization test_groupnorm_fp32
)
if
(
DTYPES MATCHES
"fp16"
OR DTYPES MATCHES
"fp32"
OR NOT DEFINED DTYPES
)
add_custom_target
(
test_normalization
)
endif
()
if
(
DTYPES MATCHES
"fp32"
OR NOT DEFINED DTYPES
)
add_gtest_executable
(
test_layernorm2d_fp32 test_layernorm2d_fp32.cpp
)
add_gtest_executable
(
test_groupnorm_fp32 test_groupnorm_fp32.cpp
)
target_link_libraries
(
test_layernorm2d_fp32 PRIVATE utility device_normalization_instance
)
target_link_libraries
(
test_groupnorm_fp32 PRIVATE utility device_normalization_instance
)
add_dependencies
(
test_normalization test_layernorm2d_fp32
)
add_dependencies
(
test_normalization test_groupnorm_fp32
)
endif
()
if
(
DTYPES MATCHES
"fp16"
OR NOT DEFINED DTYPES
)
add_gtest_executable
(
test_layernorm2d_fp16 test_layernorm2d_fp16.cpp
)
add_gtest_executable
(
test_groupnorm_fp16 test_groupnorm_fp16.cpp
)
target_link_libraries
(
test_layernorm2d_fp16 PRIVATE utility device_normalization_instance
)
target_link_libraries
(
test_groupnorm_fp16 PRIVATE utility device_normalization_instance
)
add_dependencies
(
test_normalization test_layernorm2d_fp16
)
add_dependencies
(
test_normalization test_groupnorm_fp16
)
endif
()
Prev
1
…
26
27
28
29
30
31
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment