Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
6b9a4bd5
Commit
6b9a4bd5
authored
Apr 23, 2024
by
Jun Liu
Browse files
Merge branch 'amd-develop-staging-0423' into amd-master
parents
56de337f
c5f1cdf7
Changes
364
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
149 additions
and
145 deletions
+149
-145
test/batched_gemm_reduce/batched_gemm_reduce_fp16_xdl.cpp
test/batched_gemm_reduce/batched_gemm_reduce_fp16_xdl.cpp
+0
-0
test/batched_gemm_softmax_gemm/CMakeLists.txt
test/batched_gemm_softmax_gemm/CMakeLists.txt
+6
-13
test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16_xdl.cpp
..._softmax_gemm/test_batched_gemm_softmax_gemm_fp16_xdl.cpp
+0
-0
test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
+21
-29
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16_xdl.cpp
.../test_batched_gemm_bias_softmax_gemm_permute_bf16_xdl.cpp
+0
-0
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16_xdl.cpp
.../test_batched_gemm_bias_softmax_gemm_permute_fp16_xdl.cpp
+0
-0
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16_xdl.cpp
...rmute/test_batched_gemm_softmax_gemm_permute_bf16_xdl.cpp
+0
-0
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16_xdl.cpp
...rmute/test_batched_gemm_softmax_gemm_permute_fp16_xdl.cpp
+0
-0
test/contraction/CMakeLists.txt
test/contraction/CMakeLists.txt
+9
-12
test/contraction/test_contraction_interface_xdl.cpp
test/contraction/test_contraction_interface_xdl.cpp
+1
-13
test/contraction/test_contraction_xdl.cpp
test/contraction/test_contraction_xdl.cpp
+87
-51
test/convnd_bwd_data/CMakeLists.txt
test/convnd_bwd_data/CMakeLists.txt
+3
-8
test/convnd_bwd_data/convnd_bwd_data_xdl.cpp
test/convnd_bwd_data/convnd_bwd_data_xdl.cpp
+0
-0
test/convnd_fwd/CMakeLists.txt
test/convnd_fwd/CMakeLists.txt
+3
-8
test/convnd_fwd/convnd_fwd_xdl.cpp
test/convnd_fwd/convnd_fwd_xdl.cpp
+0
-0
test/gemm_add/CMakeLists.txt
test/gemm_add/CMakeLists.txt
+16
-8
test/gemm_add/test_gemm_add_fastgelu_xdl.cpp
test/gemm_add/test_gemm_add_fastgelu_xdl.cpp
+1
-1
test/gemm_add/test_gemm_add_relu_xdl.cpp
test/gemm_add/test_gemm_add_relu_xdl.cpp
+1
-1
test/gemm_add/test_gemm_add_silu_xdl.cpp
test/gemm_add/test_gemm_add_silu_xdl.cpp
+1
-1
test/gemm_add/test_gemm_add_xdl.hpp
test/gemm_add/test_gemm_add_xdl.hpp
+0
-0
No files found.
test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp
→
test/batched_gemm_reduce/batched_gemm_reduce_fp16
_xdl
.cpp
View file @
6b9a4bd5
File moved
test/batched_gemm_softmax_gemm/CMakeLists.txt
View file @
6b9a4bd5
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
add_custom_target
(
test_batched_gemm_softmax_gemm
)
add_gtest_executable
(
test_batched_gemm_softmax_gemm_fp16 test_batched_gemm_softmax_gemm_fp16.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_batched_gemm_softmax_gemm_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_instance
)
add_dependencies
(
test_batched_gemm_softmax_gemm test_batched_gemm_softmax_gemm_fp16
)
set
(
target 1
)
endif
()
endif
()
endforeach
()
\ No newline at end of file
add_gtest_executable
(
test_batched_gemm_softmax_gemm_fp16 test_batched_gemm_softmax_gemm_fp16_xdl.cpp
)
if
(
result EQUAL 0
)
add_custom_target
(
test_batched_gemm_softmax_gemm
)
target_link_libraries
(
test_batched_gemm_softmax_gemm_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_instance
)
add_dependencies
(
test_batched_gemm_softmax_gemm test_batched_gemm_softmax_gemm_fp16
)
endif
()
test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp
→
test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16
_xdl
.cpp
View file @
6b9a4bd5
File moved
test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
View file @
6b9a4bd5
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
add_custom_target
(
test_batched_gemm_softmax_gemm_permute
)
add_gtest_executable
(
test_batched_gemm_softmax_gemm_permute_fp16 test_batched_gemm_softmax_gemm_permute_fp16.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_batched_gemm_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance
)
add_dependencies
(
test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_fp16
)
endif
()
add_gtest_executable
(
test_batched_gemm_bias_softmax_gemm_permute_fp16 test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_batched_gemm_bias_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance
)
add_dependencies
(
test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_fp16
)
endif
()
add_gtest_executable
(
test_batched_gemm_softmax_gemm_permute_bf16 test_batched_gemm_softmax_gemm_permute_bf16.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_batched_gemm_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance
)
add_dependencies
(
test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_bf16
)
endif
()
add_gtest_executable
(
test_batched_gemm_bias_softmax_gemm_permute_bf16 test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_batched_gemm_bias_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance
)
add_dependencies
(
test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_bf16
)
endif
()
set
(
target 1
)
endif
()
endforeach
()
\ No newline at end of file
add_custom_target
(
test_batched_gemm_softmax_gemm_permute
)
add_gtest_executable
(
test_batched_gemm_softmax_gemm_permute_fp16 test_batched_gemm_softmax_gemm_permute_fp16_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_batched_gemm_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance
)
add_dependencies
(
test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_fp16
)
endif
()
add_gtest_executable
(
test_batched_gemm_bias_softmax_gemm_permute_fp16 test_batched_gemm_bias_softmax_gemm_permute_fp16_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_batched_gemm_bias_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance
)
add_dependencies
(
test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_fp16
)
endif
()
add_gtest_executable
(
test_batched_gemm_softmax_gemm_permute_bf16 test_batched_gemm_softmax_gemm_permute_bf16_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_batched_gemm_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance
)
add_dependencies
(
test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_bf16
)
endif
()
add_gtest_executable
(
test_batched_gemm_bias_softmax_gemm_permute_bf16 test_batched_gemm_bias_softmax_gemm_permute_bf16_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_batched_gemm_bias_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance
)
add_dependencies
(
test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_bf16
)
endif
()
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp
→
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16
_xdl
.cpp
View file @
6b9a4bd5
File moved
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp
→
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16
_xdl
.cpp
View file @
6b9a4bd5
File moved
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp
→
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16
_xdl
.cpp
View file @
6b9a4bd5
File moved
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp
→
test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16
_xdl
.cpp
View file @
6b9a4bd5
File moved
test/contraction/CMakeLists.txt
View file @
6b9a4bd5
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
((
DTYPES MATCHES
"fp32"
OR DTYPES MATCHES
"fp64"
)
OR NOT DEFINED DTYPES
)
add_gtest_executable
(
test_contraction test_contraction.cpp
)
target_link_libraries
(
test_contraction PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance
)
add_gtest_executable
(
test_contraction_interface test_contraction_interface.cpp
)
target_link_libraries
(
test_contraction_interface PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance
)
set
(
target 1
)
endif
()
if
((
DTYPES MATCHES
"fp32"
OR DTYPES MATCHES
"fp64"
)
OR NOT DEFINED DTYPES
)
add_gtest_executable
(
test_contraction test_contraction_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_contraction PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance
)
endif
()
endforeach
()
add_gtest_executable
(
test_contraction_interface test_contraction_interface_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_contraction_interface PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance
)
endif
()
endif
()
test/contraction/test_contraction_interface.cpp
→
test/contraction/test_contraction_interface
_xdl
.cpp
View file @
6b9a4bd5
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#include <stdexcept>
#include <vector>
...
...
@@ -125,18 +125,6 @@ class ContractionDeviceOpWrapper
}
};
TEST
(
TestContractionInterface
,
IncorrectNumDims
)
{
std
::
vector
<
std
::
vector
<
ck
::
index_t
>>
Dims
=
{{
4
,
4
},
{
4
,
4
,
4
,
4
},
{
4
,
4
,
4
,
4
,
4
,
4
}};
std
::
vector
<
std
::
vector
<
ck
::
index_t
>>
Strides
=
{{
1
,
1
},
{
1
,
1
,
1
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
}};
ContractionDeviceOpWrapper
<
F32
,
F32
,
F32
,
F32
,
1
>
wrapper_1d
;
ContractionDeviceOpWrapper
<
F32
,
F32
,
F32
,
F32
,
2
>
wrapper_2d
;
ContractionDeviceOpWrapper
<
F32
,
F32
,
F32
,
F32
,
3
>
wrapper_3d
;
EXPECT_FALSE
(
wrapper_1d
.
IsSupportedInstance
(
Dims
[
0
],
Strides
[
0
]));
EXPECT_TRUE
(
wrapper_2d
.
IsSupportedInstance
(
Dims
[
1
],
Strides
[
1
]));
EXPECT_FALSE
(
wrapper_3d
.
IsSupportedInstance
(
Dims
[
2
],
Strides
[
2
]));
}
TEST
(
TestContractionInterface
,
IncorrectDataTypes
)
{
std
::
vector
<
ck
::
index_t
>
Dims
=
{
4
,
4
,
4
,
4
};
...
...
test/contraction/test_contraction.cpp
→
test/contraction/test_contraction
_xdl
.cpp
View file @
6b9a4bd5
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
...
...
@@ -23,8 +23,11 @@ using Col = ck::tensor_layout::gemm::ColumnMajor;
using
Bilinear
=
ck
::
tensor_operation
::
element_wise
::
Bilinear
;
using
Scale
=
ck
::
tensor_operation
::
element_wise
::
Scale
;
template
<
ck
::
index_t
NDims
>
struct
Dimensions
{
constexpr
static
ck
::
index_t
NumDimMNK
=
NDims
;
std
::
vector
<
ck
::
index_t
>
M
;
std
::
vector
<
ck
::
index_t
>
N
;
std
::
vector
<
ck
::
index_t
>
K
;
...
...
@@ -42,53 +45,58 @@ class TestContraction : public ::testing::Test
using
ComputeDataType
=
std
::
tuple_element_t
<
5
,
Tuple
>
;
using
CDElementOp
=
std
::
tuple_element_t
<
6
,
Tuple
>
;
std
::
vector
<
Dimensions
>
dimension_list
=
{{{
32
,
32
},
{
32
,
32
},
{
32
,
32
}},
{{
16
,
16
},
{
32
,
32
},
{
16
,
16
}}};
std
::
vector
<
ck
::
index_t
>
init_methods
=
{
1
,
2
};
std
::
unique_ptr
<
CDElementOp
>
p_cd_element_op
;
void
Run
()
template
<
ck
::
index_t
NumDim
>
void
Run
(
Dimensions
<
NumDim
>
dimension_params
)
{
for
(
auto
&
dimension_params
:
dimension_list
)
constexpr
ck
::
index_t
NumDimMNK
=
ck
::
remove_cvref_t
<
decltype
(
dimension_params
)
>::
NumDimMNK
;
std
::
vector
<
ck
::
index_t
>
StridesA
(
2
*
NumDim
);
std
::
vector
<
ck
::
index_t
>
StridesB
(
2
*
NumDim
);
std
::
vector
<
ck
::
index_t
>
StridesC
(
2
*
NumDim
);
std
::
vector
<
ck
::
index_t
>
StridesD
(
2
*
NumDim
);
const
auto
&
M
=
dimension_params
.
M
;
const
auto
&
N
=
dimension_params
.
N
;
const
auto
&
K
=
dimension_params
.
K
;
auto
merge_dims
=
[](
const
std
::
vector
<
ck
::
index_t
>&
dims01
,
const
std
::
vector
<
ck
::
index_t
>&
dims23
)
{
std
::
vector
<
ck
::
index_t
>
dims_szt
(
dims01
.
begin
(),
dims01
.
end
());
dims_szt
.
insert
(
dims_szt
.
end
(),
dims23
.
begin
(),
dims23
.
end
());
return
dims_szt
;
};
assign_default_strides
(
ALayout
{},
StridesA
,
merge_dims
(
M
,
K
));
assign_default_strides
(
BLayout
{},
StridesB
,
merge_dims
(
N
,
K
));
assign_default_strides
(
CDLayout
{},
StridesC
,
merge_dims
(
M
,
N
));
assign_default_strides
(
CDLayout
{},
StridesD
,
merge_dims
(
M
,
N
));
for
(
const
ck
::
index_t
init_method
:
init_methods
)
{
std
::
vector
<
ck
::
index_t
>
StridesA
;
std
::
vector
<
ck
::
index_t
>
StridesB
;
std
::
vector
<
ck
::
index_t
>
StridesC
;
std
::
vector
<
ck
::
index_t
>
StridesD
;
const
auto
&
M
=
dimension_params
.
M
;
const
auto
&
N
=
dimension_params
.
N
;
const
auto
&
K
=
dimension_params
.
K
;
assign_default_strides
(
ALayout
{},
StridesA
,
{
M
[
0
],
M
[
1
],
K
[
0
],
K
[
1
]});
assign_default_strides
(
BLayout
{},
StridesB
,
{
N
[
0
],
N
[
1
],
K
[
0
],
K
[
1
]});
assign_default_strides
(
CDLayout
{},
StridesC
,
{
M
[
0
],
M
[
1
],
N
[
0
],
N
[
1
]});
assign_default_strides
(
CDLayout
{},
StridesD
,
{
M
[
0
],
M
[
1
],
N
[
0
],
N
[
1
]});
for
(
const
ck
::
index_t
init_method
:
init_methods
)
{
bool
pass
=
ck
::
profiler
::
profile_contraction_impl
<
ALayout
,
BLayout
,
CDLayout
,
DataType
,
ComputeDataType
,
DTupleDataType
,
CDElementOp
>
(
true
/*do_verification*/
,
init_method
,
false
/*do_logs*/
,
false
/*time_kernel*/
,
*
p_cd_element_op
,
dimension_params
.
M
,
dimension_params
.
N
,
dimension_params
.
K
,
StridesA
,
StridesB
,
StridesC
,
StridesD
);
EXPECT_TRUE
(
pass
);
}
bool
pass
=
ck
::
profiler
::
profile_contraction_impl
<
NumDimMNK
,
ALayout
,
BLayout
,
CDLayout
,
DataType
,
ComputeDataType
,
DTupleDataType
,
CDElementOp
>
(
true
/*do_verification*/
,
init_method
,
false
/*do_logs*/
,
false
/*time_kernel*/
,
*
p_cd_element_op
,
dimension_params
.
M
,
dimension_params
.
N
,
dimension_params
.
K
,
StridesA
,
StridesB
,
StridesC
,
StridesD
);
EXPECT_TRUE
(
pass
);
}
}
};
...
...
@@ -122,17 +130,31 @@ TYPED_TEST_SUITE(TestContractionScale, ScaleKernelTypes);
TYPED_TEST
(
TestContractionBilinear
,
bilinear
)
{
this
->
p_cd_element_op
=
std
::
make_unique
<
Bilinear
>
(
1.
f
,
1.
f
);
this
->
Run
();
this
->
template
Run
<
6
>({{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
2
,
2
,
2
,
2
,
4
}});
this
->
template
Run
<
6
>({{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
2
,
2
,
4
}});
this
->
template
Run
<
2
>({{
16
,
8
},
{
16
,
8
},
{
16
,
8
}});
this
->
template
Run
<
2
>({{
8
,
16
},
{
16
,
8
},
{
8
,
16
}});
this
->
p_cd_element_op
=
std
::
make_unique
<
Bilinear
>
(
-
0.5
f
,
0.5
f
);
this
->
Run
();
this
->
template
Run
<
6
>({{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
2
,
2
,
2
,
2
,
4
}});
this
->
template
Run
<
6
>({{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
2
,
2
,
4
}});
this
->
template
Run
<
2
>({{
16
,
8
},
{
16
,
8
},
{
16
,
8
}});
this
->
template
Run
<
2
>({{
8
,
16
},
{
16
,
8
},
{
8
,
16
}});
}
TYPED_TEST
(
TestContractionScale
,
scale
)
{
this
->
p_cd_element_op
=
std
::
make_unique
<
Scale
>
(
1.
f
);
this
->
Run
();
this
->
template
Run
<
6
>({{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
2
,
2
,
2
,
2
,
4
}});
this
->
template
Run
<
6
>({{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
2
,
2
,
4
}});
this
->
template
Run
<
2
>({{
16
,
8
},
{
16
,
8
},
{
16
,
8
}});
this
->
template
Run
<
2
>({{
8
,
16
},
{
16
,
8
},
{
8
,
16
}});
this
->
p_cd_element_op
=
std
::
make_unique
<
Scale
>
(
0.5
f
);
this
->
Run
();
this
->
template
Run
<
6
>({{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
2
,
2
,
2
,
2
,
4
}});
this
->
template
Run
<
6
>({{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
2
,
2
,
4
}});
this
->
template
Run
<
2
>({{
16
,
8
},
{
16
,
8
},
{
16
,
8
}});
this
->
template
Run
<
2
>({{
8
,
16
},
{
16
,
8
},
{
8
,
16
}});
}
template
<
typename
Tuple
>
...
...
@@ -165,15 +187,29 @@ TYPED_TEST_SUITE(TestContractionScaleMixedPrecision, ScaleKernelTypesMixedPrecis
TYPED_TEST
(
TestContractionBilinearMixedPrecision
,
bilinear
)
{
this
->
p_cd_element_op
=
std
::
make_unique
<
Bilinear
>
(
1.
f
,
1.
f
);
this
->
Run
();
this
->
template
Run
<
6
>({{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
2
,
2
,
2
,
2
,
4
}});
this
->
template
Run
<
6
>({{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
2
,
2
,
4
}});
this
->
template
Run
<
2
>({{
16
,
8
},
{
16
,
8
},
{
16
,
8
}});
this
->
template
Run
<
2
>({{
8
,
16
},
{
16
,
8
},
{
8
,
16
}});
this
->
p_cd_element_op
=
std
::
make_unique
<
Bilinear
>
(
-
0.5
f
,
0.5
f
);
this
->
Run
();
this
->
template
Run
<
6
>({{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
2
,
2
,
2
,
2
,
4
}});
this
->
template
Run
<
6
>({{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
2
,
2
,
4
}});
this
->
template
Run
<
2
>({{
16
,
8
},
{
16
,
8
},
{
16
,
8
}});
this
->
template
Run
<
2
>({{
8
,
16
},
{
16
,
8
},
{
8
,
16
}});
}
TYPED_TEST
(
TestContractionScaleMixedPrecision
,
scale
)
{
this
->
p_cd_element_op
=
std
::
make_unique
<
Scale
>
(
1.
f
);
this
->
Run
();
this
->
template
Run
<
6
>({{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
2
,
2
,
2
,
2
,
4
}});
this
->
template
Run
<
6
>({{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
2
,
2
,
4
}});
this
->
template
Run
<
2
>({{
16
,
8
},
{
16
,
8
},
{
16
,
8
}});
this
->
template
Run
<
2
>({{
8
,
16
},
{
16
,
8
},
{
8
,
16
}});
this
->
p_cd_element_op
=
std
::
make_unique
<
Scale
>
(
0.5
f
);
this
->
Run
();
this
->
template
Run
<
6
>({{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
3
,
2
,
3
,
2
,
3
},
{
2
,
2
,
2
,
2
,
2
,
4
}});
this
->
template
Run
<
6
>({{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
3
,
2
,
3
},
{
1
,
1
,
1
,
2
,
2
,
4
}});
this
->
template
Run
<
2
>({{
16
,
8
},
{
16
,
8
},
{
16
,
8
}});
this
->
template
Run
<
2
>({{
8
,
16
},
{
16
,
8
},
{
8
,
16
}});
}
test/convnd_bwd_data/CMakeLists.txt
View file @
6b9a4bd5
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
add_gtest_executable
(
test_convnd_bwd_data convnd_bwd_data.cpp
)
add_gtest_executable
(
test_convnd_bwd_data convnd_bwd_data_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_convnd_bwd_data PRIVATE utility device_conv1d_bwd_data_instance device_conv2d_bwd_data_instance device_conv3d_bwd_data_instance
)
set
(
target 1
)
endif
()
endforeach
()
\ No newline at end of file
endif
()
test/convnd_bwd_data/convnd_bwd_data.cpp
→
test/convnd_bwd_data/convnd_bwd_data
_xdl
.cpp
View file @
6b9a4bd5
File moved
test/convnd_fwd/CMakeLists.txt
View file @
6b9a4bd5
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
add_gtest_executable
(
test_convnd_fwd convnd_fwd.cpp
)
add_gtest_executable
(
test_convnd_fwd convnd_fwd_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_convnd_fwd PRIVATE utility device_conv2d_fwd_instance
)
set
(
target 1
)
endif
()
endforeach
()
endif
()
test/convnd_fwd/convnd_fwd.cpp
→
test/convnd_fwd/convnd_fwd
_xdl
.cpp
View file @
6b9a4bd5
File moved
test/gemm_add/CMakeLists.txt
View file @
6b9a4bd5
add_gtest_executable
(
test_gemm_add test_gemm_add.hpp
)
target_link_libraries
(
test_gemm_add PRIVATE utility device_gemm_add_instance
)
add_gtest_executable
(
test_gemm_add test_gemm_add_xdl.hpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_gemm_add PRIVATE utility device_gemm_add_instance
)
endif
()
add_gtest_executable
(
test_gemm_add_relu test_gemm_add_relu.cpp
)
target_link_libraries
(
test_gemm_add_relu PRIVATE utility device_gemm_add_instance device_gemm_add_relu_instance
)
add_gtest_executable
(
test_gemm_add_relu test_gemm_add_relu_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_gemm_add_relu PRIVATE utility device_gemm_add_instance device_gemm_add_relu_instance
)
endif
()
add_gtest_executable
(
test_gemm_add_silu test_gemm_add_silu.cpp
)
target_link_libraries
(
test_gemm_add_silu PRIVATE utility device_gemm_add_instance device_gemm_add_silu_instance
)
add_gtest_executable
(
test_gemm_add_silu test_gemm_add_silu_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_gemm_add_silu PRIVATE utility device_gemm_add_instance device_gemm_add_silu_instance
)
endif
()
add_gtest_executable
(
test_gemm_add_fastgelu test_gemm_add_fastgelu.cpp
)
target_link_libraries
(
test_gemm_add_fastgelu PRIVATE utility device_gemm_add_instance device_gemm_add_fastgelu_instance
)
add_gtest_executable
(
test_gemm_add_fastgelu test_gemm_add_fastgelu_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_gemm_add_fastgelu PRIVATE utility device_gemm_add_instance device_gemm_add_fastgelu_instance
)
endif
()
test/gemm_add/test_gemm_add_fastgelu.cpp
→
test/gemm_add/test_gemm_add_fastgelu
_xdl
.cpp
View file @
6b9a4bd5
...
...
@@ -4,7 +4,7 @@
#include "gtest/gtest.h"
#include "ck/ck.hpp"
#include "profiler/profile_gemm_add_fastgelu_impl.hpp"
#include "test_gemm_add.hpp"
#include "test_gemm_add
_xdl
.hpp"
template
<
typename
Tuple
>
class
TestGemmAddFastgelu
:
public
TestGemmAdd
<
Tuple
>
...
...
test/gemm_add/test_gemm_add_relu.cpp
→
test/gemm_add/test_gemm_add_relu
_xdl
.cpp
View file @
6b9a4bd5
...
...
@@ -4,7 +4,7 @@
#include "gtest/gtest.h"
#include "ck/ck.hpp"
#include "profiler/profile_gemm_add_relu_impl.hpp"
#include "test_gemm_add.hpp"
#include "test_gemm_add
_xdl
.hpp"
template
<
typename
Tuple
>
class
TestGemmAddRelu
:
public
TestGemmAdd
<
Tuple
>
...
...
test/gemm_add/test_gemm_add_silu.cpp
→
test/gemm_add/test_gemm_add_silu
_xdl
.cpp
View file @
6b9a4bd5
...
...
@@ -4,7 +4,7 @@
#include "gtest/gtest.h"
#include "ck/ck.hpp"
#include "profiler/profile_gemm_add_silu_impl.hpp"
#include "test_gemm_add.hpp"
#include "test_gemm_add
_xdl
.hpp"
template
<
typename
Tuple
>
class
TestGemmAddSilu
:
public
TestGemmAdd
<
Tuple
>
...
...
test/gemm_add/test_gemm_add.hpp
→
test/gemm_add/test_gemm_add
_xdl
.hpp
View file @
6b9a4bd5
File moved
Prev
1
…
13
14
15
16
17
18
19
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment