Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
667cd6ab
Commit
667cd6ab
authored
Nov 05, 2024
by
illsilin
Browse files
merge from public repo
parents
7d50244e
365f39ae
Changes
121
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
386 additions
and
16 deletions
+386
-16
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_int8_instance.cpp
..._fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_int8_instance.cpp
+39
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_mem_inter_instance.cpp
...v2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_mem_inter_instance.cpp
+39
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_mem_intra_instance.cpp
...v2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_mem_intra_instance.cpp
+39
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instance.cpp
...v2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instance.cpp
+66
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instance.cpp
...v2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instance.cpp
+66
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkyxc_ngkhw_int8_instance.cpp
...fwd_xdl_merged_groups_ngchw_gkyxc_ngkhw_int8_instance.cpp
+48
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_int8_instance.cpp
...fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_int8_instance.cpp
+48
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
...2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
+3
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp
...v2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp
+3
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp
...v2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp
+3
-0
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp
...2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp
+3
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
...fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
+3
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
..._fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
+3
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
..._fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
+3
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
...fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
+3
-0
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
+0
-5
profiler/src/profile_gemm_universal.cpp
profiler/src/profile_gemm_universal.cpp
+7
-1
test/CMakeLists.txt
test/CMakeLists.txt
+6
-6
test/gemm_universal/test_gemm_universal_xdl.cpp
test/gemm_universal/test_gemm_universal_xdl.cpp
+2
-2
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
+2
-2
No files found.
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_int8_instance.cpp
0 → 100644
View file @
667cd6ab
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_large_tensor_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void
add_device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleABD
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
int8_t
,
int8_t
,
Empty_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_large_tensor_int8_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwdDefault
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_mem_inter_instance.cpp
0 → 100644
View file @
667cd6ab
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void
add_device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_mem_inter_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleABD
<
2
,
NGCHW
,
GKYXC
,
Empty_Tuple
,
NGKHW
,
int8_t
,
int8_t
,
Empty_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_int8_mem_instances
<
2
,
NGCHW
,
GKYXC
,
Empty_Tuple
,
NGKHW
,
ConvFwdDefault
,
Interwave
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_mem_intra_instance.cpp
0 → 100644
View file @
667cd6ab
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void
add_device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_mem_intra_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleABD
<
2
,
NGCHW
,
GKYXC
,
Empty_Tuple
,
NGKHW
,
int8_t
,
int8_t
,
Empty_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_int8_mem_instances
<
2
,
NGCHW
,
GKYXC
,
Empty_Tuple
,
NGKHW
,
ConvFwdDefault
,
Intrawave
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instance.cpp
0 → 100644
View file @
667cd6ab
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void
add_device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleABD
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
int8_t
,
int8_t
,
Empty_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_int8_mem_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwdDefault
,
Interwave
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_int8_mem_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwd1x1P0
,
Interwave
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_int8_mem_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwd1x1S1P0
,
Interwave
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_int8_mem_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwdOddC
,
Interwave
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instance.cpp
0 → 100644
View file @
667cd6ab
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void
add_device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleABD
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
int8_t
,
int8_t
,
Empty_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_int8_mem_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwdDefault
,
Intrawave
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_int8_mem_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwd1x1P0
,
Intrawave
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_int8_mem_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwd1x1S1P0
,
Intrawave
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_int8_mem_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwdOddC
,
Intrawave
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkyxc_ngkhw_int8_instance.cpp
0 → 100644
View file @
667cd6ab
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_merged_groups_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void
add_device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkyxc_ngkhw_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleABD
<
2
,
NGCHW
,
GKYXC
,
Empty_Tuple
,
NGKHW
,
int8_t
,
int8_t
,
Empty_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_merged_groups_int8_instances
<
2
,
NGCHW
,
GKYXC
,
Empty_Tuple
,
NGKHW
,
ConvFwdDefault
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_merged_groups_int8_instances
<
2
,
NGCHW
,
GKYXC
,
Empty_Tuple
,
NGKHW
,
ConvFwd3x3
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_int8_instance.cpp
0 → 100644
View file @
667cd6ab
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_merged_groups_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void
add_device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleABD
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
int8_t
,
int8_t
,
Empty_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_merged_groups_int8_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwdDefault
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_merged_groups_int8_instances
<
2
,
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
ConvFwd3x3
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
View file @
667cd6ab
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instanc
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instanc
Tuple
<>
,
Tuple
<>
,
NHWGK
,
NHWGK
,
ConvFwdDefault
>
{});
ConvFwdDefault
>
{});
#if 0 // Enable with dynamic op optimizations (at now generating a lot of virtual functions cause
// long compilation time)
add_device_operation_instances(
add_device_operation_instances(
instances,
instances,
device_grouped_conv_fwd_xdl_dynamic_op_bf16_instances<2,
device_grouped_conv_fwd_xdl_dynamic_op_bf16_instances<2,
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instanc
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instanc
Tuple<>,
Tuple<>,
NHWGK,
NHWGK,
ConvFwd1x1S1P0>{});
ConvFwd1x1S1P0>{});
#endif
}
}
}
// namespace instance
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp
View file @
667cd6ab
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance
Tuple
<>
,
Tuple
<>
,
NHWGK
,
NHWGK
,
ConvFwdDefault
>
{});
ConvFwdDefault
>
{});
#if 0 // Enable with dynamic op optimizations (at now generating a lot of virtual functions cause
// long compilation time)
add_device_operation_instances(
add_device_operation_instances(
instances,
instances,
device_grouped_conv_fwd_xdl_dynamic_op_f16_instances<2,
device_grouped_conv_fwd_xdl_dynamic_op_f16_instances<2,
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance
Tuple<>,
Tuple<>,
NHWGK,
NHWGK,
ConvFwd1x1S1P0>{});
ConvFwd1x1S1P0>{});
#endif
}
}
}
// namespace instance
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp
View file @
667cd6ab
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance
Tuple
<>
,
Tuple
<>
,
NHWGK
,
NHWGK
,
ConvFwdDefault
>
{});
ConvFwdDefault
>
{});
#if 0 // Enable with dynamic op optimizations (at now generating a lot of virtual functions cause
// long compilation time)
add_device_operation_instances(
add_device_operation_instances(
instances,
instances,
device_grouped_conv_fwd_xdl_dynamic_op_f32_instances<2,
device_grouped_conv_fwd_xdl_dynamic_op_f32_instances<2,
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance
Tuple<>,
Tuple<>,
NHWGK,
NHWGK,
ConvFwd1x1S1P0>{});
ConvFwd1x1S1P0>{});
#endif
}
}
}
// namespace instance
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp
View file @
667cd6ab
...
@@ -30,6 +30,8 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instanc
...
@@ -30,6 +30,8 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instanc
Tuple
<>
,
Tuple
<>
,
NHWGK
,
NHWGK
,
ConvFwdDefault
>
{});
ConvFwdDefault
>
{});
#if 0 // Enable with dynamic op optimizations (at now generating a lot of virtual functions cause
// long compilation time)
add_device_operation_instances(
add_device_operation_instances(
instances,
instances,
device_grouped_conv_fwd_xdl_dynamic_op_int8_instances<2,
device_grouped_conv_fwd_xdl_dynamic_op_int8_instances<2,
...
@@ -46,6 +48,7 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instanc
...
@@ -46,6 +48,7 @@ void add_device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instanc
Tuple<>,
Tuple<>,
NHWGK,
NHWGK,
ConvFwd1x1S1P0>{});
ConvFwd1x1S1P0>{});
#endif
}
}
}
// namespace instance
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
View file @
667cd6ab
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_inst
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_inst
Tuple
<>
,
Tuple
<>
,
NDHWGK
,
NDHWGK
,
ConvFwdDefault
>
{});
ConvFwdDefault
>
{});
#if 0 // Enable with dynamic op optimizations (at now generating a lot of virtual functions cause
// long compilation time)
add_device_operation_instances(
add_device_operation_instances(
instances,
instances,
device_grouped_conv_fwd_xdl_dynamic_op_bf16_instances<3,
device_grouped_conv_fwd_xdl_dynamic_op_bf16_instances<3,
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_inst
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_inst
Tuple<>,
Tuple<>,
NDHWGK,
NDHWGK,
ConvFwd1x1S1P0>{});
ConvFwd1x1S1P0>{});
#endif
}
}
}
// namespace instance
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
View file @
667cd6ab
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_insta
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_insta
Tuple
<>
,
Tuple
<>
,
NDHWGK
,
NDHWGK
,
ConvFwdDefault
>
{});
ConvFwdDefault
>
{});
#if 0 // Enable with dynamic op optimizations (at now generating a lot of virtual functions cause
// long compilation time)
add_device_operation_instances(
add_device_operation_instances(
instances,
instances,
device_grouped_conv_fwd_xdl_dynamic_op_f16_instances<3,
device_grouped_conv_fwd_xdl_dynamic_op_f16_instances<3,
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_insta
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_insta
Tuple<>,
Tuple<>,
NDHWGK,
NDHWGK,
ConvFwd1x1S1P0>{});
ConvFwd1x1S1P0>{});
#endif
}
}
}
// namespace instance
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
View file @
667cd6ab
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_insta
...
@@ -31,6 +31,8 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_insta
Tuple
<>
,
Tuple
<>
,
NDHWGK
,
NDHWGK
,
ConvFwdDefault
>
{});
ConvFwdDefault
>
{});
#if 0 // Enable with dynamic op optimizations (at now generating a lot of virtual functions cause
// long compilation time)
add_device_operation_instances(
add_device_operation_instances(
instances,
instances,
device_grouped_conv_fwd_xdl_dynamic_op_f32_instances<3,
device_grouped_conv_fwd_xdl_dynamic_op_f32_instances<3,
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_insta
...
@@ -47,6 +49,7 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_insta
Tuple<>,
Tuple<>,
NDHWGK,
NDHWGK,
ConvFwd1x1S1P0>{});
ConvFwd1x1S1P0>{});
#endif
}
}
}
// namespace instance
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
View file @
667cd6ab
...
@@ -30,6 +30,8 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_inst
...
@@ -30,6 +30,8 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_inst
Tuple
<>
,
Tuple
<>
,
NDHWGK
,
NDHWGK
,
ConvFwdDefault
>
{});
ConvFwdDefault
>
{});
#if 0 // Enable with dynamic op optimizations (at now generating a lot of virtual functions cause
// long compilation time)
add_device_operation_instances(
add_device_operation_instances(
instances,
instances,
device_grouped_conv_fwd_xdl_dynamic_op_int8_instances<3,
device_grouped_conv_fwd_xdl_dynamic_op_int8_instances<3,
...
@@ -46,6 +48,7 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_inst
...
@@ -46,6 +48,7 @@ void add_device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_inst
Tuple<>,
Tuple<>,
NDHWGK,
NDHWGK,
ConvFwd1x1S1P0>{});
ConvFwd1x1S1P0>{});
#endif
}
}
}
// namespace instance
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
View file @
667cd6ab
...
@@ -27,11 +27,6 @@ rocm_install(FILES ${MHA_HEADERS} DESTINATION include/ck_tile/ops)
...
@@ -27,11 +27,6 @@ rocm_install(FILES ${MHA_HEADERS} DESTINATION include/ck_tile/ops)
# headers for building lib
# headers for building lib
file
(
COPY
${
MHA_HEADERS
}
DESTINATION
${
FMHA_CPP_FOLDER
}
)
file
(
COPY
${
MHA_HEADERS
}
DESTINATION
${
FMHA_CPP_FOLDER
}
)
# Delete the blob file if it exists to avoid append of old content.
if
(
EXISTS
${
FMHA_CPP_FOLDER
}
/blob_list.txt
)
file
(
REMOVE
${
FMHA_CPP_FOLDER
}
/blob_list.txt
)
endif
()
set
(
FMHA_KNOWN_APIS
"fwd,fwd_splitkv,fwd_appendkv,bwd"
)
set
(
FMHA_KNOWN_APIS
"fwd,fwd_splitkv,fwd_appendkv,bwd"
)
# generate a list of kernels, but not actually emit files at config stage
# generate a list of kernels, but not actually emit files at config stage
...
...
profiler/src/profile_gemm_universal.cpp
View file @
667cd6ab
...
@@ -101,7 +101,9 @@ int profile_gemm_universal(int argc, char* argv[])
...
@@ -101,7 +101,9 @@ int profile_gemm_universal(int argc, char* argv[])
using
F32
=
float
;
using
F32
=
float
;
using
F16
=
ck
::
half_t
;
using
F16
=
ck
::
half_t
;
using
BF16
=
ck
::
bhalf_t
;
using
BF16
=
ck
::
bhalf_t
;
using
F8
=
ck
::
f8_t
;
#if defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
using
F8
=
ck
::
f8_t
;
#endif
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck
::
tensor_layout
::
gemm
::
ColumnMajor
;
using
Col
=
ck
::
tensor_layout
::
gemm
::
ColumnMajor
;
...
@@ -162,6 +164,7 @@ int profile_gemm_universal(int argc, char* argv[])
...
@@ -162,6 +164,7 @@ int profile_gemm_universal(int argc, char* argv[])
{
{
return
profile
(
F16
{},
F16
{},
F16
{},
F32
{},
F16
{},
Row
{},
Col
{},
Row
{});
return
profile
(
F16
{},
F16
{},
F16
{},
F32
{},
F16
{},
Row
{},
Col
{},
Row
{});
}
}
#if defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
else
if
(
data_type
==
GemmDataType
::
F16_F8_F16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
else
if
(
data_type
==
GemmDataType
::
F16_F8_F16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
{
{
return
profile
(
F16
{},
F8
{},
F16
{},
F32
{},
F16
{},
Row
{},
Row
{},
Row
{});
return
profile
(
F16
{},
F8
{},
F16
{},
F32
{},
F16
{},
Row
{},
Row
{},
Row
{});
...
@@ -178,6 +181,7 @@ int profile_gemm_universal(int argc, char* argv[])
...
@@ -178,6 +181,7 @@ int profile_gemm_universal(int argc, char* argv[])
{
{
return
profile
(
F8
{},
F16
{},
F16
{},
F32
{},
F16
{},
Row
{},
Col
{},
Row
{});
return
profile
(
F8
{},
F16
{},
F16
{},
F32
{},
F16
{},
Row
{},
Col
{},
Row
{});
}
}
#endif
else
if
(
data_type
==
GemmDataType
::
BF16_BF16_BF16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
else
if
(
data_type
==
GemmDataType
::
BF16_BF16_BF16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
{
{
return
profile
(
BF16
{},
BF16
{},
BF16
{},
F32
{},
BF16
{},
Row
{},
Row
{},
Row
{});
return
profile
(
BF16
{},
BF16
{},
BF16
{},
F32
{},
BF16
{},
Row
{},
Row
{},
Row
{});
...
@@ -194,6 +198,7 @@ int profile_gemm_universal(int argc, char* argv[])
...
@@ -194,6 +198,7 @@ int profile_gemm_universal(int argc, char* argv[])
{
{
return
profile
(
BF16
{},
BF16
{},
BF16
{},
F32
{},
BF16
{},
Col
{},
Row
{},
Row
{});
return
profile
(
BF16
{},
BF16
{},
BF16
{},
F32
{},
BF16
{},
Col
{},
Row
{},
Row
{});
}
}
#if defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
else
if
(
data_type
==
GemmDataType
::
F8_F8_BF16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
else
if
(
data_type
==
GemmDataType
::
F8_F8_BF16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
{
{
return
profile
(
F8
{},
F8
{},
F8
{},
F32
{},
BF16
{},
Row
{},
Row
{},
Row
{});
return
profile
(
F8
{},
F8
{},
F8
{},
F32
{},
BF16
{},
Row
{},
Row
{},
Row
{});
...
@@ -202,6 +207,7 @@ int profile_gemm_universal(int argc, char* argv[])
...
@@ -202,6 +207,7 @@ int profile_gemm_universal(int argc, char* argv[])
{
{
return
profile
(
F8
{},
F8
{},
F8
{},
F32
{},
BF16
{},
Row
{},
Col
{},
Row
{});
return
profile
(
F8
{},
F8
{},
F8
{},
F32
{},
BF16
{},
Row
{},
Col
{},
Row
{});
}
}
#endif
else
else
{
{
std
::
cout
<<
"this data_type & layout is not implemented"
<<
std
::
endl
;
std
::
cout
<<
"this data_type & layout is not implemented"
<<
std
::
endl
;
...
...
test/CMakeLists.txt
View file @
667cd6ab
...
@@ -64,11 +64,11 @@ function(add_test_executable TEST_NAME)
...
@@ -64,11 +64,11 @@ function(add_test_executable TEST_NAME)
#only continue if there are some source files left on the list
#only continue if there are some source files left on the list
if
(
ARGN
)
if
(
ARGN
)
if
(
ARGN MATCHES
"_xdl"
)
if
(
ARGN MATCHES
"_xdl"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201
)
list
(
REMOVE_ITEM TEST_TARGETS
gfx900 gfx906 gfx906:xnack-
gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201
)
elseif
(
ARGN MATCHES
"_wmma"
)
elseif
(
ARGN MATCHES
"_wmma"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030
)
list
(
REMOVE_ITEM TEST_TARGETS
gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack-
gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030
)
elseif
(
ARGN MATCHES
"_smfmac"
)
elseif
(
ARGN MATCHES
"_smfmac"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201
)
list
(
REMOVE_ITEM TEST_TARGETS
gfx900 gfx906 gfx906:xnack-
gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201
)
endif
()
endif
()
set_source_files_properties
(
${
ARGN
}
PROPERTIES LANGUAGE HIP
)
set_source_files_properties
(
${
ARGN
}
PROPERTIES LANGUAGE HIP
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
...
@@ -141,11 +141,11 @@ function(add_gtest_executable TEST_NAME)
...
@@ -141,11 +141,11 @@ function(add_gtest_executable TEST_NAME)
#only continue if there are some source files left on the list
#only continue if there are some source files left on the list
if
(
ARGN
)
if
(
ARGN
)
if
(
ARGN MATCHES
"_xdl"
)
if
(
ARGN MATCHES
"_xdl"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906
gfx906:xnack-
gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201
)
elseif
(
ARGN MATCHES
"_wmma"
)
elseif
(
ARGN MATCHES
"_wmma"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906
gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack-
gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030
)
elseif
(
ARGN MATCHES
"_smfmac"
)
elseif
(
ARGN MATCHES
"_smfmac"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201
)
list
(
REMOVE_ITEM TEST_TARGETS
gfx900 gfx906 gfx906:xnack-
gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201
)
endif
()
endif
()
set_source_files_properties
(
${
ARGN
}
PROPERTIES LANGUAGE HIP
)
set_source_files_properties
(
${
ARGN
}
PROPERTIES LANGUAGE HIP
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
...
...
test/gemm_universal/test_gemm_universal_xdl.cpp
View file @
667cd6ab
...
@@ -56,7 +56,7 @@ class TestGemmUniversal_KM_NK
...
@@ -56,7 +56,7 @@ class TestGemmUniversal_KM_NK
using
KernelTypes_MK_KN
=
::
testing
::
Types
<
using
KernelTypes_MK_KN
=
::
testing
::
Types
<
// ADataType, BDataType, ComputeDataType, CDataType
// ADataType, BDataType, ComputeDataType, CDataType
std
::
tuple
<
F16
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
F16
,
F16
,
F16
,
F16
>
,
#if
(
defined
CK_ENABLE_FP8)
#if defined
(
CK_ENABLE_FP8)
&& defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
std
::
tuple
<
F16
,
F8
,
F16
,
F16
>
,
std
::
tuple
<
F16
,
F8
,
F16
,
F16
>
,
std
::
tuple
<
F8
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
F8
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
F8
,
F8
,
F8
,
BF16
>
,
std
::
tuple
<
F8
,
F8
,
F8
,
BF16
>
,
...
@@ -66,7 +66,7 @@ using KernelTypes_MK_KN = ::testing::Types<
...
@@ -66,7 +66,7 @@ using KernelTypes_MK_KN = ::testing::Types<
using
KernelTypes_MK_NK
=
::
testing
::
Types
<
using
KernelTypes_MK_NK
=
::
testing
::
Types
<
// ADataType, BDataType, ComputeDataType, CDataType
// ADataType, BDataType, ComputeDataType, CDataType
std
::
tuple
<
F16
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
F16
,
F16
,
F16
,
F16
>
,
#if
(
defined
CK_ENABLE_FP8)
#if defined
(
CK_ENABLE_FP8)
&& defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
std
::
tuple
<
F16
,
F8
,
F16
,
F16
>
,
std
::
tuple
<
F16
,
F8
,
F16
,
F16
>
,
std
::
tuple
<
F8
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
F8
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
F8
,
F8
,
F8
,
BF16
>
,
std
::
tuple
<
F8
,
F8
,
F8
,
BF16
>
,
...
...
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
View file @
667cd6ab
...
@@ -58,13 +58,13 @@ using KernelTypes1d = ::testing::Types<std::tuple<float, GNWC, GKXC, GNWK>,
...
@@ -58,13 +58,13 @@ using KernelTypes1d = ::testing::Types<std::tuple<float, GNWC, GKXC, GNWK>,
using
KernelTypes2d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
GNHWC
,
GKYXC
,
GNHWK
>
,
using
KernelTypes2d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
GNHWC
,
GKYXC
,
GNHWK
>
,
std
::
tuple
<
ck
::
half_t
,
GNHWC
,
GKYXC
,
GNHWK
>
,
std
::
tuple
<
ck
::
half_t
,
GNHWC
,
GKYXC
,
GNHWK
>
,
std
::
tuple
<
ck
::
bhalf_t
,
GNHWC
,
GKYXC
,
GNHWK
>
,
std
::
tuple
<
ck
::
bhalf_t
,
GNHWC
,
GKYXC
,
GNHWK
>
,
std
::
tuple
<
int8_t
,
GNHWC
,
GKYXC
,
GNHWK
>
,
std
::
tuple
<
float
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
float
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
ck
::
half_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
ck
::
half_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
ck
::
bhalf_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
ck
::
bhalf_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
int8_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
int8_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
float
,
NGCHW
,
GKYXC
,
NGKHW
>
,
std
::
tuple
<
float
,
NGCHW
,
GKYXC
,
NGKHW
>
,
std
::
tuple
<
ck
::
half_t
,
NGCHW
,
GKYXC
,
NGKHW
>>
;
std
::
tuple
<
ck
::
half_t
,
NGCHW
,
GKYXC
,
NGKHW
>
,
std
::
tuple
<
int8_t
,
NGCHW
,
GKYXC
,
NGKHW
>>
;
using
KernelTypes3d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
GNDHWC
,
GKZYXC
,
GNDHWK
>
,
using
KernelTypes3d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
GNDHWC
,
GKZYXC
,
GNDHWK
>
,
std
::
tuple
<
ck
::
half_t
,
GNDHWC
,
GKZYXC
,
GNDHWK
>
,
std
::
tuple
<
ck
::
half_t
,
GNDHWC
,
GKZYXC
,
GNDHWK
>
,
...
...
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment