gaoqiong / composable_kernel · Commits

Commit f74b77bc
Authored Jun 19, 2023 by carlushuang

    Merge remote-tracking branch 'origin/develop' into stream-k-initial-impl

Parents: b5be51ed, 0d911822
Changes: 162

Showing 20 changed files with 26 additions and 167 deletions (+26 −167)
Changed files shown on this page:

library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp            +7 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance.hpp                 +0 −22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp   +1 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp            +8 −1
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.hpp                   +0 −22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp     +0 −22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp     +0 −22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp     +0 −22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp     +0 −22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp     +0 −22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f16_f16_rank4_reduce1_instances(
-    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4>>& instances);
+    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 1>>& instances);
 
 } // namespace instance
 } // namespace device
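The only change in this header (and in the sibling rank/reduce headers below) is that the type-erased pointer type now carries the number of reduced dimensions as an extra integer template argument, here 4, 1 for a rank-4 tensor with one reduced dimension. The following standalone sketch mirrors that pattern so the intent is easier to see; every name in it (DeviceSoftmaxBase, SoftmaxPtr, DeviceSoftmaxKernel, add_f16_rank4_reduce1_instances) is hypothetical and is not CK's actual implementation.

// Standalone sketch, not CK code: with NumReduceDim part of the pointer type, a
// container of rank-4/reduce-1 instances cannot accidentally receive a
// rank-4/reduce-2 kernel.
#include <memory>
#include <vector>

struct F16 {};         // stand-ins for the CK data-type tags
struct F32 {};
struct PassThrough {}; // stand-in for the element-wise no-op

// Type-erased interface keyed by the same parameters as DeviceSoftmaxPtr.
template <typename In, typename Acc, typename Out, typename InOp, typename AccOp,
          int Rank, int NumReduceDim>
struct DeviceSoftmaxBase
{
    virtual ~DeviceSoftmaxBase() = default;
};

template <typename In, typename Acc, typename Out, typename InOp, typename AccOp,
          int Rank, int NumReduceDim>
using SoftmaxPtr =
    std::unique_ptr<DeviceSoftmaxBase<In, Acc, Out, InOp, AccOp, Rank, NumReduceDim>>;

// One concrete tuning of the kernel (block size and similar parameters omitted).
template <typename In, typename Acc, typename Out, typename InOp, typename AccOp,
          int Rank, int NumReduceDim>
struct DeviceSoftmaxKernel final
    : DeviceSoftmaxBase<In, Acc, Out, InOp, AccOp, Rank, NumReduceDim>
{
};

// Counterpart of add_device_softmax_f16_f16_rank4_reduce1_instances(): appends the
// concrete kernels that handle a rank-4 input reducing one dimension.
void add_f16_rank4_reduce1_instances(
    std::vector<SoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 1>>& instances)
{
    instances.push_back(
        std::make_unique<DeviceSoftmaxKernel<F16, F32, F16, PassThrough, PassThrough, 4, 1>>());
}

int main()
{
    std::vector<SoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 1>> instances;
    add_f16_rank4_reduce1_instances(instances);
    return instances.empty() ? 1 : 0;
}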
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f16_f16_rank4_reduce2_instances(
-    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4>>& instances);
+    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 2>>& instances);
 
 } // namespace instance
 } // namespace device
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f16_f16_rank4_reduce3_instances(
-    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4>>& instances);
+    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 3>>& instances);
 
 } // namespace instance
 } // namespace device
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f16_f16_rank4_reduce4_instances(
-    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4>>& instances);
+    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 4>>& instances);
 
 } // namespace instance
 } // namespace device
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp

@@ -16,7 +16,6 @@ template <index_t Rank, index_t Reduce>
 using device_softmax_f16_f16_instances = std::tuple<
     // clang-format off
     // InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
-    // fallback kernel
     DeviceSoftmaxImpl<F16, F32, F16, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 1, 8, 1, 1, 1>,
     DeviceSoftmaxImpl<F16, F32, F16, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 1, 8, 1, 8, 8>,
     DeviceSoftmaxImpl<F16, F32, F16, PassThrough, PassThrough, Rank, Reduce, 256, 4, 64, 1, 8, 1, 8, 8>,

@@ -33,6 +32,13 @@ using device_softmax_f16_f16_instances = std::tuple<
     // clang-format on
     >;
 
+template <index_t Rank, index_t Reduce>
+using device_softmax_f16_f16_generic_instance = std::tuple<
+    // clang-format off
+    DeviceSoftmaxImpl<F16, F32, F16, PassThrough, PassThrough, Rank, Reduce, 64, 8, 8, 1, 1, 1, 1, 1>
+    // clang-format on
+    >;
+
 } // namespace instance
 } // namespace device
 } // namespace tensor_operation
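This header keeps the tuned DeviceSoftmaxImpl configurations in one std::tuple and now exposes the scalar fallback as a separate single-entry tuple, device_softmax_f16_f16_generic_instance. The sketch below shows the general registration pattern such a tuple enables: walk the element types at compile time and push one instance of each into a type-erased list. All names here (KernelBase, KernelConfig, register_instances, TunedConfigs) are hypothetical; this is not the library's factory code, only an illustration of the idiom.

// Sketch of tuple-driven instance registration, under the assumptions stated above.
#include <cstdio>
#include <memory>
#include <tuple>
#include <vector>

struct KernelBase
{
    virtual ~KernelBase()          = default;
    virtual int block_size() const = 0;
};

// A "configuration" carries its tuning parameters as template arguments, the way
// DeviceSoftmaxImpl carries BlockSize, cluster sizes, slice sizes, and vector widths.
template <int BlockSize, int MPerThread, int KPerThread>
struct KernelConfig final : KernelBase
{
    int block_size() const override { return BlockSize; }
};

// A tuned list plus a single small "generic" fallback, mirroring the two tuples above.
using TunedConfigs  = std::tuple<KernelConfig<256, 1, 8>, KernelConfig<256, 1, 4>>;
using GenericConfig = std::tuple<KernelConfig<64, 1, 1>>;

template <typename ConfigTuple>
void register_instances(std::vector<std::unique_ptr<KernelBase>>& out)
{
    // Expand the tuple's element types and append one instance per configuration.
    std::apply(
        [&](auto... cfg) { (out.push_back(std::make_unique<decltype(cfg)>(cfg)), ...); },
        ConfigTuple{});
}

int main()
{
    std::vector<std::unique_ptr<KernelBase>> instances;
    register_instances<TunedConfigs>(instances);
    register_instances<GenericConfig>(instances); // fallback kept separate, as in the diff
    for(const auto& k : instances)
        std::printf("block_size = %d\n", k->block_size());
}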
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance.hpp
deleted (100644 → 0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_f32_f32_rank3_instances(
    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3>>& instances);

void add_device_softmax_f32_f32_rank4_instances(
    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
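The deleted header above bundled every reduce-dimension count under a single per-rank entry point; the per-rank/per-reduce headers that remain encode both Rank and NumReduceDim. A caller that wants the instances for a given combination can therefore select the right registration function at compile time, roughly as sketched below. The names (InstanceBase, get_softmax_instances, add_rankN_reduceM_instances) are hypothetical and stand in for the real declarations; this is not CK's DeviceOperationInstanceFactory.

// Minimal compile-time dispatch over (Rank, NumReduceDim), under the assumptions above.
#include <memory>
#include <vector>

struct InstanceBase { virtual ~InstanceBase() = default; };
using InstancePtr = std::unique_ptr<InstanceBase>;

// Stand-ins for the per-combination add_*_instances() functions declared in the headers.
void add_rank3_reduce1_instances(std::vector<InstancePtr>& v) { v.push_back(std::make_unique<InstanceBase>()); }
void add_rank3_reduce2_instances(std::vector<InstancePtr>& v) { v.push_back(std::make_unique<InstanceBase>()); }
void add_rank4_reduce1_instances(std::vector<InstancePtr>& v) { v.push_back(std::make_unique<InstanceBase>()); }

template <int Rank, int NumReduceDim>
std::vector<InstancePtr> get_softmax_instances()
{
    static_assert(NumReduceDim >= 1 && NumReduceDim <= Rank,
                  "number of reduced dimensions must fit within the tensor rank");

    std::vector<InstancePtr> instances;
    if constexpr(Rank == 3 && NumReduceDim == 1)
        add_rank3_reduce1_instances(instances);
    else if constexpr(Rank == 3 && NumReduceDim == 2)
        add_rank3_reduce2_instances(instances);
    else if constexpr(Rank == 4 && NumReduceDim == 1)
        add_rank4_reduce1_instances(instances);
    return instances;
}

int main() { return get_softmax_instances<4, 1>().empty() ? 1 : 0; }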
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f32_f32_rank3_reduce1_instances(
-    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3>>& instances);
+    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3, 1>>& instances);
 
 } // namespace instance
 } // namespace device
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f32_f32_rank3_reduce2_instances(
-    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3>>& instances);
+    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3, 2>>& instances);
 
 } // namespace instance
 } // namespace device
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f32_f32_rank3_reduce3_instances(
-    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3>>& instances);
+    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3, 3>>& instances);
 
 } // namespace instance
 } // namespace device
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f32_f32_rank4_reduce1_instances(
-    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);
+    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 1>>& instances);
 
 } // namespace instance
 } // namespace device
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f32_f32_rank4_reduce2_instances(
-    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);
+    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 2>>& instances);
 
 } // namespace instance
 } // namespace device
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f32_f32_rank4_reduce3_instances(
-    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);
+    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 3>>& instances);
 
 } // namespace instance
 } // namespace device
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp

@@ -14,7 +14,7 @@ namespace device {
 namespace instance {
 
 void add_device_softmax_f32_f32_rank4_reduce4_instances(
-    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);
+    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 4>>& instances);
 
 } // namespace instance
 } // namespace device
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp

@@ -16,7 +16,7 @@ template <index_t Rank, index_t Reduce>
 using device_softmax_f32_f32_instances = std::tuple<
     // clang-format off
     // InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
-    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 1, 8, 1, 1, 1>, // fallback kernel
+    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 1, 8, 1, 1, 1>,
     DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 1, 8, 1, 4, 4>,
     DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 4, 64, 1, 8, 1, 4, 4>,
     DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 2, 128, 1, 8, 1, 4, 4>,

@@ -32,6 +32,13 @@ using device_softmax_f32_f32_instances = std::tuple<
     // clang-format on
     >;
 
+template <index_t Rank, index_t Reduce>
+using device_softmax_f32_f32_generic_instance = std::tuple<
+    // clang-format off
+    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 64, 8, 8, 1, 1, 1, 1, 1>
+    // clang-format on
+    >;
+
 } // namespace instance
 } // namespace device
 } // namespace tensor_operation
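The numeric tail of each DeviceSoftmaxImpl follows the comment row above: BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize. Assuming the usual meaning of those names (BlockSize = MThreadClusterSize × KThreadClusterSize, each thread owning an MThreadSliceSize × KThreadSliceSize slice), the per-block tile size can be read off directly; the snippet below is only an interpretation aid with hypothetical names, not code from the library.

// Back-of-the-envelope reading of the tuning parameters, under the assumptions above.
#include <cstdio>

struct SoftmaxTuning
{
    int block_size;
    int m_cluster, k_cluster;  // threads per block along M / K
    int m_slice, k_slice;      // elements per thread along M / K
    int in_vector, out_vector; // load / store vector widths

    constexpr int tile_m() const { return m_cluster * m_slice; }
    constexpr int tile_k() const { return k_cluster * k_slice; }
    constexpr bool consistent() const { return block_size == m_cluster * k_cluster; }
};

int main()
{
    // First tuned f32 instance: 256, 8, 32, 1, 8, vector widths 1/1 -> an 8 x 256 tile per block.
    constexpr SoftmaxTuning tuned{256, 8, 32, 1, 8, 1, 1};
    // Generic fallback: 64, 8, 8, 1, 1, vector widths 1/1 -> an 8 x 8 tile with scalar accesses.
    constexpr SoftmaxTuning generic{64, 8, 8, 1, 1, 1, 1};

    static_assert(tuned.consistent() && generic.consistent(),
                  "thread cluster dimensions must fill the block");
    std::printf("tuned tile: %d x %d, generic tile: %d x %d\n",
                tuned.tile_m(), tuned.tile_k(), generic.tile_m(), generic.tile_k());
}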
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.hpp
deleted (100644 → 0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank3_instances(
    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3>>& instances);

void add_device_softmax_i8_i8_rank4_instances(
    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp
deleted (100644 → 0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank3_reduce1_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp
deleted (100644 → 0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank3_reduce2_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp
deleted (100644 → 0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank3_reduce3_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp
deleted (100644 → 0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank4_reduce1_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp
deleted (100644 → 0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank4_reduce2_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck