Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f74b77bc
Commit
f74b77bc
authored
Jun 19, 2023
by
carlushuang
Browse files
Merge remote-tracking branch 'origin/develop' into stream-k-initial-impl
parents
b5be51ed
0d911822
Changes
162
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
42 additions
and
240 deletions
+42
-240
library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt
.../src/tensor_operation_instance/gpu/softmax/CMakeLists.txt
+0
-10
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance.cpp
..._instance/gpu/softmax/device_softmax_f16_f16_instance.cpp
+0
-40
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp
...softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp
...softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp
...softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp
...softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp
...softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp
...softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp
...softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance.cpp
..._instance/gpu/softmax/device_softmax_f32_f32_instance.cpp
+0
-40
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp
...softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp
...softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp
...softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp
...softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp
...softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp
...softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp
...softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp
+3
-4
library/src/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.cpp
...on_instance/gpu/softmax/device_softmax_i8_i8_instance.cpp
+0
-40
library/src/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.cpp
...u/softmax/device_softmax_i8_i8_instance_rank3_reduce1.cpp
+0
-27
library/src/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.cpp
...u/softmax/device_softmax_i8_i8_instance_rank3_reduce2.cpp
+0
-27
No files found.
library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt
View file @
f74b77bc
add_instance_library
(
device_softmax_instance
device_softmax_i8_i8_instance.cpp
device_softmax_i8_i8_instance_rank3_reduce1.cpp
device_softmax_i8_i8_instance_rank3_reduce2.cpp
device_softmax_i8_i8_instance_rank3_reduce3.cpp
device_softmax_i8_i8_instance_rank4_reduce1.cpp
device_softmax_i8_i8_instance_rank4_reduce2.cpp
device_softmax_i8_i8_instance_rank4_reduce3.cpp
device_softmax_i8_i8_instance_rank4_reduce4.cpp
device_softmax_f16_f16_instance.cpp
device_softmax_f16_f16_instance_rank3_reduce1.cpp
device_softmax_f16_f16_instance_rank3_reduce2.cpp
device_softmax_f16_f16_instance_rank3_reduce3.cpp
...
...
@@ -15,7 +6,6 @@ add_instance_library(device_softmax_instance
device_softmax_f16_f16_instance_rank4_reduce2.cpp
device_softmax_f16_f16_instance_rank4_reduce3.cpp
device_softmax_f16_f16_instance_rank4_reduce4.cpp
device_softmax_f32_f32_instance.cpp
device_softmax_f32_f32_instance_rank3_reduce1.cpp
device_softmax_f32_f32_instance_rank3_reduce2.cpp
device_softmax_f32_f32_instance_rank3_reduce3.cpp
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance.cpp
deleted
100644 → 0
View file @
b5be51ed
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Aggregate entry point for the F16-in / F16-out softmax instances whose
// DeviceSoftmaxPtr rank argument is 3.
//
// The caller-owned `instances` vector is handed, by reference, to the three
// per-reduce helpers in the fixed order reduce1 -> reduce2 -> reduce3.
// NOTE(review): each helper presumably appends the instances for that number
// of reduced dimensions; they are defined in other translation units, so
// confirm against the included *_rank3_reduceN.hpp headers.
void add_device_softmax_f16_f16_rank3_instances(
    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3>>& instances)
{
    add_device_softmax_f16_f16_rank3_reduce1_instances(instances);
    add_device_softmax_f16_f16_rank3_reduce2_instances(instances);
    add_device_softmax_f16_f16_rank3_reduce3_instances(instances);
}
// Aggregate entry point for the F16-in / F16-out softmax instances whose
// DeviceSoftmaxPtr rank argument is 4.
//
// The caller-owned `instances` vector is handed, by reference, to the four
// per-reduce helpers in the fixed order reduce1 -> reduce2 -> reduce3 -> reduce4.
// NOTE(review): each helper presumably appends the instances for that number
// of reduced dimensions; they are defined in other translation units, so
// confirm against the included *_rank4_reduceN.hpp headers.
void add_device_softmax_f16_f16_rank4_instances(
    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4>>& instances)
{
    add_device_softmax_f16_f16_rank4_reduce1_instances(instances);
    add_device_softmax_f16_f16_rank4_reduce2_instances(instances);
    add_device_softmax_f16_f16_rank4_reduce3_instances(instances);
    add_device_softmax_f16_f16_rank4_reduce4_instances(instances);
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
3
;
void
add_device_softmax_f16_f16_rank3_reduce1_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
3
,
1
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
RANK
,
1
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_generic_instance
<
3
,
1
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
3
,
1
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
3
;
void
add_device_softmax_f16_f16_rank3_reduce2_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
3
,
2
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
RANK
,
2
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_generic_instance
<
3
,
2
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
3
,
2
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
3
;
void
add_device_softmax_f16_f16_rank3_reduce3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
3
,
3
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
RANK
,
3
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_generic_instance
<
3
,
3
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
3
,
3
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
4
;
void
add_device_softmax_f16_f16_rank4_reduce1_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
4
,
1
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
RANK
,
1
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_generic_instance
<
4
,
1
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
4
,
1
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
4
;
void
add_device_softmax_f16_f16_rank4_reduce2_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
4
,
2
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
RANK
,
2
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_generic_instance
<
4
,
2
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
4
,
2
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
4
;
void
add_device_softmax_f16_f16_rank4_reduce3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
4
,
3
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
RANK
,
3
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_generic_instance
<
4
,
3
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
4
,
3
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
4
;
void
add_device_softmax_f16_f16_rank4_reduce4_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
4
,
4
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
RANK
,
4
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_generic_instance
<
4
,
4
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f16_f16_instances
<
4
,
4
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance.cpp
deleted
100644 → 0
View file @
b5be51ed
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Aggregate entry point for the F32-in / F32-out softmax instances whose
// DeviceSoftmaxPtr rank argument is 3.
//
// The caller-owned `instances` vector is handed, by reference, to the three
// per-reduce helpers in the fixed order reduce1 -> reduce2 -> reduce3.
// NOTE(review): each helper presumably appends the instances for that number
// of reduced dimensions; they are defined in other translation units, so
// confirm against the included *_rank3_reduceN.hpp headers.
void add_device_softmax_f32_f32_rank3_instances(
    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3>>& instances)
{
    add_device_softmax_f32_f32_rank3_reduce1_instances(instances);
    add_device_softmax_f32_f32_rank3_reduce2_instances(instances);
    add_device_softmax_f32_f32_rank3_reduce3_instances(instances);
}
// Aggregate entry point for the F32-in / F32-out softmax instances whose
// DeviceSoftmaxPtr rank argument is 4.
//
// The caller-owned `instances` vector is handed, by reference, to the four
// per-reduce helpers in the fixed order reduce1 -> reduce2 -> reduce3 -> reduce4.
// NOTE(review): each helper presumably appends the instances for that number
// of reduced dimensions; they are defined in other translation units, so
// confirm against the included *_rank4_reduceN.hpp headers.
void add_device_softmax_f32_f32_rank4_instances(
    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances)
{
    add_device_softmax_f32_f32_rank4_reduce1_instances(instances);
    add_device_softmax_f32_f32_rank4_reduce2_instances(instances);
    add_device_softmax_f32_f32_rank4_reduce3_instances(instances);
    add_device_softmax_f32_f32_rank4_reduce4_instances(instances);
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
3
;
void
add_device_softmax_f32_f32_rank3_reduce1_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
,
1
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
RANK
,
1
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_generic_instance
<
3
,
1
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
3
,
1
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
3
;
void
add_device_softmax_f32_f32_rank3_reduce2_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
,
2
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
RANK
,
2
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_generic_instance
<
3
,
2
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
3
,
2
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
3
;
void
add_device_softmax_f32_f32_rank3_reduce3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
,
3
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
RANK
,
3
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_generic_instance
<
3
,
3
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
3
,
3
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
4
;
void
add_device_softmax_f32_f32_rank4_reduce1_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
,
1
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
RANK
,
1
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_generic_instance
<
4
,
1
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
4
,
1
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
4
;
void
add_device_softmax_f32_f32_rank4_reduce2_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
,
2
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
RANK
,
2
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_generic_instance
<
4
,
2
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
4
,
2
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
4
;
void
add_device_softmax_f32_f32_rank4_reduce3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
,
3
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
RANK
,
3
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_generic_instance
<
4
,
3
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
4
,
3
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp
View file @
f74b77bc
...
...
@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
static
constexpr
index_t
RANK
=
4
;
void
add_device_softmax_f32_f32_rank4_reduce4_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
RANK
>>&
instances
)
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
,
4
>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
RANK
,
4
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_generic_instance
<
4
,
4
>
{});
add_device_operation_instances
(
instances
,
device_softmax_f32_f32_instances
<
4
,
4
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.cpp
deleted
100644 → 0
View file @
b5be51ed
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// Aggregate entry point for the I8-in / I8-out softmax instances whose
// DeviceSoftmaxPtr rank argument is 3.
//
// The caller-owned `instances` vector is handed, by reference, to the three
// per-reduce helpers in the fixed order reduce1 -> reduce2 -> reduce3.
// NOTE(review): each helper presumably appends the instances for that number
// of reduced dimensions; they are defined in other translation units, so
// confirm against the included *_rank3_reduceN.hpp headers.
void add_device_softmax_i8_i8_rank3_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances)
{
    add_device_softmax_i8_i8_rank3_reduce1_instances(instances);
    add_device_softmax_i8_i8_rank3_reduce2_instances(instances);
    add_device_softmax_i8_i8_rank3_reduce3_instances(instances);
}
// Aggregate entry point for the I8-in / I8-out softmax instances whose
// DeviceSoftmaxPtr rank argument is 4.
//
// The caller-owned `instances` vector is handed, by reference, to the four
// per-reduce helpers in the fixed order reduce1 -> reduce2 -> reduce3 -> reduce4.
// NOTE(review): each helper presumably appends the instances for that number
// of reduced dimensions; they are defined in other translation units, so
// confirm against the included *_rank4_reduceN.hpp headers.
void add_device_softmax_i8_i8_rank4_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances)
{
    add_device_softmax_i8_i8_rank4_reduce1_instances(instances);
    add_device_softmax_i8_i8_rank4_reduce2_instances(instances);
    add_device_softmax_i8_i8_rank4_reduce3_instances(instances);
    add_device_softmax_i8_i8_rank4_reduce4_instances(instances);
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.cpp
deleted
100644 → 0
View file @
b5be51ed
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// File-local rank constant; used below both as the DeviceSoftmaxPtr rank
// argument and as the first template argument of the instance list.
static constexpr index_t RANK = 3;

// Passes `instances` together with a default-constructed
// device_softmax_i8_i8_instances<RANK, 1> object to
// add_device_operation_instances.
// NOTE(review): the second template argument (1) presumably is the number of
// reduced dimensions, matching the "reduce1" in the function name; confirm
// against device_softmax_i8_i8_instance_type.hpp.
void add_device_softmax_i8_i8_rank3_reduce1_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
    add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 1>{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.cpp
deleted
100644 → 0
View file @
b5be51ed
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// File-local rank constant; used below both as the DeviceSoftmaxPtr rank
// argument and as the first template argument of the instance list.
static constexpr index_t RANK = 3;

// Passes `instances` together with a default-constructed
// device_softmax_i8_i8_instances<RANK, 2> object to
// add_device_operation_instances.
// NOTE(review): the second template argument (2) presumably is the number of
// reduced dimensions, matching the "reduce2" in the function name; confirm
// against device_softmax_i8_i8_instance_type.hpp.
void add_device_softmax_i8_i8_rank3_reduce2_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
    add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 2>{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
Prev
1
…
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment