Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
06d2c7b1
Commit
06d2c7b1
authored
Jun 28, 2023
by
Jing Zhang
Committed by
root
Jun 28, 2023
Browse files
clean
parents
b27909a0
3b18f1e3
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
51 additions
and
177 deletions
+51
-177
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp
...softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp
+2
-2
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp
...ance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp
+8
-2
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance.hpp
..._instance/gpu/softmax/device_softmax_f32_f32_instance.hpp
+0
-22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp
...softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp
+2
-2
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp
...softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp
+2
-2
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp
...softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp
+2
-2
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp
...softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp
+2
-2
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp
...softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp
+2
-2
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp
...softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp
+2
-2
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp
...softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp
+2
-2
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp
...ance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp
+9
-2
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.hpp
...on_instance/gpu/softmax/device_softmax_i8_i8_instance.hpp
+0
-22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp
...u/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp
+0
-22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp
...u/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp
+0
-22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp
...u/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp
+0
-22
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp
...stance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp
+0
-40
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp
...peration_instance/gpu/softmax/device_softmax_instance.hpp
+15
-4
library/include/ck/library/utility/algorithm.hpp
library/include/ck/library/utility/algorithm.hpp
+1
-1
library/include/ck/library/utility/check_err.hpp
library/include/ck/library/utility/check_err.hpp
+1
-1
library/include/ck/library/utility/conv_common.hpp
library/include/ck/library/utility/conv_common.hpp
+1
-1
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -14,7 +14,7 @@ namespace device {
namespace
instance
{
void
add_device_softmax_f16_f16_rank4_reduce4_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
4
>>&
instances
);
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
4
,
4
>>&
instances
);
}
// namespace instance
}
// namespace device
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
...
...
@@ -16,7 +16,6 @@ template <index_t Rank, index_t Reduce>
using
device_softmax_f16_f16_instances
=
std
::
tuple
<
// clang-format off
// InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
// fallback kernel
DeviceSoftmaxImpl
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
8
,
32
,
1
,
8
,
1
,
1
,
1
>
,
DeviceSoftmaxImpl
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
8
,
32
,
1
,
8
,
1
,
8
,
8
>
,
DeviceSoftmaxImpl
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
4
,
64
,
1
,
8
,
1
,
8
,
8
>
,
...
...
@@ -33,6 +32,13 @@ using device_softmax_f16_f16_instances = std::tuple<
// clang-format on
>
;
template
<
index_t
Rank
,
index_t
Reduce
>
using
device_softmax_f16_f16_generic_instance
=
std
::
tuple
<
// clang-format off
DeviceSoftmaxImpl
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
64
,
8
,
8
,
1
,
1
,
1
,
1
,
1
>
// clang-format on
>
;
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance.hpp
deleted
100644 → 0
View file @
b27909a0
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
void
add_device_softmax_f32_f32_rank3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
>>&
instances
);
void
add_device_softmax_f32_f32_rank4_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
>>&
instances
);
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -14,7 +14,7 @@ namespace device {
namespace
instance
{
void
add_device_softmax_f32_f32_rank3_reduce1_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
>>&
instances
);
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
,
1
>>&
instances
);
}
// namespace instance
}
// namespace device
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -14,7 +14,7 @@ namespace device {
namespace
instance
{
void
add_device_softmax_f32_f32_rank3_reduce2_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
>>&
instances
);
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
,
2
>>&
instances
);
}
// namespace instance
}
// namespace device
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -14,7 +14,7 @@ namespace device {
namespace
instance
{
void
add_device_softmax_f32_f32_rank3_reduce3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
>>&
instances
);
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
,
3
>>&
instances
);
}
// namespace instance
}
// namespace device
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -14,7 +14,7 @@ namespace device {
namespace
instance
{
void
add_device_softmax_f32_f32_rank4_reduce1_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
>>&
instances
);
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
,
1
>>&
instances
);
}
// namespace instance
}
// namespace device
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -14,7 +14,7 @@ namespace device {
namespace
instance
{
void
add_device_softmax_f32_f32_rank4_reduce2_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
>>&
instances
);
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
,
2
>>&
instances
);
}
// namespace instance
}
// namespace device
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -14,7 +14,7 @@ namespace device {
namespace
instance
{
void
add_device_softmax_f32_f32_rank4_reduce3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
>>&
instances
);
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
,
3
>>&
instances
);
}
// namespace instance
}
// namespace device
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -14,7 +14,7 @@ namespace device {
namespace
instance
{
void
add_device_softmax_f32_f32_rank4_reduce4_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
>>&
instances
);
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
,
4
>>&
instances
);
}
// namespace instance
}
// namespace device
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
...
...
@@ -16,7 +16,7 @@ template <index_t Rank, index_t Reduce>
using
device_softmax_f32_f32_instances
=
std
::
tuple
<
// clang-format off
// InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
DeviceSoftmaxImpl
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
8
,
32
,
1
,
8
,
1
,
1
,
1
>
,
// fallback kernel
DeviceSoftmaxImpl
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
8
,
32
,
1
,
8
,
1
,
1
,
1
>
,
DeviceSoftmaxImpl
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
8
,
32
,
1
,
8
,
1
,
4
,
4
>
,
DeviceSoftmaxImpl
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
4
,
64
,
1
,
8
,
1
,
4
,
4
>
,
DeviceSoftmaxImpl
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
2
,
128
,
1
,
8
,
1
,
4
,
4
>
,
...
...
@@ -32,6 +32,13 @@ using device_softmax_f32_f32_instances = std::tuple<
// clang-format on
>
;
template
<
index_t
Rank
,
index_t
Reduce
>
using
device_softmax_f32_f32_generic_instance
=
std
::
tuple
<
// clang-format off
DeviceSoftmaxImpl
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
64
,
8
,
8
,
1
,
1
,
1
,
1
,
1
>
// clang-format on
>
;
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.hpp
deleted
100644 → 0
View file @
b27909a0
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
void
add_device_softmax_i8_i8_rank3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
3
>>&
instances
);
void
add_device_softmax_i8_i8_rank4_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
4
>>&
instances
);
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp
deleted
100644 → 0
View file @
b27909a0
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
void
add_device_softmax_i8_i8_rank4_reduce2_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
4
>>&
instances
);
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp
deleted
100644 → 0
View file @
b27909a0
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
void
add_device_softmax_i8_i8_rank4_reduce3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
4
>>&
instances
);
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp
deleted
100644 → 0
View file @
b27909a0
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
void
add_device_softmax_i8_i8_rank4_reduce4_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
4
>>&
instances
);
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp
deleted
100644 → 0
View file @
b27909a0
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
template
<
index_t
Rank
,
index_t
Reduce
>
using
device_softmax_i8_i8_instances
=
std
::
tuple
<
// clang-format off
// InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
// fallback kernel
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
8
,
32
,
1
,
16
,
1
,
1
,
1
>
,
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
8
,
32
,
1
,
16
,
1
,
16
,
16
>
,
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
4
,
64
,
1
,
16
,
1
,
16
,
16
>
,
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
2
,
128
,
1
,
16
,
1
,
16
,
16
>
,
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
2
,
128
,
1
,
32
,
1
,
16
,
16
>
,
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
2
,
128
,
1
,
64
,
1
,
16
,
16
>
,
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
16
,
1
,
16
,
16
>
,
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
32
,
1
,
16
,
16
>
,
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
64
,
1
,
16
,
16
>
,
// Reduction on middle dimensions
// InSrcVectorDim is 0 since we want to coalesce reads on M dimension
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
8
,
32
,
8
,
8
,
0
,
1
,
1
>
,
DeviceSoftmaxImpl
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
Rank
,
Reduce
,
256
,
32
,
8
,
32
,
8
,
0
,
16
,
8
>
// clang-format on
>
;
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp"
library/include/ck/library/utility/algorithm.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/utility/check_err.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/utility/conv_common.hpp
View file @
06d2c7b1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
Prev
1
…
33
34
35
36
37
38
39
40
41
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment