"include/vscode:/vscode.git/clone" did not exist on "97648ccd662b0b55013984e0f8d31da02f24bfad"
Commit f74b77bc authored by carlushuang
Browse files

Merge remote-tracking branch 'origin/develop' into stream-k-initial-impl

parents b5be51ed 0d911822
add_instance_library(device_softmax_instance add_instance_library(device_softmax_instance
device_softmax_i8_i8_instance.cpp
device_softmax_i8_i8_instance_rank3_reduce1.cpp
device_softmax_i8_i8_instance_rank3_reduce2.cpp
device_softmax_i8_i8_instance_rank3_reduce3.cpp
device_softmax_i8_i8_instance_rank4_reduce1.cpp
device_softmax_i8_i8_instance_rank4_reduce2.cpp
device_softmax_i8_i8_instance_rank4_reduce3.cpp
device_softmax_i8_i8_instance_rank4_reduce4.cpp
device_softmax_f16_f16_instance.cpp
device_softmax_f16_f16_instance_rank3_reduce1.cpp device_softmax_f16_f16_instance_rank3_reduce1.cpp
device_softmax_f16_f16_instance_rank3_reduce2.cpp device_softmax_f16_f16_instance_rank3_reduce2.cpp
device_softmax_f16_f16_instance_rank3_reduce3.cpp device_softmax_f16_f16_instance_rank3_reduce3.cpp
...@@ -15,7 +6,6 @@ add_instance_library(device_softmax_instance ...@@ -15,7 +6,6 @@ add_instance_library(device_softmax_instance
device_softmax_f16_f16_instance_rank4_reduce2.cpp device_softmax_f16_f16_instance_rank4_reduce2.cpp
device_softmax_f16_f16_instance_rank4_reduce3.cpp device_softmax_f16_f16_instance_rank4_reduce3.cpp
device_softmax_f16_f16_instance_rank4_reduce4.cpp device_softmax_f16_f16_instance_rank4_reduce4.cpp
device_softmax_f32_f32_instance.cpp
device_softmax_f32_f32_instance_rank3_reduce1.cpp device_softmax_f32_f32_instance_rank3_reduce1.cpp
device_softmax_f32_f32_instance_rank3_reduce2.cpp device_softmax_f32_f32_instance_rank3_reduce2.cpp
device_softmax_f32_f32_instance_rank3_reduce3.cpp device_softmax_f32_f32_instance_rank3_reduce3.cpp
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Aggregating entry point for the F16 softmax instances with the last
// DeviceSoftmaxPtr template argument equal to 3.
//
// Forwards the caller's `instances` vector, unchanged, to the reduce1, reduce2
// and reduce3 helpers, in that order.
//
// NOTE(review): each helper presumably appends to `instances`, which would make
// the resulting element order reduce1, reduce2, reduce3 — confirm against the
// helper definitions before reordering these calls.
void add_device_softmax_f16_f16_rank3_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3>>& instances)
{
add_device_softmax_f16_f16_rank3_reduce1_instances(instances);
add_device_softmax_f16_f16_rank3_reduce2_instances(instances);
add_device_softmax_f16_f16_rank3_reduce3_instances(instances);
}
// Aggregating entry point for the F16 softmax instances with the last
// DeviceSoftmaxPtr template argument equal to 4.
//
// Forwards the caller's `instances` vector, unchanged, to the reduce1, reduce2,
// reduce3 and reduce4 helpers, in that order.
//
// NOTE(review): each helper presumably appends to `instances`, which would make
// the resulting element order reduce1 .. reduce4 — confirm against the helper
// definitions before reordering these calls.
void add_device_softmax_f16_f16_rank4_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4>>& instances)
{
add_device_softmax_f16_f16_rank4_reduce1_instances(instances);
add_device_softmax_f16_f16_rank4_reduce2_instances(instances);
add_device_softmax_f16_f16_rank4_reduce3_instances(instances);
add_device_softmax_f16_f16_rank4_reduce4_instances(instances);
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f16_f16_rank3_reduce1_instances( void add_device_softmax_f16_f16_rank3_reduce1_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3, 1>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 1>{}); add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<3, 1>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<3, 1>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f16_f16_rank3_reduce2_instances( void add_device_softmax_f16_f16_rank3_reduce2_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3, 2>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 2>{}); add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<3, 2>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<3, 2>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f16_f16_rank3_reduce3_instances( void add_device_softmax_f16_f16_rank3_reduce3_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3, 3>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 3>{}); add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<3, 3>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<3, 3>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f16_f16_rank4_reduce1_instances( void add_device_softmax_f16_f16_rank4_reduce1_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 1>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 1>{}); add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<4, 1>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 1>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f16_f16_rank4_reduce2_instances( void add_device_softmax_f16_f16_rank4_reduce2_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 2>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 2>{}); add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<4, 2>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 2>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f16_f16_rank4_reduce3_instances( void add_device_softmax_f16_f16_rank4_reduce3_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 3>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 3>{}); add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<4, 3>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 3>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f16_f16_rank4_reduce4_instances( void add_device_softmax_f16_f16_rank4_reduce4_instances(
std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4, 4>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f16_f16_instances<RANK, 4>{}); add_device_operation_instances(instances, device_softmax_f16_f16_generic_instance<4, 4>{});
add_device_operation_instances(instances, device_softmax_f16_f16_instances<4, 4>{});
} }
} // namespace instance } // namespace instance
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Aggregating entry point for the F32 softmax instances with the last
// DeviceSoftmaxPtr template argument equal to 3.
//
// Forwards the caller's `instances` vector, unchanged, to the reduce1, reduce2
// and reduce3 helpers, in that order.
//
// NOTE(review): each helper presumably appends to `instances`, which would make
// the resulting element order reduce1, reduce2, reduce3 — confirm against the
// helper definitions before reordering these calls.
void add_device_softmax_f32_f32_rank3_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3>>& instances)
{
add_device_softmax_f32_f32_rank3_reduce1_instances(instances);
add_device_softmax_f32_f32_rank3_reduce2_instances(instances);
add_device_softmax_f32_f32_rank3_reduce3_instances(instances);
}
// Aggregating entry point for the F32 softmax instances with the last
// DeviceSoftmaxPtr template argument equal to 4.
//
// Forwards the caller's `instances` vector, unchanged, to the reduce1, reduce2,
// reduce3 and reduce4 helpers, in that order.
//
// NOTE(review): each helper presumably appends to `instances`, which would make
// the resulting element order reduce1 .. reduce4 — confirm against the helper
// definitions before reordering these calls.
void add_device_softmax_f32_f32_rank4_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances)
{
add_device_softmax_f32_f32_rank4_reduce1_instances(instances);
add_device_softmax_f32_f32_rank4_reduce2_instances(instances);
add_device_softmax_f32_f32_rank4_reduce3_instances(instances);
add_device_softmax_f32_f32_rank4_reduce4_instances(instances);
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f32_f32_rank3_reduce1_instances( void add_device_softmax_f32_f32_rank3_reduce1_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3, 1>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 1>{}); add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<3, 1>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<3, 1>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f32_f32_rank3_reduce2_instances( void add_device_softmax_f32_f32_rank3_reduce2_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3, 2>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 2>{}); add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<3, 2>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<3, 2>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_f32_f32_rank3_reduce3_instances( void add_device_softmax_f32_f32_rank3_reduce3_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 3, 3>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 3>{}); add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<3, 3>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<3, 3>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f32_f32_rank4_reduce1_instances( void add_device_softmax_f32_f32_rank4_reduce1_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 1>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 1>{}); add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<4, 1>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 1>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f32_f32_rank4_reduce2_instances( void add_device_softmax_f32_f32_rank4_reduce2_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 2>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 2>{}); add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<4, 2>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 2>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f32_f32_rank4_reduce3_instances( void add_device_softmax_f32_f32_rank4_reduce3_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 3>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 3>{}); add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<4, 3>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 3>{});
} }
} // namespace instance } // namespace instance
......
...@@ -13,12 +13,11 @@ namespace tensor_operation { ...@@ -13,12 +13,11 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_f32_f32_rank4_reduce4_instances( void add_device_softmax_f32_f32_rank4_reduce4_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, RANK>>& instances) std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4, 4>>& instances)
{ {
add_device_operation_instances(instances, device_softmax_f32_f32_instances<RANK, 4>{}); add_device_operation_instances(instances, device_softmax_f32_f32_generic_instance<4, 4>{});
add_device_operation_instances(instances, device_softmax_f32_f32_instances<4, 4>{});
} }
} // namespace instance } // namespace instance
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Aggregating entry point for the I8 softmax instances with the last
// DeviceSoftmaxPtr template argument equal to 3.
//
// Forwards the caller's `instances` vector, unchanged, to the reduce1, reduce2
// and reduce3 helpers, in that order.
//
// NOTE(review): each helper presumably appends to `instances`, which would make
// the resulting element order reduce1, reduce2, reduce3 — confirm against the
// helper definitions before reordering these calls.
void add_device_softmax_i8_i8_rank3_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances)
{
add_device_softmax_i8_i8_rank3_reduce1_instances(instances);
add_device_softmax_i8_i8_rank3_reduce2_instances(instances);
add_device_softmax_i8_i8_rank3_reduce3_instances(instances);
}
// Aggregating entry point for the I8 softmax instances with the last
// DeviceSoftmaxPtr template argument equal to 4.
//
// Forwards the caller's `instances` vector, unchanged, to the reduce1, reduce2,
// reduce3 and reduce4 helpers, in that order.
//
// NOTE(review): each helper presumably appends to `instances`, which would make
// the resulting element order reduce1 .. reduce4 — confirm against the helper
// definitions before reordering these calls.
void add_device_softmax_i8_i8_rank4_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances)
{
add_device_softmax_i8_i8_rank4_reduce1_instances(instances);
add_device_softmax_i8_i8_rank4_reduce2_instances(instances);
add_device_softmax_i8_i8_rank4_reduce3_instances(instances);
add_device_softmax_i8_i8_rank4_reduce4_instances(instances);
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Hands `instances` to add_device_operation_instances together with a
// default-constructed device_softmax_i8_i8_instances<3, 1>.
//
// The rank is written as the literal 3 instead of going through a
// namespace-scope `static constexpr index_t RANK`. That constant placed a
// generically named symbol into the shared ck::tensor_operation::device::instance
// namespace; sibling source files define the same name with a different value,
// so translation units that get merged (e.g. unity builds) would fail with a
// redefinition error. The parameter type is unchanged: the last template
// argument is still 3.
void add_device_softmax_i8_i8_rank3_reduce1_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances)
{
    add_device_operation_instances(instances, device_softmax_i8_i8_instances<3, 1>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Hands `instances` to add_device_operation_instances together with a
// default-constructed device_softmax_i8_i8_instances<3, 2>.
//
// The rank is written as the literal 3 instead of going through a
// namespace-scope `static constexpr index_t RANK`. That constant placed a
// generically named symbol into the shared ck::tensor_operation::device::instance
// namespace; sibling source files define the same name with a different value,
// so translation units that get merged (e.g. unity builds) would fail with a
// redefinition error. The parameter type is unchanged: the last template
// argument is still 3.
void add_device_softmax_i8_i8_rank3_reduce2_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances)
{
    add_device_operation_instances(instances, device_softmax_i8_i8_instances<3, 2>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment