"tests/git@developer.sourcefind.cn:SIYIXNI/vllm.git" did not exist on "ab3a5a8259922ce312d01be39d29e27666968039"
Commit f74b77bc authored by carlushuang's avatar carlushuang
Browse files

Merge remote-tracking branch 'origin/develop' into stream-k-initial-impl

parents b5be51ed 0d911822
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 3;
void add_device_softmax_i8_i8_rank3_reduce3_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 3>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_i8_i8_rank4_reduce1_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 1>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_i8_i8_rank4_reduce2_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 2>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_i8_i8_rank4_reduce3_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 3>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
static constexpr index_t RANK = 4;
void add_device_softmax_i8_i8_rank4_reduce4_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, RANK>>& instances)
{
add_device_operation_instances(instances, device_softmax_i8_i8_instances<RANK, 4>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
...@@ -40,7 +40,11 @@ template <> std::string type_to_string<int8_t>() { return "int8"; } ...@@ -40,7 +40,11 @@ template <> std::string type_to_string<int8_t>() { return "int8"; }
template <> std::string type_to_string<int32_t>() { return "int32"; } template <> std::string type_to_string<int32_t>() { return "int32"; }
// clang-format on // clang-format on
template <typename InDataType, typename AccDataType, typename OutDataType, index_t Rank> template <typename InDataType,
typename AccDataType,
typename OutDataType,
index_t Rank,
index_t NumReduceDim>
bool profile_softmax_impl(int do_verification, bool profile_softmax_impl(int do_verification,
int init_method, int init_method,
bool do_log, bool do_log,
...@@ -54,7 +58,13 @@ bool profile_softmax_impl(int do_verification, ...@@ -54,7 +58,13 @@ bool profile_softmax_impl(int do_verification,
if(Rank != in_length.size()) if(Rank != in_length.size())
{ {
throw std::runtime_error("Input tensor rank is different from template argument Rank!"); throw std::runtime_error("Input tensor rank is different from template argument Rank!");
} };
if(NumReduceDim != reduce_dims.size())
{
throw std::runtime_error(
"Input reduce_dims rank is different from template argument NumReduceDim!");
};
Tensor<InDataType> in = in_strides.empty() ? Tensor<InDataType>(in_length) Tensor<InDataType> in = in_strides.empty() ? Tensor<InDataType>(in_length)
: Tensor<InDataType>(in_length, in_strides); : Tensor<InDataType>(in_length, in_strides);
...@@ -92,8 +102,13 @@ bool profile_softmax_impl(int do_verification, ...@@ -92,8 +102,13 @@ bool profile_softmax_impl(int do_verification,
// add device softmax instances // add device softmax instances
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using DeviceOp = tensor_operation::device:: using DeviceOp = tensor_operation::device::DeviceSoftmax<InDataType,
DeviceSoftmax<InDataType, AccDataType, OutDataType, PassThrough, PassThrough, Rank>; AccDataType,
OutDataType,
PassThrough,
PassThrough,
Rank,
NumReduceDim>;
// get device op instances // get device op instances
const auto instances = tensor_operation::device::instance::DeviceOperationInstanceFactory< const auto instances = tensor_operation::device::instance::DeviceOperationInstanceFactory<
...@@ -112,13 +127,6 @@ bool profile_softmax_impl(int do_verification, ...@@ -112,13 +127,6 @@ bool profile_softmax_impl(int do_verification,
for(auto& inst_ptr : instances) for(auto& inst_ptr : instances)
{ {
// Is this user's responsibility to check if problem mismatches kernel instance (ie. rank 3
// problem to rank 4 kernel) other than invoking IsSupportedArgument()?
if(!(inst_ptr->GetNumReduceDim() == static_cast<index_t>(reduce_dims.size())))
{
continue;
}
auto argument_ptr = inst_ptr->MakeArgumentPointer(in_tensor_lengths, auto argument_ptr = inst_ptr->MakeArgumentPointer(in_tensor_lengths,
in_tensor_strides, in_tensor_strides,
reduce_dims, reduce_dims,
......
...@@ -92,27 +92,76 @@ int profile_softmax(int argc, char* argv[]) ...@@ -92,27 +92,76 @@ int profile_softmax(int argc, char* argv[])
{ {
if(data_type == SoftmaxDataType::F16_F16) if(data_type == SoftmaxDataType::F16_F16)
{ {
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 3>(do_verification, if(reduce.size() == 1)
init_method, ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 3, 1>(
do_log, do_verification,
time_kernel, init_method,
length, do_log,
stride, time_kernel,
reduce, length,
double(alpha), stride,
double(beta)); reduce,
double(alpha),
double(beta));
else if(reduce.size() == 2)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 3, 2>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 3)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 3, 3>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else
throw std::runtime_error("invalid number of dimensions to reduce");
} }
else if(data_type == SoftmaxDataType::F32_F32) else if(data_type == SoftmaxDataType::F32_F32)
{ {
ck::profiler::profile_softmax_impl<float, float, float, 3>(do_verification, if(reduce.size() == 1)
init_method, ck::profiler::profile_softmax_impl<float, float, float, 3, 1>(do_verification,
do_log, init_method,
time_kernel, do_log,
length, time_kernel,
stride, length,
reduce, stride,
double(alpha), reduce,
double(beta)); double(alpha),
double(beta));
else if(reduce.size() == 2)
ck::profiler::profile_softmax_impl<float, float, float, 3, 2>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 3)
ck::profiler::profile_softmax_impl<float, float, float, 3, 3>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else
throw std::runtime_error("invalid number of dimensions to reduce");
} }
else else
{ {
...@@ -124,27 +173,97 @@ int profile_softmax(int argc, char* argv[]) ...@@ -124,27 +173,97 @@ int profile_softmax(int argc, char* argv[])
{ {
if(data_type == SoftmaxDataType::F16_F16) if(data_type == SoftmaxDataType::F16_F16)
{ {
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4>(do_verification, if(reduce.size() == 1)
init_method, ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4, 1>(
do_log, do_verification,
time_kernel, init_method,
length, do_log,
stride, time_kernel,
reduce, length,
double(alpha), stride,
double(beta)); reduce,
double(alpha),
double(beta));
else if(reduce.size() == 2)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4, 2>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 3)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4, 3>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 4)
ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4, 4>(
do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else
throw std::runtime_error("invalid number of dimensions to reduce");
} }
else if(data_type == SoftmaxDataType::F32_F32) else if(data_type == SoftmaxDataType::F32_F32)
{ {
ck::profiler::profile_softmax_impl<float, float, float, 4>(do_verification, if(reduce.size() == 1)
init_method, ck::profiler::profile_softmax_impl<float, float, float, 4, 1>(do_verification,
do_log, init_method,
time_kernel, do_log,
length, time_kernel,
stride, length,
reduce, stride,
double(alpha), reduce,
double(beta)); double(alpha),
double(beta));
else if(reduce.size() == 2)
ck::profiler::profile_softmax_impl<float, float, float, 4, 2>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 3)
ck::profiler::profile_softmax_impl<float, float, float, 4, 3>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else if(reduce.size() == 4)
ck::profiler::profile_softmax_impl<float, float, float, 4, 4>(do_verification,
init_method,
do_log,
time_kernel,
length,
stride,
reduce,
double(alpha),
double(beta));
else
throw std::runtime_error("invalid number of dimensions to reduce");
} }
else else
{ {
......
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_test_executable(test_batched_gemm_fp16 batched_gemm_fp16.cpp) add_test_executable(test_batched_gemm_fp16 batched_gemm_fp16.cpp)
target_link_libraries(test_batched_gemm_fp16 PRIVATE utility) target_link_libraries(test_batched_gemm_fp16 PRIVATE utility)
target_link_libraries(test_batched_gemm_fp16 PRIVATE device_batched_gemm_instance) target_link_libraries(test_batched_gemm_fp16 PRIVATE device_batched_gemm_instance)
...@@ -14,4 +17,6 @@ if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS M ...@@ -14,4 +17,6 @@ if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS M
add_test_executable(test_batched_gemm_int8 batched_gemm_int8.cpp) add_test_executable(test_batched_gemm_int8 batched_gemm_int8.cpp)
target_link_libraries(test_batched_gemm_int8 PRIVATE utility) target_link_libraries(test_batched_gemm_int8 PRIVATE utility)
target_link_libraries(test_batched_gemm_int8 PRIVATE device_batched_gemm_instance) target_link_libraries(test_batched_gemm_int8 PRIVATE device_batched_gemm_instance)
endif() set(target 1)
\ No newline at end of file endif()
endforeach()
\ No newline at end of file
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_custom_target(test_batched_gemm_gemm) add_custom_target(test_batched_gemm_gemm)
add_gtest_executable(test_batched_gemm_gemm_fp16 test_batched_gemm_gemm_fp16.cpp) add_gtest_executable(test_batched_gemm_gemm_fp16 test_batched_gemm_gemm_fp16.cpp)
target_link_libraries(test_batched_gemm_gemm_fp16 PRIVATE utility device_batched_gemm_gemm_instance) target_link_libraries(test_batched_gemm_gemm_fp16 PRIVATE utility device_batched_gemm_gemm_instance)
add_dependencies(test_batched_gemm_gemm test_batched_gemm_gemm_fp16) add_dependencies(test_batched_gemm_gemm test_batched_gemm_gemm_fp16)
endif() set(target 1)
\ No newline at end of file endif()
endforeach()
\ No newline at end of file
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_test_executable(test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp) add_test_executable(test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp)
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE utility) target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE utility)
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE device_batched_gemm_reduce_instance) target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE device_batched_gemm_reduce_instance)
endif() set(target 1)
endif()
endforeach()
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_custom_target(test_batched_gemm_softmax_gemm) add_custom_target(test_batched_gemm_softmax_gemm)
add_gtest_executable(test_batched_gemm_softmax_gemm_fp16 test_batched_gemm_softmax_gemm_fp16.cpp) add_gtest_executable(test_batched_gemm_softmax_gemm_fp16 test_batched_gemm_softmax_gemm_fp16.cpp)
target_link_libraries(test_batched_gemm_softmax_gemm_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_instance) target_link_libraries(test_batched_gemm_softmax_gemm_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_instance)
add_dependencies(test_batched_gemm_softmax_gemm test_batched_gemm_softmax_gemm_fp16) add_dependencies(test_batched_gemm_softmax_gemm test_batched_gemm_softmax_gemm_fp16)
endif() set(target 1)
\ No newline at end of file endif()
endforeach()
\ No newline at end of file
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_custom_target(test_batched_gemm_softmax_gemm_permute) add_custom_target(test_batched_gemm_softmax_gemm_permute)
add_gtest_executable(test_batched_gemm_softmax_gemm_permute_fp16 test_batched_gemm_softmax_gemm_permute_fp16.cpp) add_gtest_executable(test_batched_gemm_softmax_gemm_permute_fp16 test_batched_gemm_softmax_gemm_permute_fp16.cpp)
...@@ -14,4 +17,6 @@ if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS M ...@@ -14,4 +17,6 @@ if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS M
target_link_libraries(test_batched_gemm_bias_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance) target_link_libraries(test_batched_gemm_bias_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_fp16) add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_fp16)
add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_bf16) add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_bf16)
endif() set(target 1)
\ No newline at end of file endif()
endforeach()
\ No newline at end of file
add_gtest_executable(test_contraction test_contraction.cpp) add_gtest_executable(test_contraction test_contraction.cpp)
target_link_libraries(test_contraction PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance) target_link_libraries(test_contraction PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance)
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_gtest_executable(test_contraction_interface test_contraction_interface.cpp) add_gtest_executable(test_contraction_interface test_contraction_interface.cpp)
target_link_libraries(test_contraction_interface PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance) target_link_libraries(test_contraction_interface PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance)
endif() set(target 1)
endif()
endforeach()
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_gtest_executable(test_convnd_bwd_data convnd_bwd_data.cpp) add_gtest_executable(test_convnd_bwd_data convnd_bwd_data.cpp)
target_link_libraries(test_convnd_bwd_data PRIVATE utility device_conv1d_bwd_data_instance device_conv2d_bwd_data_instance device_conv3d_bwd_data_instance) target_link_libraries(test_convnd_bwd_data PRIVATE utility device_conv1d_bwd_data_instance device_conv2d_bwd_data_instance device_conv3d_bwd_data_instance)
endif() set(target 1)
\ No newline at end of file endif()
endforeach()
\ No newline at end of file
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_gtest_executable(test_convnd_fwd convnd_fwd.cpp) add_gtest_executable(test_convnd_fwd convnd_fwd.cpp)
target_link_libraries(test_convnd_fwd PRIVATE utility device_conv2d_fwd_instance) target_link_libraries(test_convnd_fwd PRIVATE utility device_conv2d_fwd_instance)
endif() set(target 1)
endif()
endforeach()
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_custom_target(test_gemm_layernorm) add_custom_target(test_gemm_layernorm)
add_gtest_executable(test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16.cpp) add_gtest_executable(test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16.cpp)
target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance) target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance)
add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16) add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16)
endif() set(target 1)
endif()
endforeach()
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_gtest_executable(test_gemm_splitk test_gemm_splitk.cpp) add_gtest_executable(test_gemm_splitk test_gemm_splitk.cpp)
target_link_libraries(test_gemm_splitk PRIVATE utility device_gemm_splitk_instance) target_link_libraries(test_gemm_splitk PRIVATE utility device_gemm_splitk_instance)
endif() set(target 1)
endif()
endforeach()
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_gtest_executable(test_grouped_convnd_bwd_weight grouped_convnd_bwd_weight.cpp) add_gtest_executable(test_grouped_convnd_bwd_weight grouped_convnd_bwd_weight.cpp)
target_link_libraries(test_grouped_convnd_bwd_weight PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance) target_link_libraries(test_grouped_convnd_bwd_weight PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance)
endif() set(target 1)
\ No newline at end of file endif()
endforeach()
\ No newline at end of file
if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940") list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_custom_target(test_grouped_gemm) add_custom_target(test_grouped_gemm)
add_gtest_executable(test_grouped_gemm_splitk test_grouped_gemm_splitk.cpp) add_gtest_executable(test_grouped_gemm_splitk test_grouped_gemm_splitk.cpp)
add_gtest_executable(test_grouped_gemm_interface test_grouped_gemm_interface.cpp) add_gtest_executable(test_grouped_gemm_interface test_grouped_gemm_interface.cpp)
...@@ -6,4 +9,6 @@ if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS M ...@@ -6,4 +9,6 @@ if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS M
target_link_libraries(test_grouped_gemm_interface PRIVATE utility device_grouped_gemm_instance) target_link_libraries(test_grouped_gemm_interface PRIVATE utility device_grouped_gemm_instance)
add_dependencies(test_grouped_gemm test_grouped_gemm_splitk test_grouped_gemm_interface) add_dependencies(test_grouped_gemm test_grouped_gemm_splitk test_grouped_gemm_interface)
endif() set(target 1)
endif()
endforeach()
...@@ -13,7 +13,6 @@ using I = ck::Number<N>; ...@@ -13,7 +13,6 @@ using I = ck::Number<N>;
using F16 = ck::half_t; using F16 = ck::half_t;
using F32 = float; using F32 = float;
using I8 = int8_t;
template <typename Tuple> template <typename Tuple>
class TestSoftmax : public ck::TestSoftmax<Tuple> class TestSoftmax : public ck::TestSoftmax<Tuple>
...@@ -24,8 +23,7 @@ class TestSoftmax : public ck::TestSoftmax<Tuple> ...@@ -24,8 +23,7 @@ class TestSoftmax : public ck::TestSoftmax<Tuple>
using KernelTypes = ::testing::Types< using KernelTypes = ::testing::Types<
// InDataType, AccDataType, OutDataType, Rank // InDataType, AccDataType, OutDataType, Rank
std::tuple< F16, F32, F16, I<3>>, std::tuple< F16, F32, F16, I<3>>,
std::tuple< F32, F32, F32, I<3>>, std::tuple< F32, F32, F32, I<3>>
std::tuple< I8, F32, I8, I<3>>
>; >;
// clang-format on // clang-format on
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment