Commit 3289a5c9 authored by Andriy Roshchenko
Browse files

Narrowing the scope of PR to OCP FP8 enablement only

parent dbfb222d
......@@ -16,7 +16,6 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
#include "ck/library/utility/literals.hpp"
template <ck::index_t... Is>
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
......@@ -16,7 +16,6 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
#include "ck/library/utility/literals.hpp"
template <ck::index_t... Is>
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
......@@ -16,7 +16,6 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
#include "ck/library/utility/literals.hpp"
template <ck::index_t... Is>
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
......@@ -16,7 +16,6 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
#include "ck/library/utility/literals.hpp"
template <ck::index_t... Is>
......
......@@ -24,7 +24,6 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
#include "ck/library/utility/literals.hpp"
template <ck::index_t... Is>
......
......@@ -40,7 +40,7 @@ using BF8 = ck::bf8_t;
// Runtime options for the example driver.
// NOTE: the scraped diff left both the old (init_method = 2) and new
// (init_method = 1) lines in place, producing a duplicate member that does not
// compile; only the new value is kept, matching the 2 -> 1 change made
// consistently across this commit.
struct ExecutionConfig final
{
    // Verify the device output against a host reference implementation.
    bool do_verification = true;
    // Tensor initialization method selector (interpreted by the example's
    // init switch; exact meaning of each value is defined there).
    int init_method = 1;
    // Measure and report kernel execution time.
    bool time_kernel = false;
};
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp"
#include "common.hpp"
......@@ -11,7 +11,6 @@ using AccDataType = FP32;
using CShuffleDataType = FP16;
using DsDataType = ck::Tuple<>;
using InDataType = FP16;
using VerifyDataType = FP16; // is used for selection of check tolerances
using OutLayout = ck::tensor_layout::convolution::GNHWK;
using WeiLayout = ck::tensor_layout::convolution::GKYXC;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp"
#include "common.hpp"
......@@ -10,7 +10,6 @@ using AccDataType = FP32;
using CShuffleDataType = FP16;
using DsDataType = ck::Tuple<>;
using InDataType = FP16;
using VerifyDataType = FP16; // is used for selection of check tolerances
using OutLayout = ck::tensor_layout::convolution::GNHWK;
using WeiLayout = ck::tensor_layout::convolution::GKYXC;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp"
#include "common.hpp"
......@@ -12,7 +12,6 @@ using DsDataType = ck::Tuple<>;
using InDataType = FP16;
using AComputeType = BF8;
using BComputeType = FP8;
using VerifyDataType = BF8; // is used for selection of check tolerances
using OutLayout = ck::tensor_layout::convolution::GNHWK;
using WeiLayout = ck::tensor_layout::convolution::GKYXC;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
// Relative tolerance used when checking device results against a reference.
// Tolerances are chosen per precision class: double is tightest, float/half
// are moderate, reduced-precision and integer types are looser, and the 8-bit
// float formats accept a relative error up to 1.
template <typename DataType>
inline __host__ __device__ constexpr double get_rtol()
{
    using T = DataType;

    if constexpr(std::is_same_v<T, double>)
    {
        return 1e-6;
    }
    else if constexpr(std::is_same_v<T, float> || std::is_same_v<T, ck::half_t>)
    {
        return 1e-3;
    }
    else if constexpr(std::is_same_v<T, ck::bhalf_t>)
    {
        return 5e-2;
    }
    else if constexpr(std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t>)
    {
        return 1e-1;
    }
    else if constexpr(std::is_same_v<T, ck::f8_t> || std::is_same_v<T, ck::bf8_t>)
    {
        return 1.0;
    }
    else
    {
        // Conservative default for any type not listed above.
        return 1e-3;
    }
}
// Absolute tolerance used when checking device results against a reference.
// Mirrors get_rtol's per-type grouping: double is tightest, float/half are
// moderate, reduced-precision and integer types are looser, and the 8-bit
// float formats accept an absolute error up to 1.
template <typename DataType>
inline __host__ __device__ constexpr double get_atol()
{
    using T = DataType;

    if constexpr(std::is_same_v<T, double>)
    {
        return 1e-6;
    }
    else if constexpr(std::is_same_v<T, float> || std::is_same_v<T, ck::half_t>)
    {
        return 1e-3;
    }
    else if constexpr(std::is_same_v<T, ck::bhalf_t>)
    {
        return 5e-2;
    }
    else if constexpr(std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t>)
    {
        return 1e-1;
    }
    else if constexpr(std::is_same_v<T, ck::f8_t> || std::is_same_v<T, ck::bf8_t>)
    {
        return 1.0;
    }
    else
    {
        // Conservative default for any type not listed above.
        return 1e-3;
    }
}
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
bool run_conv_bwd_data(const ExecutionConfig& config,
const ck::utils::conv::ConvParam& conv_params,
......@@ -108,7 +27,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config,
wei.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
break;
default:
out.GenerateTensorValue(GeneratorTensor_3<OutDataType>{-0.5, 0.5});
out.GenerateTensorValue(GeneratorTensor_3<OutDataType>{0.0, 1.0});
wei.GenerateTensorValue(GeneratorTensor_3<WeiDataType>{-0.5, 0.5});
}
......@@ -219,11 +138,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config,
in_device_buf.FromDevice(in_device.mData.data());
return ck::utils::check_err(in_device.mData,
in_host.mData,
"Error: Incorrect results!",
get_rtol<VerifyDataType>(),
get_atol<VerifyDataType>());
return ck::utils::check_err(in_device.mData, in_host.mData);
}
return true;
......
......@@ -194,9 +194,9 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
b1_tensors[i].GenerateTensorValue(GeneratorTensor_3<B1DataType>{-0.5, 0.5});
break;
default:
a0_tensors[i].GenerateTensorValue(GeneratorTensor_1<A0DataType>{1});
b0_tensors[i].GenerateTensorValue(GeneratorTensor_1<B0DataType>{1});
b1_tensors[i].GenerateTensorValue(GeneratorTensor_1<B1DataType>{1});
a0_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<A0DataType, 0>{});
b0_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<B0DataType, 1>{});
b1_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<B1DataType, 1>{});
}
d0_tensors[i].GenerateTensorValue(GeneratorTensor_3<D0DataType>{-0.5, 0.5});
......
......@@ -184,9 +184,9 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
b_tensors[i].GenerateTensorValue(GeneratorTensor_3<B0DataType>{-0.5, 0.5});
break;
default:
a0_tensors[i].GenerateTensorValue(GeneratorTensor_1<A0DataType>{1});
a1_tensors[i].GenerateTensorValue(GeneratorTensor_1<A1DataType>{1});
b_tensors[i].GenerateTensorValue(GeneratorTensor_1<B0DataType>{-1});
a0_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<A0DataType, 0>{});
a1_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<A1DataType, 0>{});
b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<B0DataType, 1>{});
}
d0_tensors[i].GenerateTensorValue(GeneratorTensor_3<D0DataType>{-0.5, 0.5});
......
......@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
print_helper_msg();
bool do_verification = true;
int init_method = 2;
int init_method = 1;
bool time_kernel = false;
ck::utils::conv::ConvParam conv_param{
......
......@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
print_helper_msg();
bool do_verification = true;
int init_method = 2;
int init_method = 1;
bool time_kernel = false;
ck::utils::conv::ConvParam conv_param{
......
......@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
print_helper_msg();
bool do_verification = true;
int init_method = 2;
int init_method = 1;
bool time_kernel = false;
ck::utils::conv::ConvParam conv_param{
......
......@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
print_helper_msg();
bool do_verification = true;
int init_method = 2;
int init_method = 1;
bool time_kernel = false;
ck::utils::conv::ConvParam conv_param{
......
......@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
print_helper_msg();
bool do_verification = true;
int init_method = 2;
int init_method = 1;
bool time_kernel = false;
ck::utils::conv::ConvParam conv_param{
......
......@@ -86,7 +86,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultiD_Xdl_CShu
int main(int argc, char* argv[])
{
bool do_verification = true;
int init_method = 2;
int init_method = 1;
bool time_kernel = false;
// GEMM shape
......
......@@ -78,7 +78,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultiD_ABScale_
int main(int argc, char* argv[])
{
bool do_verification = true;
int init_method = 5;
int init_method = 1;
bool time_kernel = false;
// GEMM shape
......@@ -186,20 +186,6 @@ int main(int argc, char* argv[])
b0_k_n.GenerateTensorValue(GeneratorTensor_1<B0DataType>{});
a1_m_k.GenerateTensorValue(GeneratorTensor_3<A1DataType>{0, 1.0});
b1_k_n.GenerateTensorValue(GeneratorTensor_3<B1DataType>{0, 1.0});
break;
case 6:
a0_m_k.GenerateTensorValue(GeneratorTensor_PI<A0DataType>{});
b0_k_n.GenerateTensorValue(GeneratorTensor_1<B0DataType>{0.5f});
a1_m_k.GenerateTensorValue(GeneratorTensor_1<A1DataType>{0.5});
b1_k_n.GenerateTensorValue(GeneratorTensor_1<B1DataType>{4});
break;
case 7:
a0_m_k.GenerateTensorValue(GeneratorTensor_PI_A<A0DataType>{});
b0_k_n.GenerateTensorValue(GeneratorTensor_PI_B<B0DataType>{});
a1_m_k.GenerateTensorValue(GeneratorTensor_1<A1DataType>{2});
b1_k_n.GenerateTensorValue(GeneratorTensor_1<B1DataType>{0.5});
break;
default:
a0_m_k.GenerateTensorValue(GeneratorTensor_3<A0DataType>{-0.5, 0.5});
......@@ -254,28 +240,23 @@ int main(int argc, char* argv[])
"not support this GEMM problem");
}
std::cout << "Compute GEMM on device... \n";
float ave_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel, 20, 50});
std::cout << "DONE!" << std::endl;
if(time_kernel)
{
std::size_t flop = std::size_t(2) * M * N * K;
std::size_t num_btype =
sizeof(A0DataType) * M * K + sizeof(B0DataType) * K * N + sizeof(EDataType) * M * N;
float tflops = static_cast<float>(flop) / 1.E9 / ave_time;
std::size_t flop = std::size_t(2) * M * N * K;
std::size_t num_btype =
sizeof(A0DataType) * M * K + sizeof(B0DataType) * K * N + sizeof(EDataType) * M * N;
float gb_per_sec = num_btype / 1.E6 / ave_time;
float tflops = static_cast<float>(flop) / 1.E9 / ave_time;
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
<< " GB/s" << std::endl;
}
float gb_per_sec = num_btype / 1.E6 / ave_time;
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s"
<< std::endl;
e_device_buf.FromDevice(e_m_n_device_result.mData.data());
if(do_verification)
{
std::cout << "Running verification on CPU." << std::endl;
Tensor<AccDataType> c_m_n({M, N});
Tensor<float> a_m_k({M, K});
Tensor<float> b_k_n({K, N});
......@@ -325,28 +306,10 @@ int main(int argc, char* argv[])
e_device_buf.FromDevice(e_m_n_device_result.mData.data());
if(init_method == 6 || init_method == 7)
{
std::cout << std::fixed << std::setprecision(16);
float d = ck::type_convert<float>(e_m_n_device_result(0, 10));
float h = ck::type_convert<float>(e_m_n_host_result(10, 0));
std::cout << "device result: " << d << std::endl;
std::cout << "host result: " << h << std::endl;
std::cout << "expected result: " << M_PI << std::endl;
std::cout << "device - host: " << std::abs(d - h) << std::endl;
std::cout << "device - expected: " << std::abs(d - M_PI) << std::endl;
std::cout << "atol: " << 5e-2 << std::endl;
}
if(ck::utils::check_err(
e_m_n_device_result, e_m_n_host_result, "Error: Incorrect results!", 5e-2, 5e-2))
{
std::cout << "Verification on CPU: PASS" << std::endl;
return 0;
}
else
return 1;
return ck::utils::check_err(
e_m_n_device_result, e_m_n_host_result, "Error: Incorrect results!", 5e-2, 5e-2)
? 0
: 1;
}
return 0;
......
......@@ -847,7 +847,7 @@ amd_buffer_load_invalid_element_return_zero(const T* p_src_wave,
vector_t tmp{amd_buffer_load_impl<scalar_t, vector_size, coherence>(
src_wave_buffer_resource, src_thread_addr_offset, 0)};
return src_thread_element_valid ? tmp : vector_t{0};
return src_thread_element_valid ? tmp : vector_t(0);
#endif
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment