Commit 13dd3ab5 authored by Andriy Roshchenko's avatar Andriy Roshchenko
Browse files

Implementation of ConvertFP32Nearest in test_fp8_ocp.

parent e2efb63c
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
"cacheVariables": { "cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug", "CMAKE_BUILD_TYPE": "Debug",
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON", "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"GPU_TARGETS": "gfx90a", "GPU_TARGETS": "gfx950",
"BUILD_DEV": "ON", "BUILD_DEV": "ON",
"CMAKE_CXX_COMPILER": "/opt/rocm/llvm/bin/clang++", "CMAKE_CXX_COMPILER": "/opt/rocm/llvm/bin/clang++",
"CMAKE_PREFIX_PATH": "/opt/rocm" "CMAKE_PREFIX_PATH": "/opt/rocm"
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
}, },
"cacheVariables": { "cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug", "CMAKE_BUILD_TYPE": "Debug",
"CMAKE_CXX_FLAGS": "-O0" "CMAKE_CXX_FLAGS": "-O0 -ggdb"
} }
}, },
{ {
......
This diff is collapsed.
...@@ -100,6 +100,12 @@ inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, int8_t>(int8_ ...@@ -100,6 +100,12 @@ inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, int8_t>(int8_
return type_convert<bhalf_t>(x_fp32); return type_convert<bhalf_t>(x_fp32);
} }
template <>
inline __host__ __device__ constexpr f8_ocp_t type_convert<f8_ocp_t, int>(int x)
{
return f8_ocp_t{type_convert<f8_ocp_t::data_type>(x)};
}
// Convert X to Y // Convert X to Y
template <typename Y, typename X> template <typename Y, typename X>
__host__ __device__ constexpr Y type_convert_sp(X x) __host__ __device__ constexpr Y type_convert_sp(X x)
...@@ -409,7 +415,7 @@ inline __host__ __device__ float type_convert<float, f8_fnuz_t>(f8_fnuz_t x) ...@@ -409,7 +415,7 @@ inline __host__ __device__ float type_convert<float, f8_fnuz_t>(f8_fnuz_t x)
} }
template <> template <>
inline __host__ __device__ float2_t type_convert<float2_t, f8x2_t>(f8x2_t x) inline __host__ __device__ float2_t type_convert<float2_t, f8x2_fnuz_t>(f8x2_fnuz_t x)
{ {
#if defined(__gfx94__) #if defined(__gfx94__)
const auto i16val = bit_cast<uint16_t>(x); const auto i16val = bit_cast<uint16_t>(x);
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using ck::bf8_ocp_t;
using ck::f8_convert_rne;
using ck::f8_convert_sr;
using ck::half_t;
using ck::type_convert;
TEST(BF8OCP, NumericLimits) {}
TEST(BF8OCP, ConvertFP32Nearest) {}
TEST(BF8OCP, ConvertFP32Stochastic) {}
TEST(BF8OCP, ConvertFP16Nearest) {}
TEST(BF8OCP, ConvertFP16Stochastic) {}
...@@ -38,6 +38,11 @@ TEST(FP8FNUZ, ConvertFP32Nearest) ...@@ -38,6 +38,11 @@ TEST(FP8FNUZ, ConvertFP32Nearest)
// convert maximal f8_fnuz_t to float and check if equal to fp8 max // convert maximal f8_fnuz_t to float and check if equal to fp8 max
ASSERT_NEAR( ASSERT_NEAR(
max_f8_t_float, type_convert<float>(f8_convert_rne<f8_fnuz_t>(max_f8_t_float)), abs_tol); max_f8_t_float, type_convert<float>(f8_convert_rne<f8_fnuz_t>(max_f8_t_float)), abs_tol);
// XXX: FNUZ f8_convert_rne behavior is inconsistent.
// Clipping large values to fp8 max (saturation to finite) contradicts converting inf float to
// fp8 qNAN (no saturation).
// convert maximal float to fp8 and back, check if clipped to fp8 max // convert maximal float to fp8 and back, check if clipped to fp8 max
ASSERT_NEAR(max_f8_t_float, ASSERT_NEAR(max_f8_t_float,
type_convert<float>(f8_convert_rne<f8_fnuz_t>(std::numeric_limits<float>::max())), type_convert<float>(f8_convert_rne<f8_fnuz_t>(std::numeric_limits<float>::max())),
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using ck::f8_convert_rne;
using ck::f8_convert_sr;
using ck::f8_ocp_t;
using ck::half_t;
using ck::type_convert;
TEST(FP8OCP, NumericLimits)
{
// constants given for OCP FP8
EXPECT_EQ(ck::NumericLimits<f8_ocp_t>::Min(),
type_convert<f8_ocp_t>(0x08)); // 0b00001000 = 2^-6
EXPECT_EQ(ck::NumericLimits<f8_ocp_t>::Max(), type_convert<f8_ocp_t>(0x7E)); // 0b01111110 = 448
EXPECT_EQ(ck::NumericLimits<f8_ocp_t>::Lowest(),
type_convert<f8_ocp_t>(0xFE)); // 0b11111110 = -448
EXPECT_EQ(ck::NumericLimits<f8_ocp_t>::QuietNaN().data,
type_convert<f8_ocp_t>(0x7F).data); // 0b01111111
EXPECT_FALSE(ck::NumericLimits<f8_ocp_t>::QuietNaN() ==
ck::NumericLimits<f8_ocp_t>::QuietNaN());
}
TEST(FP8OCP, ConvertFP32Nearest)
{
// fix the tolerance value
float abs_tol = 1e-6;
// convert 0 float to fp8 and back, check if holds
ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<f8_ocp_t>(0.0f)), 0.0f);
// convert minimal float to fp8 and back, check if holds
ASSERT_NEAR(std::numeric_limits<float>::min(),
type_convert<float>(f8_convert_rne<f8_ocp_t>(std::numeric_limits<float>::min())),
abs_tol);
const auto max_f8_t_float = type_convert<float>(ck::NumericLimits<f8_ocp_t>::Max());
// convert maximal f8_ocp_t to float and check if equal to fp8 max
ASSERT_NEAR(
max_f8_t_float, type_convert<float>(f8_convert_rne<f8_ocp_t>(max_f8_t_float)), 0.0f);
// convert maximal float to fp8 and back, check if clipped to fp8 max (saturation to finite)
ASSERT_NEAR(max_f8_t_float,
type_convert<float>(f8_convert_rne<f8_ocp_t>(std::numeric_limits<float>::max())),
0.0f);
// convert float infinity to f8_ocp_t and check if it is max value (saturation to finite)
ASSERT_EQ(ck::NumericLimits<f8_ocp_t>::Max(),
f8_convert_rne<f8_ocp_t>(std::numeric_limits<float>::infinity()));
// positive norm float value to fp8 and back, check if holds
float pos_float = 0.017578125f;
ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_ocp_t>(pos_float)), abs_tol);
// smallest normal fp8 value to fp8 and back, check if holds
float neg_float = -0.015625f; //-2^-6
ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_ocp_t>(neg_float)), 0.0f);
// positive subnorm float value to fp8 and back, check if holds
pos_float = 0.00390625f;
ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_ocp_t>(pos_float)), abs_tol);
// min subnorm fp8 value to fp8 and back, check if holds
neg_float = -0.001953125f; //-2^-9
ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_ocp_t>(neg_float)), 0.0f);
// smaller than min subnorm fp8 value to fp8 must be zero
auto less_than_min_subnorm = 0.0009765625f; // 2^-10
ASSERT_EQ(0.0f, type_convert<float>(f8_convert_rne<f8_ocp_t>(less_than_min_subnorm)));
// convert quiet NaN to f8_ocp_t and check if it is quiet NaN
auto f8_nan = f8_convert_rne<f8_ocp_t>(std::numeric_limits<float>::quiet_NaN());
ASSERT_TRUE((f8_nan.data & 0x7f) == 0x7f);
}
TEST(FP8OCP, ConvertFP32Stochastic) {}
TEST(FP8OCP, ConvertFP16Nearest) {}
TEST(FP8OCP, ConvertFP16Stochastic) {}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment