Unverified commit 829e0eb3, authored by Illia Silin and committed by GitHub

Merge pull request #138 from ROCm/merge_from_public

Merge from public
parents 3d61f89a 1111bc69
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_outelementop_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_convscale.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_xdl_combconvscale_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
ck::Tuple<>,
NDHWGK,
F8,
F8,
ck::Tuple<>,
F32,
PassThrough,
PassThrough,
CombConvScale,
F8,
F8>>>& instances)
{
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_outelementop_f8_f8_f32_instances<3,
NDHWGC,
GKZYXC,
ck::Tuple<>,
NDHWGK,
ConvFwdDefault,
CombConvScale>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_outelementop_f8_f8_f32_instances<3,
NDHWGC,
GKZYXC,
ck::Tuple<>,
NDHWGK,
ConvFwd1x1P0,
CombConvScale>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_outelementop_f8_f8_f32_instances<3,
NDHWGC,
GKZYXC,
ck::Tuple<>,
NDHWGK,
ConvFwd1x1S1P0,
CombConvScale>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
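
For orientation, here is a minimal consumer-side sketch (not part of this commit) of how the instance list populated above might be enumerated. It reuses the operation type from the declaration above and assumes the layout/type aliases (NDHWGC, GKZYXC, NDHWGK, F8, F32, PassThrough, CombConvScale) are exposed by the same instance headers this file includes; argument construction and kernel launch are omitted.

// Hedged sketch: count the f8/f8 -> f32 CombConvScale forward-conv instances registered
// by the function defined above. Assumption: the aliases used in the signature are made
// visible through the included instance header, as in the instance file above.
#include <iostream>
#include <memory>
#include <vector>
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_convscale.hpp"

int main()
{
    using namespace ck::tensor_operation::device;
    using namespace ck::tensor_operation::device::instance;

    // Same operation type as in the declaration above: 3-D grouped forward convolution,
    // NDHWGC input / GKZYXC weight / NDHWGK output, f8 inputs, f32 output, with the
    // combined ConvScale output element-wise op.
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
                                                                NDHWGC,
                                                                GKZYXC,
                                                                ck::Tuple<>,
                                                                NDHWGK,
                                                                F8,
                                                                F8,
                                                                ck::Tuple<>,
                                                                F32,
                                                                PassThrough,
                                                                PassThrough,
                                                                CombConvScale,
                                                                F8,
                                                                F8>>>
        instances;

    add_device_grouped_conv3d_fwd_xdl_combconvscale_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instances(
        instances);

    // One entry per kernel specialization added above (ConvFwdDefault, ConvFwd1x1P0, ConvFwd1x1S1P0).
    std::cout << "registered instances: " << instances.size() << std::endl;
    return 0;
}
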
 # ONLY XDL_KERNELS
 set(GROUPED_CONV3D_FWD_CONVSCALE_RELU
-    xdl/device_grouped_conv3d_fwd_xdl_convscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp)
+    xdl/device_grouped_conv3d_fwd_xdl_convscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp
+    xdl/device_grouped_conv3d_fwd_xdl_combconvscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp)
 add_instance_library(device_grouped_conv3d_fwd_convscale_relu_instance ${GROUPED_CONV3D_FWD_CONVSCALE_RELU})
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_outelementop_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_convscale_relu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_xdl_combconvscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
ck::Tuple<>,
NDHWGK,
F8,
F8,
ck::Tuple<>,
F32,
PassThrough,
PassThrough,
CombConvScaleRelu,
F8,
F8>>>& instances)
{
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_outelementop_f8_f8_f32_instances<3,
NDHWGC,
GKZYXC,
ck::Tuple<>,
NDHWGK,
ConvFwdDefault,
CombConvScaleRelu>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_outelementop_f8_f8_f32_instances<3,
NDHWGC,
GKZYXC,
ck::Tuple<>,
NDHWGK,
ConvFwd1x1P0,
CombConvScaleRelu>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_outelementop_f8_f8_f32_instances<3,
NDHWGC,
GKZYXC,
ck::Tuple<>,
NDHWGK,
ConvFwd1x1S1P0,
CombConvScaleRelu>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
@@ -3,16 +3,13 @@
 #include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_outelementop_instance.hpp"
 #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
-#include "ck/tensor_operation/gpu/element/combined_element_wise_operation.hpp"
-#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
+#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_convscale_relu.hpp"
 namespace ck {
 namespace tensor_operation {
 namespace device {
 namespace instance {
-using ConvScaleRelu = ck::tensor_operation::element_wise::ConvScaleRelu;
 void add_device_grouped_conv3d_fwd_xdl_convscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_instances(
 std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
 NDHWGC,
@@ -57,55 +54,6 @@ void add_device_grouped_conv3d_fwd_xdl_convscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_in
 ConvFwd1x1S1P0,
 ConvScaleRelu>{});
 }
-namespace ew = ck::tensor_operation::element_wise;
-using CombConvScaleRelu = ew::UnaryCombinedOp<ew::Scale, ew::Scale, ew::Relu>;
-void add_device_grouped_conv3d_fwd_xdl_combconvscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instances(
-std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
-NDHWGC,
-GKZYXC,
-ck::Tuple<>,
-NDHWGK,
-F8,
-F8,
-ck::Tuple<>,
-F32,
-PassThrough,
-PassThrough,
-CombConvScaleRelu,
-F8,
-F8>>>& instances)
-{
-add_device_operation_instances(
-instances,
-device_grouped_conv_fwd_xdl_outelementop_f8_f8_f32_instances<3,
-NDHWGC,
-GKZYXC,
-ck::Tuple<>,
-NDHWGK,
-ConvFwdDefault,
-CombConvScaleRelu>{});
-add_device_operation_instances(
-instances,
-device_grouped_conv_fwd_xdl_outelementop_f8_f8_f32_instances<3,
-NDHWGC,
-GKZYXC,
-ck::Tuple<>,
-NDHWGK,
-ConvFwd1x1P0,
-CombConvScaleRelu>{});
-add_device_operation_instances(
-instances,
-device_grouped_conv_fwd_xdl_outelementop_f8_f8_f32_instances<3,
-NDHWGC,
-GKZYXC,
-ck::Tuple<>,
-NDHWGK,
-ConvFwd1x1S1P0,
-CombConvScaleRelu>{});
-}
 } // namespace instance
 } // namespace device
 } // namespace tensor_operation
...
@@ -250,18 +250,14 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
         {
             LogRangeAsType<float>(std::cout << "output : ", output.mData, ",")
                 << std::endl;
-            ;
             LogRangeAsType<float>(
                 std::cout << "weight (device): ", weight_device_result.mData, ",")
                 << std::endl;
-            ;
             LogRangeAsType<float>(
                 std::cout << "weight (host): ", weight_host_result.mData, ",")
                 << std::endl;
-            ;
             LogRangeAsType<float>(std::cout << "input: ", input.mData, ",")
                 << std::endl;
-            ;
         }
     }
 }
...
@@ -26,7 +26,7 @@ def run_ck_profiler_cmd(cmd):
 def parse_data_type(args):
     if args.data_type == "fp32":
         if args.ck_profier_op == "grouped_conv_bwd_weight" or \
-           args.ck_profier_op == "grouped_conv_bwd_weight" or \
+           args.ck_profier_op == "grouped_conv_bwd_data" or \
            args.ck_profier_op == "grouped_conv_fwd":
             args.data_type = 0
     if args.data_type == "fp16":
...
+if (GPU_TARGETS)
+    if (GPU_TARGETS MATCHES "gfx10" OR GPU_TARGETS MATCHES "gfx11" OR GPU_TARGETS MATCHES "gfx12")
+        add_definitions(-DCK_SKIP_FLAKY_F8_TEST)
+        set(CK_SKIP_FLAKY_F8_TEST "ON")
+    endif()
+else()
+    add_definitions(-DCK_SKIP_FLAKY_F8_TEST)
+    set(CK_SKIP_FLAKY_F8_TEST "ON")
+endif()
 if (USE_BITINT_EXTENSION_INT4)
     add_gtest_executable(test_int4 test_int4.cpp)
     if(result EQUAL 0)
...
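
A hedged, self-contained sketch of what the new CK_SKIP_FLAKY_F8_TEST definition is meant to control: when CMake adds -DCK_SKIP_FLAKY_F8_TEST (gfx10/gfx11/gfx12 targets, or no GPU_TARGETS set), the denormal-range round-trip assertion is compiled out. The test name below is hypothetical; the real guards added by this commit appear in the test_bf8.cpp and test_fp8.cpp diffs that follow.

// Hedged sketch of the guard pattern enabled by -DCK_SKIP_FLAKY_F8_TEST
// (illustrative test name; links against gtest_main like the existing tests).
#include <limits>
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"

using ck::f8_convert_rne;
using ck::f8_t;
using ck::type_convert;

TEST(FP8, SkipFlakyGuardSketch)
{
    float abs_tol = 1e-6;
#ifndef CK_SKIP_FLAKY_F8_TEST
    // Only compiled when the flaky-f8 guard is not defined (i.e. not a gfx10/11/12 build).
    ASSERT_NEAR(std::numeric_limits<float>::min(),
                type_convert<float>(f8_convert_rne<f8_t>(std::numeric_limits<float>::min())),
                abs_tol);
#endif
    // Always compiled: zero round-trips exactly regardless of target.
    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<f8_t>(0.0f)), abs_tol);
}
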
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 #include "gtest/gtest.h"
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/type_convert.hpp"
 using ck::bf8_t;
+using ck::f8_convert_rne;
 using ck::f8_convert_sr;
 using ck::half_t;
 using ck::type_convert;
@@ -24,33 +25,36 @@ TEST(BF8, ConvertFP32Nearest)
     // fix the tolerance value
     float abs_tol = 1e-6;
     // convert 0 float to bf8 and back, check if holds
-    ASSERT_NEAR(0.0f, type_convert<float>(type_convert<bf8_t>(0.0f)), abs_tol);
+    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<bf8_t>(0.0f)), abs_tol);
+    // don't run the next test on gfx11 devices
+#ifndef CK_SKIP_FLAKY_F8_TEST
     // convert minimal float to bf8 and back, check if holds
     ASSERT_NEAR(std::numeric_limits<float>::min(),
-                type_convert<float>(type_convert<bf8_t>(std::numeric_limits<float>::min())),
+                type_convert<float>(f8_convert_rne<bf8_t>(std::numeric_limits<float>::min())),
                 abs_tol);
+#endif
     // convert maximal bf8_t to float and check if equal to 57344.0
-    ASSERT_NEAR(57344.0f, type_convert<float>(type_convert<bf8_t>(57344.0f)), abs_tol);
+    ASSERT_NEAR(57344.0f, type_convert<float>(f8_convert_rne<bf8_t>(57344.0f)), abs_tol);
     // convert maximal float to bf8 and back, check if clipped to 57344.0
     ASSERT_NEAR(57344.0f,
-                type_convert<float>(type_convert<bf8_t>(std::numeric_limits<float>::max())),
+                type_convert<float>(f8_convert_rne<bf8_t>(std::numeric_limits<float>::max())),
                 abs_tol);
     // convert inf float to bf8_t and check if it is qNan
     ASSERT_NEAR(type_convert<bf8_t>(0x80),
-                type_convert<bf8_t>(std::numeric_limits<float>::infinity()),
+                f8_convert_rne<bf8_t>(std::numeric_limits<float>::infinity()),
                 abs_tol);
     // positive norm float value to bf8 and back, check if holds
     float pos_float = 0.0000762939f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<bf8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<bf8_t>(pos_float)), abs_tol);
     // negative norm float value to bf8 and back, check if holds
     float neg_float = -0.0000610351f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<bf8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<bf8_t>(neg_float)), abs_tol);
     // positive subnorm float value to bf8 and back, check if holds
     pos_float = 0.0000305175f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<bf8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<bf8_t>(pos_float)), abs_tol);
     // negative subnorm float value to bf8 and back, check if holds
     neg_float = -0.0000152587f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<bf8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<bf8_t>(neg_float)), abs_tol);
 }
 TEST(BF8, ConvertFP32Stochastic)
@@ -92,34 +96,34 @@ TEST(BF8, ConvertFP16Nearest)
     // fix the tolerance value
     float abs_tol = 1e-3;
     // convert 0 fp16 to bf8 and back, check if holds
-    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(type_convert<bf8_t>(half_t{0.0})), abs_tol);
+    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(f8_convert_rne<bf8_t>(half_t{0.0})), abs_tol);
     // convert minimal fp16 to bf8 and back, check if holds
     ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
-                type_convert<half_t>(type_convert<bf8_t>(ck::NumericLimits<half_t>::Min())),
+                type_convert<half_t>(f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::Min())),
                 abs_tol);
     // convert maximal bf8_t to fp16 and check if equal to 57344.0
     ASSERT_NEAR(
-        half_t{57344.0}, type_convert<half_t>(type_convert<bf8_t>(half_t{57344.0})), abs_tol);
+        half_t{57344.0}, type_convert<half_t>(f8_convert_rne<bf8_t>(half_t{57344.0})), abs_tol);
     // convert maximal fp16 to bf8 and back, check if clipped to 57344.0
     ASSERT_NEAR(half_t{57344.0},
-                type_convert<half_t>(type_convert<bf8_t>(ck::NumericLimits<half_t>::Max())),
+                type_convert<half_t>(f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::Max())),
                 abs_tol);
     // convert QuietNaN fp16 to bf8_t and check if it is QuietNaN
     ASSERT_NEAR(type_convert<bf8_t>(0x80),
-                type_convert<bf8_t>(ck::NumericLimits<half_t>::QuietNaN()),
+                f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::QuietNaN()),
                 abs_tol);
     // positive norm fp16 value to bf8 and back, check if holds
     half_t pos_half = half_t{0.0000762939};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<bf8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<bf8_t>(pos_half)), abs_tol);
     // negative norm fp16 value to bf8 and back, check if holds
     half_t neg_half = half_t{-0.0000610351};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<bf8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<bf8_t>(neg_half)), abs_tol);
     // positive subnorm fp16 value to bf8 and back, check if holds
     pos_half = half_t{0.0000305175};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<bf8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<bf8_t>(pos_half)), abs_tol);
     // negative subnorm fp16 value to bf8 and back, check if holds
     neg_half = half_t{-0.0000152587};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<bf8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<bf8_t>(neg_half)), abs_tol);
 }
 TEST(BF8, ConvertFP16Stochastic)
...
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 #include "gtest/gtest.h"
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/type_convert.hpp"
+using ck::f8_convert_rne;
 using ck::f8_convert_sr;
 using ck::f8_t;
 using ck::half_t;
@@ -24,33 +25,36 @@ TEST(FP8, ConvertFP32Nearest)
     // fix the tolerance value
     float abs_tol = 1e-6;
     // convert 0 float to fp8 and back, check if holds
-    ASSERT_NEAR(0.0f, type_convert<float>(type_convert<f8_t>(0.0f)), abs_tol);
+    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<f8_t>(0.0f)), abs_tol);
+    // don't run the next test on gfx11 devices
+#ifndef CK_SKIP_FLAKY_F8_TEST
     // convert minimal float to fp8 and back, check if holds
     ASSERT_NEAR(std::numeric_limits<float>::min(),
-                type_convert<float>(type_convert<f8_t>(std::numeric_limits<float>::min())),
+                type_convert<float>(f8_convert_rne<f8_t>(std::numeric_limits<float>::min())),
                 abs_tol);
+#endif
     // convert maximal f8_t to float and check if equal to 240.0
-    ASSERT_NEAR(240.0f, type_convert<float>(type_convert<f8_t>(240.0f)), abs_tol);
+    ASSERT_NEAR(240.0f, type_convert<float>(f8_convert_rne<f8_t>(240.0f)), abs_tol);
     // convert maximal float to fp8 and back, check if clipped to 240.0
     ASSERT_NEAR(240.0f,
-                type_convert<float>(type_convert<f8_t>(std::numeric_limits<float>::max())),
+                type_convert<float>(f8_convert_rne<f8_t>(std::numeric_limits<float>::max())),
                 abs_tol);
     // convert inf float to f8_t and check if it is qNan
     ASSERT_NEAR(type_convert<f8_t>(0x80),
-                type_convert<f8_t>(std::numeric_limits<float>::infinity()),
+                f8_convert_rne<f8_t>(std::numeric_limits<float>::infinity()),
                 abs_tol);
     // positive norm float value to fp8 and back, check if holds
     float pos_float = 0.017578125f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<f8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_t>(pos_float)), abs_tol);
     // negative norm float value to fp8 and back, check if holds
     float neg_float = -0.015625f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<f8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_t>(neg_float)), abs_tol);
     // positive subnorm float value to fp8 and back, check if holds
     pos_float = 0.00390625f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<f8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_t>(pos_float)), abs_tol);
     // negative subnorm float value to fp8 and back, check if holds
     neg_float = -0.001953125f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<f8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_t>(neg_float)), abs_tol);
 }
 TEST(FP8, ConvertFP32Stochastic)
@@ -92,33 +96,33 @@ TEST(FP8, ConvertFP16Nearest)
     // fix the tolerance value
     float abs_tol = 1e-3;
     // convert 0 fp16 to fp8 and back, check if holds
-    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(type_convert<f8_t>(half_t{0.0})), abs_tol);
+    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(f8_convert_rne<f8_t>(half_t{0.0})), abs_tol);
     // convert minimal fp16 to fp8 and back, check if holds
     ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
-                type_convert<half_t>(type_convert<f8_t>(ck::NumericLimits<half_t>::Min())),
+                type_convert<half_t>(f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::Min())),
                 abs_tol);
     // convert maximal f8_t to fp16 and check if equal to 240.0
-    ASSERT_NEAR(half_t{240.0}, type_convert<half_t>(type_convert<f8_t>(half_t{240.0})), abs_tol);
+    ASSERT_NEAR(half_t{240.0}, type_convert<half_t>(f8_convert_rne<f8_t>(half_t{240.0})), abs_tol);
     // convert maximal fp16 to fp8 and back, check if clipped to 240.0
     ASSERT_NEAR(half_t{240.0},
-                type_convert<half_t>(type_convert<f8_t>(ck::NumericLimits<half_t>::Max())),
+                type_convert<half_t>(f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::Max())),
                 abs_tol);
     // convert QuietNaN fp16 to f8_t and check if it is QuietNaN
     ASSERT_NEAR(type_convert<f8_t>(0x80),
-                type_convert<f8_t>(ck::NumericLimits<half_t>::QuietNaN()),
+                f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::QuietNaN()),
                 abs_tol);
     // positive norm fp16 value to fp8 and back, check if holds
     half_t pos_half = half_t{0.017578125};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<f8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<f8_t>(pos_half)), abs_tol);
     // negative norm fp16 value to fp8 and back, check if holds
     half_t neg_half = half_t{-0.015625};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<f8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<f8_t>(neg_half)), abs_tol);
     // positive subnorm fp16 value to fp8 and back, check if holds
     pos_half = half_t{0.00390625};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<f8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<f8_t>(pos_half)), abs_tol);
     // negative subnorm fp16 value to fp8 and back, check if holds
     neg_half = half_t{-0.001953125};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<f8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<f8_t>(neg_half)), abs_tol);
 }
 TEST(FP8, ConvertFP16Stochastic)
...
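
As a quick reference for the conversion helpers exercised in the two test files above, here is a minimal standalone sketch (using only the CK utility headers those tests already include) contrasting round-to-nearest-even and stochastic-rounding fp8 conversion; the input is one of the exactly representable constants from TEST(FP8, ConvertFP32Nearest).

// Hedged sketch: f8_convert_rne (deterministic, used by the *Nearest tests) versus
// f8_convert_sr (stochastic rounding, used by the *Stochastic tests).
#include <iostream>
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"

int main()
{
    float x = 0.017578125f; // positive normal value, exactly representable in f8_t

    ck::f8_t y_rne = ck::f8_convert_rne<ck::f8_t>(x); // round to nearest even
    ck::f8_t y_sr  = ck::f8_convert_sr<ck::f8_t>(x);  // stochastic rounding

    std::cout << "rne round-trip: " << ck::type_convert<float>(y_rne) << "\n"
              << "sr  round-trip: " << ck::type_convert<float>(y_sr) << std::endl;
    return 0;
}
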