Merge branch 'develop' into concat2

3ae5f9ed · Chris Austen · GitHub · 6d5a34d2 · 785ff7d7 · 3ae5f9ed
Unverified Commit 3ae5f9ed authored Dec 01, 2023 by Chris Austen Committed by GitHub Dec 01, 2023
20 changed files
--- a/src/targets/gpu/jit/scatternd.cpp
+++ b/src/targets/gpu/jit/scatternd.cpp
@@ -21,11 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include <migraphx/gpu/compiler.hpp>
+#include "scatter.hpp"
-#include <migraphx/make_op.hpp>
-#include <migraphx/gpu/context.hpp>
-#include <migraphx/gpu/compile_hip_code_object.hpp>
-#include <migraphx/gpu/compile_hip.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -55,46 +51,21 @@ MIGRAPHX_GLOBAL void scatternd_kernel(void* in_indices, void* in_updates, void*
 )__migraphx__";
-struct scatternd_compiler : compiler<scatternd_compiler>
+struct scatternd_compiler : scatter_compiler<scatternd_compiler>
 {
    std::vector<std::string> names() const
    {
-        return {"scatternd_none", "scatternd_add", "scatternd_mul"};
+        return {
+            "scatternd_none", "scatternd_add", "scatternd_mul", "scatternd_min", "scatternd_max"};
    }
-    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
+    std::string make_interpolated_string(const operation& op) const
    {
-        hip_compile_options options;
+        const auto reduction = op.name().substr(std::char_traits<char>::length("scatternd_"));
-        options.set_launch_params(v, compute_global_for(ctx, inputs.at(1).elements()));
+        return interpolate_string(scatternd_kernel, {{"reduction", "assign_" + reduction}});
-        options.inputs         = inputs;
-        options.output         = inputs.back();
-        options.kernel_name    = "scatternd_kernel";
-        options.virtual_inputs = inputs;
-        auto reduction         = "assign_" + v.get("reduction", std::string{"none"});
-        auto src               = interpolate_string(scatternd_kernel, {{"reduction", reduction}});
-        return compile_hip_code_object(src, options);
    }
-    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
+    std::string get_kernel_name(const operation&) const { return "scatternd_kernel"; }
-    {
-        assert(starts_with(op.name(), "scatternd_"));
-        auto reduction = op.name().substr(10);
-        return insert(compile_op(
-            ctx,
-            to_shapes(std::vector<instruction_ref>{ins->inputs().begin() + 1, ins->inputs().end()}),
-            {{"reduction", reduction}}));
-    }
-    compiler_replace insert(const operation& co) const
-    {
-        return {co, [](module& m, instruction_ref ins, const operation& op) {
-                    auto args = ins->inputs();
-                    args.back() =
-                        m.insert_instruction(ins, make_op("hip::copy"), args.front(), args.back());
-                    args.erase(args.begin());
-                    return m.replace_instruction(ins, op, args);
-                }};
-    }
 };
 } // namespace gpu

--- a/src/targets/gpu/kernels/include/migraphx/kernels/gathernd.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/gathernd.hpp
@@ -53,35 +53,35 @@ __device__ void gathernd(const T& data_t, const U& indices_t, const V& output_t,
    auto indices_shape_lens = indices_shape.lens;
    auto data_shape_lens    = data_shape.lens;
    auto num_slice_dims     = indices_shape_lens.back();
-    std::size_t num_slices =
+    size_t num_slices =
        accumulate(indices_shape_lens.begin(), indices_shape_lens.end() - 1, 1, op::product{});
-    std::size_t slice_size = accumulate(data_shape_lens.begin() + num_slice_dims + batch_dims,
+    size_t slice_size = accumulate(data_shape_lens.begin() + num_slice_dims + batch_dims,
-                                        data_shape_lens.end(),
+                                   data_shape_lens.end(),
-                                        1,
+                                   1,
-                                        op::product{});
+                                   op::product{});
-    const std::size_t num_batches =
+    const size_t num_batches =
        accumulate(data_shape_lens.begin(), data_shape_lens.begin() + batch_dims, 1, op::product{});
-    const std::size_t data_batch_stride =
+    const size_t data_batch_stride =
        accumulate(data_shape_lens.begin() + batch_dims, data_shape_lens.end(), 1, op::product{});
    const auto num_slices_per_batch = num_slices / num_batches;
    ind.global_stride(output_shape.elements(), [&](auto i) {
        const auto* indices_ptr     = indices_t.data();
-        const std::size_t j         = i / slice_size;
+        const size_t j              = i / slice_size;
-        const std::size_t batch_idx = j / num_slices_per_batch;
+        const size_t batch_idx      = j / num_slices_per_batch;
        auto* slice_indices               = indices_ptr + (j * num_slice_dims);
-        std::size_t relative_slice_offset = 0;
+        size_t relative_slice_offset      = 0;
-        for(std::size_t idx = 0; idx < num_slice_dims; ++idx)
+        for(size_t idx = 0; idx < num_slice_dims; ++idx)
        {
            int64_t index                   = slice_indices[idx];
-            const std::size_t input_dim_idx = batch_dims + idx;
+            const size_t input_dim_idx      = batch_dims + idx;
            const auto input_dim            = data_shape_lens[input_dim_idx];
            MIGRAPHX_ASSERT(index >= -static_cast<int64_t>(input_dim) and
                            index < static_cast<int64_t>(input_dim));
            if(index < 0)
                index += input_dim;
-            std::size_t size_from_slice_dims =
+            size_t size_from_slice_dims =
                accumulate(data_shape_lens.begin() + batch_dims + idx + 1,
                           data_shape_lens.begin() + batch_dims + num_slice_dims,
                           slice_size,

--- a/src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
@@ -103,6 +103,7 @@ MIGRAPHX_DEVICE_MATH(floor, ::floor)
 MIGRAPHX_DEVICE_MATH(isnan, ::isnan)
 MIGRAPHX_DEVICE_MATH(isinf, ::isinf)
 MIGRAPHX_DEVICE_MATH(log, ::log)
+MIGRAPHX_DEVICE_MATH(nearbyint, ::nearbyint)
 MIGRAPHX_DEVICE_MATH(pow, ::pow)
 MIGRAPHX_DEVICE_MATH(remainder, ::remainder)
 MIGRAPHX_DEVICE_MATH(round, ::round)
@@ -152,6 +153,7 @@ MIGRAPHX_DEVICE_MATH_HALF(atan, ::atan)
 MIGRAPHX_DEVICE_MATH_HALF(atanh, ::atanh)
 MIGRAPHX_DEVICE_MATH_HALF(cosh, ::cosh)
 MIGRAPHX_DEVICE_MATH_HALF(erf, ::erf)
+MIGRAPHX_DEVICE_MATH_HALF(nearbyint, ::nearbyint)
 MIGRAPHX_DEVICE_MATH_HALF(pow, ::pow)
 MIGRAPHX_DEVICE_MATH_HALF(remainder, ::remainder)
 MIGRAPHX_DEVICE_MATH_HALF(round, ::round)
@@ -236,6 +238,7 @@ MIGRAPHX_DEVICE_MATH_VEC(isnan)
 MIGRAPHX_DEVICE_MATH_VEC(log)
 MIGRAPHX_DEVICE_MATH_VEC(max)
 MIGRAPHX_DEVICE_MATH_VEC(min)
+MIGRAPHX_DEVICE_MATH_VEC(nearbyint)
 MIGRAPHX_DEVICE_MATH_VEC(pow)
 MIGRAPHX_DEVICE_MATH_VEC(remainder)
 MIGRAPHX_DEVICE_MATH_VEC(round)

--- a/src/targets/gpu/kernels/include/migraphx/kernels/scatter_reduction_modes.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/scatter_reduction_modes.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_KERNELS_SCATTER_REDUCTION_MODES_HPP
+#define MIGRAPHX_GUARD_KERNELS_SCATTER_REDUCTION_MODES_HPP
+#include <migraphx/kernels/types.hpp>
+namespace migraphx {
+struct assign_none // reduction mode "none": the update simply replaces the destination
+{
+    template <class T, class U>
+    MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const
+    {
+        x = y; // plain assignment, no atomic operation involved
+    }
+};
+struct assign_add // reduction mode "add": accumulates the update into the destination
+{
+    template <class T, class U>
+    MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const
+    {
+        atomicAdd(&x, y); // x += y performed as a single atomic read-modify-write
+    }
+};
+struct assign_mul // reduction mode "mul": multiplies the destination by the update
+{
+    template <class T, class U>
+    MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const
+    {
+        T old = x; // first guess at the current value of x
+        T assumed;
+        do // compare-and-swap retry loop: there is no direct atomic multiply call here
+        {
+            assumed = old;
+            old     = atomicCAS(&x, assumed, assumed * y); // stores the product only if x == assumed
+        } while(assumed != old); // x held something else: retry with the value that was found
+    }
+};
+// Reduction mode "max": leaves the larger of the destination and the update in
+// the destination, using atomicMax for the read-modify-write.
+struct assign_max
+{
+    // `class` rather than `typename`, matching assign_none/assign_add/assign_mul.
+    template <class T, class U>
+    MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const
+    {
+        atomicMax(&x, y);
+    }
+};
+// Reduction mode "min": leaves the smaller of the destination and the update in
+// the destination, using atomicMin for the read-modify-write.
+struct assign_min
+{
+    // `class` rather than `typename`, matching assign_none/assign_add/assign_mul.
+    template <class T, class U>
+    MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const
+    {
+        atomicMin(&x, y);
+    }
+};
+} // namespace migraphx
+#endif
--- a/src/targets/gpu/kernels/include/migraphx/kernels/scatternd.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/scatternd.hpp
@@ -26,36 +26,10 @@
 #include <migraphx/kernels/index.hpp>
 #include <migraphx/kernels/algorithm.hpp>
+#include <migraphx/kernels/scatter_reduction_modes.hpp>
 namespace migraphx {
-struct assign_none
-{
-    template <class T, class U>
-    MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const
-    {
-        x = y;
-    }
-};
-struct assign_add
-{
-    template <class T, class U>
-    MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const
-    {
-        x += y;
-    }
-};
-struct assign_mul
-{
-    template <class T, class U>
-    MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const
-    {
-        x *= y;
-    }
-};
 template <class T, class U, class V, class F>
 __device__ void scatternd(const T& indices_t, const U& updates_t, const V& output_t, F f)
 {

--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -73,6 +73,7 @@ namespace gpu {
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_MLIR);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNE_EXHAUSTIVE);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNE_LIMIT);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNING_DB);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNING_CFG);
@@ -796,7 +797,9 @@ struct mlir_program
        if(enabled(MIGRAPHX_MLIR_TUNE_EXHAUSTIVE{}))
            tuning_mode = RocmlirTuningParamSetKindExhaustive;
        mlir_tuning_space params{mlirRockTuningSpaceCreate(mmodule.get(), tuning_mode)};
-        for(auto i : range(mlirRockTuningGetNumParams(params.get())))
+        const auto limit =
+            value_of(MIGRAPHX_MLIR_TUNE_LIMIT{}, std::numeric_limits<std::size_t>::max());
+        for(auto i : range(std::min<std::size_t>(limit, mlirRockTuningGetNumParams(params.get()))))
        {
            mlir_tuning_param param{mlirRockTuningParamCreate()};
            if(not mlirRockTuningParamGet(params.get(), i, param.get()))
@@ -1032,6 +1035,15 @@ tuning_config get_tuning_config_mlir(const context& migraphx_ctx,
    mlir_program mp;
    mp.set_gpu_properties(migraphx_ctx);
    mp.parse(m);
+    const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
+    static std::mutex mutex;
+    if(trace)
+    {
+        const std::lock_guard<std::mutex> lock(mutex);
+        auto mod_op = mlirModuleGetOperation(mp.mmodule.get());
+        std::cout << mlir_print(&mlirOperationPrint, mod_op) << std::endl;
+    }
    return mp.get_tuning_config(exhaustive);
 }

--- a/src/targets/gpu/prefuse_ops.cpp
+++ b/src/targets/gpu/prefuse_ops.cpp
@@ -28,7 +28,10 @@
 #include <migraphx/register_op.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/dead_code_elimination.hpp>
+#ifdef MIGRAPHX_USE_COMPOSABLEKERNEL
 #include <migraphx/gpu/ck.hpp>
+#endif
+#include <migraphx/gpu/fuse_mlir.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -128,26 +131,49 @@ struct pre_gemm_softmax_gemm : gemm_softmax_gemm
 };
 MIGRAPHX_REGISTER_OP(pre_gemm_softmax_gemm);
-MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
+auto is_ck_gemm()
 {
-    if(ins->name() != "dot")
+    return match::make_basic_pred_matcher([=](instruction_ref ins) {
+#ifdef MIGRAPHX_USE_COMPOSABLEKERNEL
+        if(not enabled(MIGRAPHX_ENABLE_CK{}))
+            return false;
+        if(ins->name() != "dot")
+            return false;
+        if(not pre_gemm_softmax_gemm::is_ck_supported_type(ins->get_shape().type()))
+            return false;
+        return true;
+#else
+        (void)ins;
        return false;
-    if(not pre_gemm_softmax_gemm::is_ck_supported_type(ins->get_shape().type()))
+#endif
-        return false;
+    });
-    return true;
+}
+auto is_mlir_gemm() // predicate matcher: a "dot" that the MLIR attention path can take
+{
+    return match::make_basic_pred_matcher([=](instruction_ref ins) {
+        if(not mlir_attention_enabled()) // never matches while MLIR attention is disabled
+            return false;
+        if(ins->name() != "dot")
+            return false;
+        return std::all_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) { // every input
+            return pre_gemm_softmax_gemm::is_mlir_supported_type(i->get_shape().type());
+        });
+    });
 }
 struct find_gemm_softmax_gemm
 {
    auto matcher() const
    {
-        auto gemm1 =
+        auto gemm1 = match::skip(match::name("contiguous"))(
-            match::skip(match::name("contiguous"))(match::name("dot")(is_ck_gemm().bind("gemm1")));
+            match::name("dot")(match::any_of(is_ck_gemm(), is_mlir_gemm()).bind("gemm1")));
        auto mul = match::name("mul")(
            match::nargs(2), match::either_arg(0, 1)(match::is_constant().bind("scale"), gemm1));
        auto softmax = match::name("softmax")(match::arg(0)(mul)).bind("softmax");
-        return match::name("dot")(is_ck_gemm().bind("gemm2"))(match::arg(0)(softmax));
+        return match::name("dot")(match::any_of(is_ck_gemm(), is_mlir_gemm()).bind("gemm2"))(
+            match::arg(0)(softmax));
    }
    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
@@ -182,8 +208,7 @@ void prefuse_ops::apply(module_pass_manager& mpm) const
    match::find_matches(mpm.get_module(), find_layernorm{});
    mpm.run_pass(dead_code_elimination{});
    match::find_matches(mpm.get_module(), find_add_layernorm{});
-    if(enabled(MIGRAPHX_ENABLE_CK{}))
+    match::find_matches(mpm, find_gemm_softmax_gemm{});
-        match::find_matches(mpm, find_gemm_softmax_gemm{});
 }
 } // namespace gpu

--- a/src/targets/ref/CMakeLists.txt
+++ b/src/targets/ref/CMakeLists.txt
@@ -33,8 +33,9 @@ rocm_set_soversion(migraphx_ref ${MIGRAPHX_SO_VERSION})
 find_path(BLAZE_INCLUDE blaze/Blaze.h)
 rocm_clang_tidy_check(migraphx_ref)
+target_link_libraries(migraphx_ref PRIVATE Threads::Threads)
 target_link_libraries(migraphx_ref PUBLIC migraphx)
-target_include_directories(migraphx_ref PRIVATE ${BLAZE_INCLUDE})
+target_include_directories(migraphx_ref SYSTEM PRIVATE ${BLAZE_INCLUDE})
 target_compile_definitions(migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS)
 migraphx_generate_export_header(migraphx_ref)

--- a/src/tf/CMakeLists.txt
+++ b/src/tf/CMakeLists.txt
@@ -38,7 +38,11 @@ protobuf_generate_cpp(
 )
 add_library(tf-proto STATIC ${PROTO_SRCS})
 target_include_directories(tf-proto SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${PROTOBUF_INCLUDE_DIR})
-target_compile_options(tf-proto PRIVATE -w)
+if(MSVC)
+    target_compile_options(tf-proto PRIVATE /w)
+else()
+    target_compile_options(tf-proto PRIVATE -w)
+endif()
 target_link_libraries(tf-proto PRIVATE ${PROTOBUF_LIBRARY})
 set_target_properties(tf-proto PROPERTIES POSITION_INDEPENDENT_CODE On)
@@ -49,7 +53,10 @@ target_include_directories(migraphx_tf PRIVATE include)
 set_target_properties(migraphx_tf PROPERTIES EXPORT_NAME tf)
 rocm_set_soversion(migraphx_tf ${MIGRAPHX_SO_VERSION})
 rocm_clang_tidy_check(migraphx_tf)
-target_link_libraries(migraphx_tf PRIVATE tf-proto "-Wl,--exclude-libs,ALL")
+target_link_libraries(migraphx_tf PRIVATE tf-proto)
+if(NOT WIN32)
+    target_link_libraries(migraphx_tf PRIVATE "-Wl,--exclude-libs,ALL")
+endif()
 target_link_libraries(migraphx_tf PUBLIC migraphx)
 rocm_install_targets(

--- a/src/tmp_dir.cpp
+++ b/src/tmp_dir.cpp
@@ -31,8 +31,18 @@
 #include <sstream>
 #include <iostream>
 #include <string>
-#include <sys/types.h>
+#ifdef _WIN32
+// cppcheck-suppress definePrefix
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+#undef getpid
+// cppcheck-suppress [definePrefix, defineUpperCase]
+#define getpid _getpid
+#else
 #include <unistd.h>
+#include <sys/types.h>
+#endif
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/verify_args.cpp
+++ b/src/verify_args.cpp
@@ -88,7 +88,6 @@ bool verify_args(const std::string& name,
            if(target_nan_idx >= 0)
                std::cout << "Non finite number found in target at " << target_nan_idx << ": "
                          << target[target_nan_idx] << std::endl;
-            std::cout << "MIGraphX verification passed successfully." << std::endl;
        }
    });
    return passed;

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -150,6 +150,7 @@ function(test_headers PREFIX)
        list(REMOVE_ITEM HEADERS
              ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/ck.hpp)
    endif()
+    list(REMOVE_ITEM HEADERS ${CMAKE_SOURCE_DIR}/src/include/migraphx/float8_impl.hpp)
    foreach(HEADER ${HEADERS})
        file(RELATIVE_PATH HEADER_REL ${CMAKE_SOURCE_DIR} ${HEADER})
        string(MAKE_C_IDENTIFIER ${HEADER_REL} TEST_NAME)

--- a/test/float_equal.cpp
+++ b/test/float_equal.cpp
@@ -22,6 +22,7 @@
 * THE SOFTWARE.
 */
 #include <migraphx/float_equal.hpp>
+#include <migraphx/float8.hpp>
 #include <migraphx/half.hpp>
 #include "test.hpp"
@@ -53,7 +54,7 @@ auto test_float_equal(T x, U y)
 template <class T, class U>
 void test_equality()
 {
-    auto x1 = T(0.1);
+    auto x1 = T(0.125);
    auto x2 = U(0.0);
    auto x3 = U(1.0);
    EXPECT(test_float_equal(x1, x1));
@@ -71,8 +72,12 @@ void test_equality()
 TEST_CASE_REGISTER(test_equality<double, float>);
 TEST_CASE_REGISTER(test_equality<double, int>);
 TEST_CASE_REGISTER(test_equality<double, migraphx::half>);
+TEST_CASE_REGISTER(test_equality<double, migraphx::fp8::fp8e4m3fnuz>);
 TEST_CASE_REGISTER(test_equality<float, int>);
+TEST_CASE_REGISTER(test_equality<float, migraphx::fp8::fp8e4m3fnuz>);
 TEST_CASE_REGISTER(test_equality<migraphx::half, int>);
+TEST_CASE_REGISTER(test_equality<migraphx::half, migraphx::fp8::fp8e4m3fnuz>);
+TEST_CASE_REGISTER(test_equality<migraphx::fp8::fp8e4m3fnuz, int>);
 template <class T, class U>
 void test_limits()
@@ -110,8 +115,13 @@ void test_limits()
 TEST_CASE_REGISTER(test_limits<double, float>);
 TEST_CASE_REGISTER(test_limits<double, int>);
 TEST_CASE_REGISTER(test_limits<double, migraphx::half>);
+TEST_CASE_REGISTER(test_limits<double, migraphx::fp8::fp8e4m3fnuz>);
 TEST_CASE_REGISTER(test_limits<float, int>);
+TEST_CASE_REGISTER(test_limits<float, migraphx::fp8::fp8e4m3fnuz>);
 TEST_CASE_REGISTER(test_limits<int, migraphx::half>);
+TEST_CASE_REGISTER(test_limits<int, migraphx::fp8::fp8e4m3fnuz>);
+TEST_CASE_REGISTER(test_limits<migraphx::fp8::fp8e4m3fnuz, migraphx::half>);
 #ifndef _WIN32
 // On Windows, types int and long have the same min and max values.
 TEST_CASE_REGISTER(test_limits<long, int>);

--- a/test/fp8e4m3fn.cpp
+++ b/test/fp8e4m3fn.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <cmath>
+#include <migraphx/float_equal.hpp>
+#include <migraphx/float8.hpp>
+#include <migraphx/half.hpp>
+#include <migraphx/ranges.hpp>
+#include "test.hpp"
+#include <limits>
+// Reference decoder used by the fp8e4m3fn tests: maps each of the 256 possible
+// bit patterns to the float value it encodes.
+//
+// Indices 0x00-0x7f hold the non-negative values and 0x80-0xff repeat them with
+// the sign set (index 0x80 is -0.0). 0x7f and 0xff are the NaN encodings, and
+// the largest finite magnitude is 448 (0x7e / 0xfe).
+float fp8e4m3fn_to_fp32_value(uint8_t input)
+{
+    // Named for the format it actually describes (e4m3fn, not e4m3fnuz), and
+    // static so the table is not materialised again on every call.
+    static constexpr std::array<float, 256> e4m3fn_lut = {
+        0.0,        0.001953125,  0.00390625,  0.005859375,
+        0.0078125,  0.009765625,  0.01171875,  0.013671875,
+        0.015625,   0.017578125,  0.01953125,  0.021484375,
+        0.0234375,  0.025390625,  0.02734375,  0.029296875,
+        0.03125,    0.03515625,   0.0390625,   0.04296875,
+        0.046875,   0.05078125,   0.0546875,   0.05859375,
+        0.0625,     0.0703125,    0.078125,    0.0859375,
+        0.09375,    0.1015625,    0.109375,    0.1171875,
+        0.125,      0.140625,     0.15625,     0.171875,
+        0.1875,     0.203125,     0.21875,     0.234375,
+        0.25,       0.28125,      0.3125,      0.34375,
+        0.375,      0.40625,      0.4375,      0.46875,
+        0.5,        0.5625,       0.625,       0.6875,
+        0.75,       0.8125,       0.875,       0.9375,
+        1.0,        1.125,        1.25,        1.375,
+        1.5,        1.625,        1.75,        1.875,
+        2.0,        2.25,         2.5,         2.75,
+        3.0,        3.25,         3.5,         3.75,
+        4.0,        4.5,          5.0,         5.5,
+        6.0,        6.5,          7.0,         7.5,
+        8.0,        9.0,          10.0,        11.0,
+        12.0,       13.0,         14.0,        15.0,
+        16.0,       18.0,         20.0,        22.0,
+        24.0,       26.0,         28.0,        30.0,
+        32.0,       36.0,         40.0,        44.0,
+        48.0,       52.0,         56.0,        60.0,
+        64.0,       72.0,         80.0,        88.0,
+        96.0,       104.0,        112.0,       120.0,
+        128.0,      144.0,        160.0,       176.0,
+        192.0,      208.0,        224.0,       240.0,
+        256.0,      288.0,        320.0,       352.0,
+        384.0,      416.0,        448.0,       std::numeric_limits<float>::quiet_NaN(),
+        -0.0,       -0.001953125, -0.00390625, -0.005859375,
+        -0.0078125, -0.009765625, -0.01171875, -0.013671875,
+        -0.015625,  -0.017578125, -0.01953125, -0.021484375,
+        -0.0234375, -0.025390625, -0.02734375, -0.029296875,
+        -0.03125,   -0.03515625,  -0.0390625,  -0.04296875,
+        -0.046875,  -0.05078125,  -0.0546875,  -0.05859375,
+        -0.0625,    -0.0703125,   -0.078125,   -0.0859375,
+        -0.09375,   -0.1015625,   -0.109375,   -0.1171875,
+        -0.125,     -0.140625,    -0.15625,    -0.171875,
+        -0.1875,    -0.203125,    -0.21875,    -0.234375,
+        -0.25,      -0.28125,     -0.3125,     -0.34375,
+        -0.375,     -0.40625,     -0.4375,     -0.46875,
+        -0.5,       -0.5625,      -0.625,      -0.6875,
+        -0.75,      -0.8125,      -0.875,      -0.9375,
+        -1.0,       -1.125,       -1.25,       -1.375,
+        -1.5,       -1.625,       -1.75,       -1.875,
+        -2.0,       -2.25,        -2.5,        -2.75,
+        -3.0,       -3.25,        -3.5,        -3.75,
+        -4.0,       -4.5,         -5.0,        -5.5,
+        -6.0,       -6.5,         -7.0,        -7.5,
+        -8.0,       -9.0,         -10.0,       -11.0,
+        -12.0,      -13.0,        -14.0,       -15.0,
+        -16.0,      -18.0,        -20.0,       -22.0,
+        -24.0,      -26.0,        -28.0,       -30.0,
+        -32.0,      -36.0,        -40.0,       -44.0,
+        -48.0,      -52.0,        -56.0,       -60.0,
+        -64.0,      -72.0,        -80.0,       -88.0,
+        -96.0,      -104.0,       -112.0,      -120.0,
+        -128.0,     -144.0,       -160.0,      -176.0,
+        -192.0,     -208.0,       -224.0,      -240.0,
+        -256.0,     -288.0,       -320.0,      -352.0,
+        -384.0,     -416.0,       -448.0,      std::numeric_limits<float>::quiet_NaN(),
+    };
+    // A uint8_t index can never exceed 255, so the lookup is always in range.
+    return e4m3fn_lut[input];
+}
+TEST_CASE(test_fp8_cast_to_float) // every bit pattern must decode to its reference-table value
+{
+    std::vector<uint8_t> bit_vals(256);
+    std::iota(bit_vals.begin(), bit_vals.end(), 0); // 0..255: all possible encodings
+    EXPECT(bool{std::all_of(bit_vals.begin(), bit_vals.end(), [](uint8_t bit_val) {
+        migraphx::fp8::fp8e4m3fn fp8_val(bit_val, migraphx::fp8::fp8e4m3fn::from_bits());
+        if(std::isnan(float(fp8_val)) and std::isnan(fp8e4m3fn_to_fp32_value(bit_val)))
+        {
+            return true; // both sides NaN counts as a match; it is accepted before float_equal
+        }
+        return migraphx::float_equal(float(fp8_val), fp8e4m3fn_to_fp32_value(bit_val));
+    })});
+}
+TEST_CASE(test_fp8_cast_from_float) // float -> fp8 conversion against hand-picked bit patterns
+{
+    std::unordered_map<float, uint8_t> test_vals = { // key: float input, value: expected fp8 bits
+        {{512, 0x7e},        {-512, 0xfe},        {448, 0x7e},        {-448, 0xfe},
+         {256, 0x78},        {-256, 0xf8},        {240, 0x77},        {-240, 0xf7},
+         {1e-07, 0x0},       {1e+07, 0x7e},       {1, 0x38},          {-1, 0xb8},
+         {0.1, 0x1d},        {0.11, 0x1e},        {0.111, 0x1e},      {0.1111, 0x1e},
+         {-0.1, 0x9d},       {-0.11, 0x9e},       {-0.111, 0x9e},     {-0.1111, 0x9e},
+         {0.2, 0x25},        {2, 0x40},           {20, 0x5a},         {200, 0x74},
+         {-0.2, 0xa5},       {-2, 0xc0},          {-20, 0xda},        {-200, 0xf4},
+         {0.5, 0x30},        {-0.5, 0xb0},        {1.17549e-38, 0x0}, {1.4013e-45, 0x0},
+         {0.0078125, 0x4},   {-0.0078125, 0x84},  {0.000976562, 0x0}, {-0.000976562, 0x80},
+         {0.000488281, 0x0}, {-0.000488281, 0x80}}};
+    EXPECT(bool{std::all_of(test_vals.begin(), test_vals.end(), [](const auto sample) {
+        return migraphx::float_equal( // converted value vs. value rebuilt from the expected bits
+            migraphx::fp8::fp8e4m3fn(sample.first),
+            migraphx::fp8::fp8e4m3fn(sample.second, migraphx::fp8::fp8e4m3fn::from_bits()));
+    })});
+}
+TEST_CASE(test_positive_zero) // 0.0f converts to an fp8 zero and back to 0.0f
+{
+    float zero = 0.0;
+    migraphx::fp8::fp8e4m3fn fp8_zero(zero);
+    EXPECT(fp8_zero.is_zero());
+    EXPECT(migraphx::float_equal(zero, float(fp8_zero))); // round trip through fp8
+}
+TEST_CASE(test_negative_zero) // -0.0f converts to an fp8 zero and round-trips
+{
+    float nzero = -0.0;
+    migraphx::fp8::fp8e4m3fn fp8_nzero(nzero);
+    EXPECT(fp8_nzero.is_zero());
+    // negative zero is preserved for fp8e4m3fn
+    EXPECT(migraphx::float_equal(nzero, float(fp8_nzero)));
+}
+// fp8e4m3fn keeps the sign of zero, so +0 and -0 are distinct encodings; they
+// must nevertheless compare equal.
+TEST_CASE(test_pos_zero_eq_neg_zero)
+{
+    float nzero = -0.0;
+    float pzero = 0.0;
+    // Exercise fp8e4m3fn, the type this file covers; fp8e5m2 has its own tests.
+    migraphx::fp8::fp8e4m3fn fp8_nzero(nzero);
+    migraphx::fp8::fp8e4m3fn fp8_pzero(pzero);
+    EXPECT(fp8_nzero == fp8_pzero);
+}
+TEST_CASE(test_nan_1) // a float NaN converts to an fp8 NaN
+{
+    float fnan = std::numeric_limits<float>::quiet_NaN();
+    migraphx::fp8::fp8e4m3fn fp8_nan(fnan);
+    EXPECT(fp8_nan.is_nan());
+    EXPECT(std::isnan(fp8_nan)); // std::isnan on the fp8 value must report NaN as well
+}
+TEST_CASE(test_nan_2) // the numeric_limits quiet_NaN bit pattern is recognised as NaN
+{
+    auto fnan = std::numeric_limits<migraphx::fp8::fp8e4m3fn>::quiet_NaN();
+    migraphx::fp8::fp8e4m3fn fp8_nan(fnan.data, migraphx::fp8::fp8e4m3fn::from_bits()); // rebuilt from raw bits
+    EXPECT(fp8_nan.is_nan());
+    EXPECT(std::isnan(fp8_nan));
+    EXPECT(std::isnan(float(fp8_nan))); // NaN must survive the conversion to float
+}
+TEST_CASE(test_infinity_1) // +infinity saturates to the largest finite value
+{
+    float finf = std::numeric_limits<float>::infinity();
+    // no inf in fp8e4m3fn, it gets clipped to max()
+    migraphx::fp8::fp8e4m3fn fp8_max(finf);
+    EXPECT(fp8_max == std::numeric_limits<migraphx::fp8::fp8e4m3fn>::max());
+}
+TEST_CASE(test_infinity_2) // -infinity saturates to the lowest finite value
+{
+    // neg inf
+    float finf = -1.0 * std::numeric_limits<float>::infinity();
+    // no inf in fp8e4m3fn, it gets clipped to lowest
+    migraphx::fp8::fp8e4m3fn fp8_lowest(finf);
+    EXPECT(bool{fp8_lowest == std::numeric_limits<migraphx::fp8::fp8e4m3fn>::lowest()});
+}
+TEST_CASE(test_numeric_max_1) // the largest float converts to the fp8 maximum
+{
+    float fmax = std::numeric_limits<float>::max();
+    migraphx::fp8::fp8e4m3fn fp8_max(fmax);
+    EXPECT(fp8_max == std::numeric_limits<migraphx::fp8::fp8e4m3fn>::max());
+}
+TEST_CASE(test_numeric_max_2) // twice the fp8 maximum still converts to the fp8 maximum
+{
+    // gets clipped to max
+    float fmax = 2 * std::numeric_limits<migraphx::fp8::fp8e4m3fn>::max();
+    migraphx::fp8::fp8e4m3fn fp8_max(fmax);
+    EXPECT(fp8_max == std::numeric_limits<migraphx::fp8::fp8e4m3fn>::max());
+}
+TEST_CASE(test_numeric_lowest_1) // the lowest float converts to the fp8 lowest value
+{
+    float flowest = std::numeric_limits<float>::lowest();
+    migraphx::fp8::fp8e4m3fn fp8_lowest(flowest);
+    EXPECT(fp8_lowest == std::numeric_limits<migraphx::fp8::fp8e4m3fn>::lowest());
+}
+TEST_CASE(test_numeric_lowest_2) // twice the fp8 lowest value still converts to the fp8 lowest
+{
+    // gets clipped to lowest
+    float fmin = 2.0 * std::numeric_limits<migraphx::fp8::fp8e4m3fn>::lowest();
+    migraphx::fp8::fp8e4m3fn fp8_lowest(fmin);
+    EXPECT(fp8_lowest == std::numeric_limits<migraphx::fp8::fp8e4m3fn>::lowest());
+}
+TEST_CASE(test_max_eq_lowest) // the finite range is symmetric: lowest() equals -max()
+{
+    EXPECT(migraphx::float_equal(std::numeric_limits<migraphx::fp8::fp8e4m3fn>::lowest(),
+                                 -1 * std::numeric_limits<migraphx::fp8::fp8e4m3fn>::max()));
+}
+TEST_CASE(test_isfinite) // both zeros are finite, NaN is not
+{
+    EXPECT(std::isfinite(migraphx::fp8::fp8e4m3fn(0.0)));
+    EXPECT(std::isfinite(migraphx::fp8::fp8e4m3fn(-0.0)));
+    EXPECT(not std::isfinite(
+        migraphx::fp8::fp8e4m3fn(std::numeric_limits<migraphx::fp8::fp8e4m3fn>::quiet_NaN())));
+}
+TEST_CASE(test_no_infinity) // numeric_limits must report that the type has no infinity
+{
+    EXPECT(not bool{std::numeric_limits<migraphx::fp8::fp8e4m3fn>::has_infinity});
+}
+TEST_CASE(test_binary_ops) // addition around zero plus the ordering operators
+{
+    auto a = migraphx::fp8::fp8e4m3fn(-1.0);
+    auto b = migraphx::fp8::fp8e4m3fn(1.0);
+    auto c = migraphx::fp8::fp8e4m3fn(0.0);
+    auto d = migraphx::fp8::fp8e4m3fn(-0.0);
+    EXPECT(migraphx::float_equal((c + d), c)); // 0 + (-0) equals +0 ...
+    EXPECT(migraphx::float_equal((c + d), d)); // ... and equals -0
+    EXPECT(migraphx::float_equal((a + b), c)); // -1 + 1 equals +0 ...
+    EXPECT(migraphx::float_equal((a + b), d)); // ... and equals -0
+    auto e = migraphx::fp8::fp8e4m3fn(10.0);
+    auto f = migraphx::fp8::fp8e4m3fn(-10.0);
+    EXPECT(bool{e > f});
+    EXPECT(bool{f < e});
+    EXPECT(bool{f <= e});
+    EXPECT(bool{e >= f});
+    EXPECT(bool{e <= e}); // the non-strict comparisons hold for equal operands
+    EXPECT(bool{f >= f});
+    EXPECT(not migraphx::float_equal(f, e));
+}
+TEST_CASE(test_fabs) // fabs(-1) equals 1
+{
+    auto a = migraphx::fp8::fp8e4m3fn(-1.0);
+    auto b = migraphx::fp8::fp8e4m3fn(1.0);
+    EXPECT(migraphx::float_equal(b, migraphx::fp8::fabs(a)));
+}
+TEST_CASE(test_stream_op) // operator<< output for a finite value and for NaN
+{
+    auto a = migraphx::fp8::fp8e4m3fn(-1.0);
+    std::stringstream ss;
+    ss << a;
+    EXPECT(std::string("-1") == ss.str());
+    ss     = std::stringstream(); // fresh stream so the second check sees only the NaN text
+    auto b = std::numeric_limits<migraphx::fp8::fp8e4m3fn>::quiet_NaN();
+    ss << b;
+    EXPECT(std::string("nan") == ss.str());
+}
+int main(int argc, const char* argv[]) { test::run(argc, argv); } // forwards the command line to test::run
--- a/test/fp8e4m3fnuz.cpp
+++ b/test/fp8e4m3fnuz.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <cmath>
+#include <migraphx/float_equal.hpp>
+#include <migraphx/float8.hpp>
+#include <migraphx/half.hpp>
+#include <migraphx/ranges.hpp>
+#include "test.hpp"
+#include <limits>
+// Reference decoder for the tests in this file: looks `input` up in a hand-written table that
+// holds one float per possible 8-bit pattern (256 entries) and returns that entry.
+float fp8e4m3fnuz_to_fp32_value(uint8_t input)
+{
+    // Eight entries per row; the trailing comment gives the index of the row's first entry.
+    // clang-format off
+    constexpr std::array<float, 256> reference_values = {
+        0.0f, 0.0009765625f, 0.001953125f, 0.0029296875f, 0.00390625f, 0.0048828125f, 0.005859375f, 0.0068359375f, // 0x00
+        0.0078125f, 0.0087890625f, 0.009765625f, 0.0107421875f, 0.01171875f, 0.0126953125f, 0.013671875f, 0.0146484375f, // 0x08
+        0.015625f, 0.017578125f, 0.01953125f, 0.021484375f, 0.0234375f, 0.025390625f, 0.02734375f, 0.029296875f, // 0x10
+        0.03125f, 0.03515625f, 0.0390625f, 0.04296875f, 0.046875f, 0.05078125f, 0.0546875f, 0.05859375f, // 0x18
+        0.0625f, 0.0703125f, 0.078125f, 0.0859375f, 0.09375f, 0.1015625f, 0.109375f, 0.1171875f, // 0x20
+        0.125f, 0.140625f, 0.15625f, 0.171875f, 0.1875f, 0.203125f, 0.21875f, 0.234375f, // 0x28
+        0.25f, 0.28125f, 0.3125f, 0.34375f, 0.375f, 0.40625f, 0.4375f, 0.46875f, // 0x30
+        0.5f, 0.5625f, 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, 0.9375f, // 0x38
+        1.0f, 1.125f, 1.25f, 1.375f, 1.5f, 1.625f, 1.75f, 1.875f, // 0x40
+        2.0f, 2.25f, 2.5f, 2.75f, 3.0f, 3.25f, 3.5f, 3.75f, // 0x48
+        4.0f, 4.5f, 5.0f, 5.5f, 6.0f, 6.5f, 7.0f, 7.5f, // 0x50
+        8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, // 0x58
+        16.0f, 18.0f, 20.0f, 22.0f, 24.0f, 26.0f, 28.0f, 30.0f, // 0x60
+        32.0f, 36.0f, 40.0f, 44.0f, 48.0f, 52.0f, 56.0f, 60.0f, // 0x68
+        64.0f, 72.0f, 80.0f, 88.0f, 96.0f, 104.0f, 112.0f, 120.0f, // 0x70
+        128.0f, 144.0f, 160.0f, 176.0f, 192.0f, 208.0f, 224.0f, 240.0f, // 0x78
+        // index 0x80 holds NaN; the remaining entries mirror 0x01-0x7f with a negative sign
+        std::numeric_limits<float>::quiet_NaN(),
+        -0.0009765625f, -0.001953125f, -0.0029296875f, -0.00390625f, -0.0048828125f, -0.005859375f, -0.0068359375f, // 0x81
+        -0.0078125f, -0.0087890625f, -0.009765625f, -0.0107421875f, -0.01171875f, -0.0126953125f, -0.013671875f, -0.0146484375f, // 0x88
+        -0.015625f, -0.017578125f, -0.01953125f, -0.021484375f, -0.0234375f, -0.025390625f, -0.02734375f, -0.029296875f, // 0x90
+        -0.03125f, -0.03515625f, -0.0390625f, -0.04296875f, -0.046875f, -0.05078125f, -0.0546875f, -0.05859375f, // 0x98
+        -0.0625f, -0.0703125f, -0.078125f, -0.0859375f, -0.09375f, -0.1015625f, -0.109375f, -0.1171875f, // 0xa0
+        -0.125f, -0.140625f, -0.15625f, -0.171875f, -0.1875f, -0.203125f, -0.21875f, -0.234375f, // 0xa8
+        -0.25f, -0.28125f, -0.3125f, -0.34375f, -0.375f, -0.40625f, -0.4375f, -0.46875f, // 0xb0
+        -0.5f, -0.5625f, -0.625f, -0.6875f, -0.75f, -0.8125f, -0.875f, -0.9375f, // 0xb8
+        -1.0f, -1.125f, -1.25f, -1.375f, -1.5f, -1.625f, -1.75f, -1.875f, // 0xc0
+        -2.0f, -2.25f, -2.5f, -2.75f, -3.0f, -3.25f, -3.5f, -3.75f, // 0xc8
+        -4.0f, -4.5f, -5.0f, -5.5f, -6.0f, -6.5f, -7.0f, -7.5f, // 0xd0
+        -8.0f, -9.0f, -10.0f, -11.0f, -12.0f, -13.0f, -14.0f, -15.0f, // 0xd8
+        -16.0f, -18.0f, -20.0f, -22.0f, -24.0f, -26.0f, -28.0f, -30.0f, // 0xe0
+        -32.0f, -36.0f, -40.0f, -44.0f, -48.0f, -52.0f, -56.0f, -60.0f, // 0xe8
+        -64.0f, -72.0f, -80.0f, -88.0f, -96.0f, -104.0f, -112.0f, -120.0f, // 0xf0
+        -128.0f, -144.0f, -160.0f, -176.0f, -192.0f, -208.0f, -224.0f, -240.0f, // 0xf8
+    };
+    // clang-format on
+    return reference_values[input];
+}
+// Builds an fp8e4m3fnuz from every 8-bit pattern and compares its float value with the
+// reference table returned by fp8e4m3fnuz_to_fp32_value.
+TEST_CASE(test_fp8_cast_to_float)
+{
+    using f8_t = migraphx::fp8::fp8e4m3fnuz;
+    std::vector<uint8_t> all_bits(256);
+    std::iota(all_bits.begin(), all_bits.end(), 0);
+    const auto matches_reference = [](uint8_t bits) {
+        f8_t decoded(bits, f8_t::from_bits());
+        const float actual   = float(decoded);
+        const float expected = fp8e4m3fnuz_to_fp32_value(bits);
+        // a NaN on both sides is accepted explicitly instead of going through float_equal
+        if(std::isnan(actual) and std::isnan(expected))
+        {
+            return true;
+        }
+        return migraphx::float_equal(actual, expected);
+    };
+    EXPECT(bool{std::all_of(all_bits.begin(), all_bits.end(), matches_reference)});
+}
+// Converts selected floats to fp8e4m3fnuz and compares each result with the value rebuilt
+// from the expected raw bit pattern.
+TEST_CASE(test_fp8_cast_from_float)
+{
+    using f8_t = migraphx::fp8::fp8e4m3fnuz;
+    // float input -> expected bit pattern
+    // clang-format off
+    std::unordered_map<float, uint8_t> expected_bits = {
+        {256, 0x7f},        {-256, 0xff},         {240, 0x7f},        {-240, 0xff},
+        {1e-07, 0x0},       {1e+07, 0x7f},        {1, 0x40},          {-1, 0xc0},
+        {0.1, 0x25},        {0.11, 0x26},         {0.111, 0x26},      {0.1111, 0x26},
+        {-0.1, 0xa5},       {-0.11, 0xa6},        {-0.111, 0xa6},     {-0.1111, 0xa6},
+        {0.2, 0x2d},        {2, 0x48},            {20, 0x62},         {200, 0x7c},
+        {-0.2, 0xad},       {-2, 0xc8},           {-20, 0xe2},        {-200, 0xfc},
+        {0.5, 0x38},        {-0.5, 0xb8},         {1.17549e-38, 0x0}, {1.4013e-45, 0x0},
+        {0.00390625, 0x4},  {-0.00390625, 0x84},  {0.00195312, 0x2},  {-0.00195312, 0x82},
+        {0.000976562, 0x1}, {-0.000976562, 0x81}, {0.000488281, 0x0}, {-0.000488281, 0x0},
+    };
+    // clang-format on
+    const auto converts_as_expected = [](const auto sample) {
+        return migraphx::float_equal(f8_t(sample.first),
+                                     f8_t(sample.second, f8_t::from_bits()));
+    };
+    EXPECT(bool{std::all_of(expected_bits.begin(), expected_bits.end(), converts_as_expected)});
+}
+// +0.0f must convert to an fp8e4m3fnuz zero whose float value float_equals the input.
+TEST_CASE(test_positive_zero)
+{
+    using f8_t          = migraphx::fp8::fp8e4m3fnuz;
+    float positive_zero = 0.0;
+    f8_t converted(positive_zero);
+    EXPECT(converted.is_zero());
+    EXPECT(migraphx::float_equal(positive_zero, float(converted)));
+}
+// -0.0f must convert to an fp8e4m3fnuz zero; the round trip is compared against +0.0f.
+TEST_CASE(test_negative_zero)
+{
+    using f8_t          = migraphx::fp8::fp8e4m3fnuz;
+    float positive_zero = 0.0;
+    float negative_zero = -0.0;
+    f8_t converted(negative_zero);
+    EXPECT(converted.is_zero());
+    // the negative zero is expected to come back as positive zero
+    EXPECT(migraphx::float_equal(positive_zero, float(converted)));
+}
+// A float quiet NaN must still be NaN after conversion to fp8e4m3fnuz.
+TEST_CASE(test_nan_1)
+{
+    using f8_t      = migraphx::fp8::fp8e4m3fnuz;
+    float float_nan = std::numeric_limits<float>::quiet_NaN();
+    f8_t converted(float_nan);
+    EXPECT(converted.is_nan());
+    EXPECT(std::isnan(converted));
+}
+// The numeric_limits quiet NaN, rebuilt from its raw bits, must be NaN as fp8 and as float.
+TEST_CASE(test_nan_2)
+{
+    using f8_t      = migraphx::fp8::fp8e4m3fnuz;
+    auto limits_nan = std::numeric_limits<f8_t>::quiet_NaN();
+    f8_t rebuilt(limits_nan.data, f8_t::from_bits());
+    EXPECT(rebuilt.is_nan());
+    EXPECT(std::isnan(rebuilt));
+    EXPECT(std::isnan(float(rebuilt)));
+}
+// fp8e4m3fnuz has no infinity, so positive float infinity is expected to convert to NaN.
+TEST_CASE(test_infinity_1)
+{
+    using f8_t         = migraphx::fp8::fp8e4m3fnuz;
+    float positive_inf = std::numeric_limits<float>::infinity();
+    f8_t converted(positive_inf);
+    EXPECT(converted.is_nan());
+    EXPECT(std::isnan(float(converted)));
+}
+// fp8e4m3fnuz has no infinity, so negative float infinity is expected to convert to NaN.
+TEST_CASE(test_infinity_2)
+{
+    using f8_t         = migraphx::fp8::fp8e4m3fnuz;
+    float negative_inf = -std::numeric_limits<float>::infinity();
+    f8_t converted(negative_inf);
+    EXPECT(converted.is_nan());
+    EXPECT(std::isnan(float(converted)));
+}
+// The largest finite float must convert to the fp8e4m3fnuz maximum.
+TEST_CASE(test_numeric_max_1)
+{
+    using f8_t          = migraphx::fp8::fp8e4m3fnuz;
+    float largest_float = std::numeric_limits<float>::max();
+    f8_t converted(largest_float);
+    EXPECT(converted == std::numeric_limits<f8_t>::max());
+}
+// Twice the fp8e4m3fnuz maximum is out of range and must be clipped back to the maximum.
+TEST_CASE(test_numeric_max_2)
+{
+    using f8_t      = migraphx::fp8::fp8e4m3fnuz;
+    float twice_max = 2 * std::numeric_limits<f8_t>::max();
+    f8_t converted(twice_max);
+    EXPECT(converted == std::numeric_limits<f8_t>::max());
+}
+// The lowest finite float must convert to the fp8e4m3fnuz lowest value.
+TEST_CASE(test_numeric_lowest_1)
+{
+    using f8_t         = migraphx::fp8::fp8e4m3fnuz;
+    float lowest_float = std::numeric_limits<float>::lowest();
+    f8_t converted(lowest_float);
+    EXPECT(converted == std::numeric_limits<f8_t>::lowest());
+}
+// Twice the fp8e4m3fnuz lowest value is out of range and must be clipped back to lowest.
+TEST_CASE(test_numeric_lowest_2)
+{
+    using f8_t         = migraphx::fp8::fp8e4m3fnuz;
+    float twice_lowest = 2.0 * std::numeric_limits<f8_t>::lowest();
+    f8_t converted(twice_lowest);
+    EXPECT(converted == std::numeric_limits<f8_t>::lowest());
+}
+// lowest() must float_equal the negated max() for fp8e4m3fnuz.
+TEST_CASE(test_max_eq_lowest)
+{
+    using f8_limits = std::numeric_limits<migraphx::fp8::fp8e4m3fnuz>;
+    EXPECT(migraphx::float_equal(f8_limits::lowest(), -1 * f8_limits::max()));
+}
+// Zeros built from 0.0 and -0.0 are finite; the quiet NaN is not.
+TEST_CASE(test_isfinite)
+{
+    using f8_t    = migraphx::fp8::fp8e4m3fnuz;
+    auto pos_zero = f8_t(0.0);
+    auto neg_zero = f8_t(-0.0);
+    auto nan_copy = f8_t(std::numeric_limits<f8_t>::quiet_NaN());
+    EXPECT(std::isfinite(pos_zero));
+    EXPECT(std::isfinite(neg_zero));
+    EXPECT(not std::isfinite(nan_copy));
+}
+// numeric_limits for fp8e4m3fnuz must report that the type has no infinity.
+TEST_CASE(test_no_infinity)
+{
+    using f8_limits = std::numeric_limits<migraphx::fp8::fp8e4m3fnuz>;
+    EXPECT(not bool{f8_limits::has_infinity});
+}
+// Checks addition results and the relational operators on fp8e4m3fnuz values.
+TEST_CASE(test_binary_ops)
+{
+    using f8_t     = migraphx::fp8::fp8e4m3fnuz;
+    auto minus_one = f8_t(-1.0);
+    auto plus_one  = f8_t(1.0);
+    auto pos_zero  = f8_t(0.0);
+    auto neg_zero  = f8_t(-0.0);
+    // sums that come out as zero must float_equal both zero operands
+    EXPECT(migraphx::float_equal((pos_zero + neg_zero), pos_zero));
+    EXPECT(migraphx::float_equal((pos_zero + neg_zero), neg_zero));
+    EXPECT(migraphx::float_equal((minus_one + plus_one), pos_zero));
+    EXPECT(migraphx::float_equal((minus_one + plus_one), neg_zero));
+    // ordering of a positive value against its negation
+    auto ten       = f8_t(10.0);
+    auto minus_ten = f8_t(-10.0);
+    EXPECT(bool{ten > minus_ten});
+    EXPECT(bool{minus_ten < ten});
+    EXPECT(bool{minus_ten <= ten});
+    EXPECT(bool{ten >= minus_ten});
+    EXPECT(bool{ten <= ten});
+    EXPECT(bool{minus_ten >= minus_ten});
+    EXPECT(not migraphx::float_equal(minus_ten, ten));
+}
+// fabs(-1) must float_equal +1 for fp8e4m3fnuz.
+TEST_CASE(test_fabs)
+{
+    using f8_t     = migraphx::fp8::fp8e4m3fnuz;
+    auto negative  = f8_t(-1.0);
+    auto magnitude = f8_t(1.0);
+    EXPECT(migraphx::float_equal(magnitude, migraphx::fp8::fabs(negative)));
+}
+// Streaming an fp8e4m3fnuz value must print "-1" for -1.0 and "nan" for the quiet NaN.
+TEST_CASE(test_stream_op)
+{
+    using f8_t     = migraphx::fp8::fp8e4m3fnuz;
+    auto minus_one = f8_t(-1.0);
+    std::stringstream value_text;
+    value_text << minus_one;
+    EXPECT(std::string("-1") == value_text.str());
+    // a second, fresh stream takes the place of resetting the first one
+    auto nan_value = std::numeric_limits<f8_t>::quiet_NaN();
+    std::stringstream nan_text;
+    nan_text << nan_value;
+    EXPECT(std::string("nan") == nan_text.str());
+}
+// Test binary entry point: hands the command line to test::run.
+int main(int argc, const char* argv[])
+{
+    test::run(argc, argv);
+}
--- a/test/fp8e5m2.cpp
+++ b/test/fp8e5m2.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <cmath>
+#include <migraphx/float_equal.hpp>
+#include <migraphx/float8.hpp>
+#include <migraphx/half.hpp>
+#include <migraphx/ranges.hpp>
+#include "test.hpp"
+#include <limits>
+#include <sstream>
+// Reference decoder for the tests in this file: looks `input` up in a hand-written table that
+// holds one float per possible 8-bit pattern (256 entries) and returns that entry.
+//
+// The table is `static constexpr` so it is built once at compile time, and it is named after
+// the format this file tests (it used to carry the e4m3fnuz name of the sibling test file).
+float fp8e5m2_to_fp32_value(uint8_t input)
+{
+    constexpr float pos_inf = std::numeric_limits<float>::infinity();
+    constexpr float qnan    = std::numeric_limits<float>::quiet_NaN();
+    // Four entries per row; the trailing comment gives the index of the row's first entry.
+    // clang-format off
+    static constexpr std::array<float, 256> e5m2_lut = {
+        0.0f, 1.52587890625e-05f, 3.0517578125e-05f, 4.57763671875e-05f, // 0x00
+        6.103515625e-05f, 7.62939453125e-05f, 9.1552734375e-05f, 0.0001068115234375f, // 0x04
+        0.0001220703125f, 0.000152587890625f, 0.00018310546875f, 0.000213623046875f, // 0x08
+        0.000244140625f, 0.00030517578125f, 0.0003662109375f, 0.00042724609375f, // 0x0c
+        0.00048828125f, 0.0006103515625f, 0.000732421875f, 0.0008544921875f, // 0x10
+        0.0009765625f, 0.001220703125f, 0.00146484375f, 0.001708984375f, // 0x14
+        0.001953125f, 0.00244140625f, 0.0029296875f, 0.00341796875f, // 0x18
+        0.00390625f, 0.0048828125f, 0.005859375f, 0.0068359375f, // 0x1c
+        0.0078125f, 0.009765625f, 0.01171875f, 0.013671875f, // 0x20
+        0.015625f, 0.01953125f, 0.0234375f, 0.02734375f, // 0x24
+        0.03125f, 0.0390625f, 0.046875f, 0.0546875f, // 0x28
+        0.0625f, 0.078125f, 0.09375f, 0.109375f, // 0x2c
+        0.125f, 0.15625f, 0.1875f, 0.21875f, // 0x30
+        0.25f, 0.3125f, 0.375f, 0.4375f, // 0x34
+        0.5f, 0.625f, 0.75f, 0.875f, // 0x38
+        1.0f, 1.25f, 1.5f, 1.75f, // 0x3c
+        2.0f, 2.5f, 3.0f, 3.5f, // 0x40
+        4.0f, 5.0f, 6.0f, 7.0f, // 0x44
+        8.0f, 10.0f, 12.0f, 14.0f, // 0x48
+        16.0f, 20.0f, 24.0f, 28.0f, // 0x4c
+        32.0f, 40.0f, 48.0f, 56.0f, // 0x50
+        64.0f, 80.0f, 96.0f, 112.0f, // 0x54
+        128.0f, 160.0f, 192.0f, 224.0f, // 0x58
+        256.0f, 320.0f, 384.0f, 448.0f, // 0x5c
+        512.0f, 640.0f, 768.0f, 896.0f, // 0x60
+        1024.0f, 1280.0f, 1536.0f, 1792.0f, // 0x64
+        2048.0f, 2560.0f, 3072.0f, 3584.0f, // 0x68
+        4096.0f, 5120.0f, 6144.0f, 7168.0f, // 0x6c
+        8192.0f, 10240.0f, 12288.0f, 14336.0f, // 0x70
+        16384.0f, 20480.0f, 24576.0f, 28672.0f, // 0x74
+        32768.0f, 40960.0f, 49152.0f, 57344.0f, // 0x78
+        pos_inf, qnan, qnan, qnan, // 0x7c
+        // second half: negative zero, negative values, negative infinity, NaNs
+        -0.0f, -1.52587890625e-05f, -3.0517578125e-05f, -4.57763671875e-05f, // 0x80
+        -6.103515625e-05f, -7.62939453125e-05f, -9.1552734375e-05f, -0.0001068115234375f, // 0x84
+        -0.0001220703125f, -0.000152587890625f, -0.00018310546875f, -0.000213623046875f, // 0x88
+        -0.000244140625f, -0.00030517578125f, -0.0003662109375f, -0.00042724609375f, // 0x8c
+        -0.00048828125f, -0.0006103515625f, -0.000732421875f, -0.0008544921875f, // 0x90
+        -0.0009765625f, -0.001220703125f, -0.00146484375f, -0.001708984375f, // 0x94
+        -0.001953125f, -0.00244140625f, -0.0029296875f, -0.00341796875f, // 0x98
+        -0.00390625f, -0.0048828125f, -0.005859375f, -0.0068359375f, // 0x9c
+        -0.0078125f, -0.009765625f, -0.01171875f, -0.013671875f, // 0xa0
+        -0.015625f, -0.01953125f, -0.0234375f, -0.02734375f, // 0xa4
+        -0.03125f, -0.0390625f, -0.046875f, -0.0546875f, // 0xa8
+        -0.0625f, -0.078125f, -0.09375f, -0.109375f, // 0xac
+        -0.125f, -0.15625f, -0.1875f, -0.21875f, // 0xb0
+        -0.25f, -0.3125f, -0.375f, -0.4375f, // 0xb4
+        -0.5f, -0.625f, -0.75f, -0.875f, // 0xb8
+        -1.0f, -1.25f, -1.5f, -1.75f, // 0xbc
+        -2.0f, -2.5f, -3.0f, -3.5f, // 0xc0
+        -4.0f, -5.0f, -6.0f, -7.0f, // 0xc4
+        -8.0f, -10.0f, -12.0f, -14.0f, // 0xc8
+        -16.0f, -20.0f, -24.0f, -28.0f, // 0xcc
+        -32.0f, -40.0f, -48.0f, -56.0f, // 0xd0
+        -64.0f, -80.0f, -96.0f, -112.0f, // 0xd4
+        -128.0f, -160.0f, -192.0f, -224.0f, // 0xd8
+        -256.0f, -320.0f, -384.0f, -448.0f, // 0xdc
+        -512.0f, -640.0f, -768.0f, -896.0f, // 0xe0
+        -1024.0f, -1280.0f, -1536.0f, -1792.0f, // 0xe4
+        -2048.0f, -2560.0f, -3072.0f, -3584.0f, // 0xe8
+        -4096.0f, -5120.0f, -6144.0f, -7168.0f, // 0xec
+        -8192.0f, -10240.0f, -12288.0f, -14336.0f, // 0xf0
+        -16384.0f, -20480.0f, -24576.0f, -28672.0f, // 0xf4
+        -32768.0f, -40960.0f, -49152.0f, -57344.0f, // 0xf8
+        -pos_inf, qnan, qnan, qnan, // 0xfc
+    };
+    // clang-format on
+    return e5m2_lut[input];
+}
+// Builds an fp8e5m2 from every 8-bit pattern and compares its float value with the reference
+// table returned by fp8e5m2_to_fp32_value.
+//
+// NaN and infinity are handled before float_equal. For infinities the signs must also agree,
+// so a +inf/-inf mix-up is reported as a mismatch instead of being accepted.
+TEST_CASE(test_fp8_cast_to_float)
+{
+    std::vector<uint8_t> bit_vals(256);
+    std::iota(bit_vals.begin(), bit_vals.end(), 0);
+    EXPECT(bool{std::all_of(bit_vals.begin(), bit_vals.end(), [](uint8_t bit_val) {
+        migraphx::fp8::fp8e5m2 fp8_val(bit_val, migraphx::fp8::fp8e5m2::from_bits());
+        // convert once and reuse the results in every check below
+        const float actual   = float(fp8_val);
+        const float expected = fp8e5m2_to_fp32_value(bit_val);
+        if(std::isnan(actual) and std::isnan(expected))
+        {
+            return true;
+        }
+        if(std::isinf(actual) and std::isinf(expected))
+        {
+            // both are infinite: they match only when they have the same sign
+            return std::signbit(actual) == std::signbit(expected);
+        }
+        return migraphx::float_equal(actual, expected);
+    })});
+}
+// Converts selected floats to fp8e5m2 and compares each result with the value rebuilt from
+// the expected raw bit pattern.
+TEST_CASE(test_fp8_cast_from_float)
+{
+    using f8_t = migraphx::fp8::fp8e5m2;
+    // float input -> expected bit pattern
+    // clang-format off
+    std::unordered_map<float, uint8_t> expected_bits = {
+        {-60000, 0xfb},      {-57344, 0xfb},     {-448, 0xdf},        {-256, 0xdc},
+        {-240, 0xdc},        {-200, 0xda},       {-20, 0xcd},         {-2, 0xc0},
+        {-1, 0xbc},          {-0.5, 0xb8},       {-0.2, 0xb2},        {-0.1111, 0xaf},
+        {-0.111, 0xaf},      {-0.11, 0xaf},      {-0.1, 0xae},        {6.10351e-05, 0x4},
+        {-6.10351e-05, 0x84}, {3.05176e-05, 0x2}, {-3.05176e-05, 0x82}, {1.52588e-05, 0x1},
+        {-1.52588e-05, 0x81}, {7.62939e-06, 0x0}, {-7.62939e-06, 0x80}, {0.1, 0x2e},
+        {0.11, 0x2f},        {0.111, 0x2f},      {0.1111, 0x2f},      {0.2, 0x32},
+        {0.5, 0x38},         {1, 0x3c},          {2, 0x40},           {20, 0x4d},
+        {200, 0x5a},         {240, 0x5c},        {256, 0x5c},         {448, 0x5f},
+        {57344, 0x7b},       {60000, 0x7b},      {1e+07, 0x7b},
+    };
+    // clang-format on
+    const auto converts_as_expected = [](const auto sample) {
+        return migraphx::float_equal(f8_t(sample.first),
+                                     f8_t(sample.second, f8_t::from_bits()));
+    };
+    EXPECT(bool{std::all_of(expected_bits.begin(), expected_bits.end(), converts_as_expected)});
+}
+// +0.0f must convert to an fp8e5m2 zero whose float value float_equals the input.
+TEST_CASE(test_positive_zero)
+{
+    using f8_t          = migraphx::fp8::fp8e5m2;
+    float positive_zero = 0.0;
+    f8_t converted(positive_zero);
+    EXPECT(converted.is_zero());
+    EXPECT(migraphx::float_equal(positive_zero, float(converted)));
+}
+// -0.0f must convert to an fp8e5m2 zero whose float value float_equals the -0.0f input.
+TEST_CASE(test_negative_zero)
+{
+    using f8_t          = migraphx::fp8::fp8e5m2;
+    float negative_zero = -0.0;
+    f8_t converted(negative_zero);
+    EXPECT(converted.is_zero());
+    // per the original note, fp8e5m2 preserves negative zero, so compare against -0.0f itself
+    EXPECT(migraphx::float_equal(negative_zero, float(converted)));
+}
+// fp8e5m2 values built from -0.0f and +0.0f must compare equal with operator==.
+TEST_CASE(test_pos_zero_eq_neg_zero)
+{
+    using f8_t          = migraphx::fp8::fp8e5m2;
+    float positive_zero = 0.0;
+    float negative_zero = -0.0;
+    f8_t from_negative(negative_zero);
+    f8_t from_positive(positive_zero);
+    EXPECT(from_negative == from_positive);
+}
+// A float quiet NaN must still be NaN after conversion to fp8e5m2.
+TEST_CASE(test_nan_1)
+{
+    using f8_t      = migraphx::fp8::fp8e5m2;
+    float float_nan = std::numeric_limits<float>::quiet_NaN();
+    f8_t converted(float_nan);
+    EXPECT(converted.is_nan());
+    EXPECT(std::isnan(converted));
+}
+// The numeric_limits quiet NaN, rebuilt from its raw bits, must be NaN as fp8 and as float.
+TEST_CASE(test_nan_2)
+{
+    using f8_t      = migraphx::fp8::fp8e5m2;
+    auto limits_nan = std::numeric_limits<f8_t>::quiet_NaN();
+    f8_t rebuilt(limits_nan.data, f8_t::from_bits());
+    EXPECT(rebuilt.is_nan());
+    EXPECT(std::isnan(rebuilt));
+    EXPECT(std::isnan(float(rebuilt)));
+}
+// Positive float infinity is expected to be clipped to the fp8e5m2 maximum on conversion.
+TEST_CASE(test_infinity_1)
+{
+    using f8_t         = migraphx::fp8::fp8e5m2;
+    float positive_inf = std::numeric_limits<float>::infinity();
+    f8_t converted(positive_inf);
+    EXPECT(converted == std::numeric_limits<f8_t>::max());
+}
+// Negative float infinity is expected to be clipped to the fp8e5m2 lowest value on conversion.
+TEST_CASE(test_infinity_2)
+{
+    using f8_t         = migraphx::fp8::fp8e5m2;
+    float negative_inf = -std::numeric_limits<float>::infinity();
+    f8_t converted(negative_inf);
+    EXPECT(bool{converted == std::numeric_limits<f8_t>::lowest()});
+}
+// The largest finite float must convert to the fp8e5m2 maximum.
+TEST_CASE(test_numeric_max_1)
+{
+    using f8_t          = migraphx::fp8::fp8e5m2;
+    float largest_float = std::numeric_limits<float>::max();
+    f8_t converted(largest_float);
+    EXPECT(converted == std::numeric_limits<f8_t>::max());
+}
+// Twice the fp8e5m2 maximum is out of range and must be clipped back to the maximum.
+TEST_CASE(test_numeric_max_2)
+{
+    using f8_t      = migraphx::fp8::fp8e5m2;
+    float twice_max = 2 * std::numeric_limits<f8_t>::max();
+    f8_t converted(twice_max);
+    EXPECT(converted == std::numeric_limits<f8_t>::max());
+}
+// The lowest finite float must convert to the fp8e5m2 lowest value.
+TEST_CASE(test_numeric_lowest_1)
+{
+    using f8_t         = migraphx::fp8::fp8e5m2;
+    float lowest_float = std::numeric_limits<float>::lowest();
+    f8_t converted(lowest_float);
+    EXPECT(converted == std::numeric_limits<f8_t>::lowest());
+}
+// Twice the fp8e5m2 lowest value is out of range and must be clipped back to lowest.
+TEST_CASE(test_numeric_lowest_2)
+{
+    using f8_t         = migraphx::fp8::fp8e5m2;
+    float twice_lowest = 2.0 * std::numeric_limits<f8_t>::lowest();
+    f8_t converted(twice_lowest);
+    EXPECT(converted == std::numeric_limits<f8_t>::lowest());
+}
+// lowest() must float_equal the negated max() for fp8e5m2.
+TEST_CASE(test_max_eq_lowest)
+{
+    using f8_limits = std::numeric_limits<migraphx::fp8::fp8e5m2>;
+    EXPECT(migraphx::float_equal(f8_limits::lowest(), -1 * f8_limits::max()));
+}
+// Covers std::isfinite for fp8e5m2 zeros, NaN, infinity and a raw 0xFC bit pattern.
+TEST_CASE(test_isfinite)
+{
+    using f8_t      = migraphx::fp8::fp8e5m2;
+    using f8_limits = std::numeric_limits<f8_t>;
+    // values built from 0.0 and -0.0 are finite
+    EXPECT(std::isfinite(f8_t(0.0)));
+    EXPECT(std::isfinite(f8_t(-0.0)));
+    // the quiet NaN and the infinity from numeric_limits are not finite
+    EXPECT(not std::isfinite(f8_t(f8_limits::quiet_NaN())));
+    EXPECT(not std::isfinite(f8_limits::infinity()));
+    // -1.0 * inf is float(-inf); per the original note, clipping/saturation converts it
+    // into fp8 lowest(), which is finite
+    EXPECT(std::isfinite(f8_t(-1.0 * f8_limits::infinity())));
+    // the value built directly from bits 0xFC must not be finite
+    EXPECT(not std::isfinite(f8_t(0xFC, f8_t::from_bits())));
+}
+// Checks addition results and the relational operators on fp8e5m2 values.
+TEST_CASE(test_binary_ops)
+{
+    using f8_t     = migraphx::fp8::fp8e5m2;
+    auto minus_one = f8_t(-1.0);
+    auto plus_one  = f8_t(1.0);
+    auto pos_zero  = f8_t(0.0);
+    auto neg_zero  = f8_t(-0.0);
+    // sums that come out as zero must float_equal both zero operands
+    EXPECT(migraphx::float_equal((pos_zero + neg_zero), pos_zero));
+    EXPECT(migraphx::float_equal((pos_zero + neg_zero), neg_zero));
+    EXPECT(migraphx::float_equal((minus_one + plus_one), pos_zero));
+    EXPECT(migraphx::float_equal((minus_one + plus_one), neg_zero));
+    // ordering of a positive value against its negation
+    auto ten       = f8_t(10.0);
+    auto minus_ten = f8_t(-10.0);
+    EXPECT(bool{ten > minus_ten});
+    EXPECT(bool{minus_ten < ten});
+    EXPECT(bool{minus_ten <= ten});
+    EXPECT(bool{ten >= minus_ten});
+    EXPECT(bool{ten <= ten});
+    EXPECT(bool{minus_ten >= minus_ten});
+    EXPECT(not migraphx::float_equal(minus_ten, ten));
+}
+// fabs(-1) must float_equal +1 for fp8e5m2.
+TEST_CASE(test_fabs)
+{
+    using f8_t     = migraphx::fp8::fp8e5m2;
+    auto negative  = f8_t(-1.0);
+    auto magnitude = f8_t(1.0);
+    EXPECT(migraphx::float_equal(magnitude, migraphx::fp8::fabs(negative)));
+}
+// Streaming an fp8e5m2 value must print "-1" for -1.0 and "nan" for the quiet NaN.
+TEST_CASE(test_stream_op)
+{
+    using f8_t     = migraphx::fp8::fp8e5m2;
+    auto minus_one = f8_t(-1.0);
+    std::stringstream value_text;
+    value_text << minus_one;
+    EXPECT(std::string("-1") == value_text.str());
+    // a second, fresh stream takes the place of resetting the first one
+    auto nan_value = std::numeric_limits<f8_t>::quiet_NaN();
+    std::stringstream nan_text;
+    nan_text << nan_value;
+    EXPECT(std::string("nan") == nan_text.str());
+}
+// Test binary entry point: hands the command line to test::run.
+int main(int argc, const char* argv[])
+{
+    test::run(argc, argv);
+}
--- a/test/fp8e5m2fnuz.cpp
+++ b/test/fp8e5m2fnuz.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <cmath>
+#include <migraphx/float_equal.hpp>
+#include <migraphx/float8.hpp>
+#include <migraphx/half.hpp>
+#include <migraphx/ranges.hpp>
+#include "test.hpp"
+#include <limits>
+// Reference decoder for the tests in this file: looks `input` up in a hand-written table that
+// holds one float per possible 8-bit pattern (256 entries) and returns that entry.
+//
+// The table is `static constexpr` so it is built once at compile time, and it is named after
+// the format this file tests (it used to carry the e4m3fnuz name of the sibling test file).
+float fp8e5m2fnuz_to_fp32_value(uint8_t input)
+{
+    constexpr float qnan = std::numeric_limits<float>::quiet_NaN();
+    // Four entries per row; the trailing comment gives the index of the row's first entry.
+    // clang-format off
+    static constexpr std::array<float, 256> e5m2fnuz_lut = {
+        0.0f, 7.62939453125e-06f, 1.52587890625e-05f, 2.288818359375e-05f, // 0x00
+        3.0517578125e-05f, 3.814697265625e-05f, 4.57763671875e-05f, 5.340576171875e-05f, // 0x04
+        6.103515625e-05f, 7.62939453125e-05f, 9.1552734375e-05f, 0.0001068115234375f, // 0x08
+        0.0001220703125f, 0.000152587890625f, 0.00018310546875f, 0.000213623046875f, // 0x0c
+        0.000244140625f, 0.00030517578125f, 0.0003662109375f, 0.00042724609375f, // 0x10
+        0.00048828125f, 0.0006103515625f, 0.000732421875f, 0.0008544921875f, // 0x14
+        0.0009765625f, 0.001220703125f, 0.00146484375f, 0.001708984375f, // 0x18
+        0.001953125f, 0.00244140625f, 0.0029296875f, 0.00341796875f, // 0x1c
+        0.00390625f, 0.0048828125f, 0.005859375f, 0.0068359375f, // 0x20
+        0.0078125f, 0.009765625f, 0.01171875f, 0.013671875f, // 0x24
+        0.015625f, 0.01953125f, 0.0234375f, 0.02734375f, // 0x28
+        0.03125f, 0.0390625f, 0.046875f, 0.0546875f, // 0x2c
+        0.0625f, 0.078125f, 0.09375f, 0.109375f, // 0x30
+        0.125f, 0.15625f, 0.1875f, 0.21875f, // 0x34
+        0.25f, 0.3125f, 0.375f, 0.4375f, // 0x38
+        0.5f, 0.625f, 0.75f, 0.875f, // 0x3c
+        1.0f, 1.25f, 1.5f, 1.75f, // 0x40
+        2.0f, 2.5f, 3.0f, 3.5f, // 0x44
+        4.0f, 5.0f, 6.0f, 7.0f, // 0x48
+        8.0f, 10.0f, 12.0f, 14.0f, // 0x4c
+        16.0f, 20.0f, 24.0f, 28.0f, // 0x50
+        32.0f, 40.0f, 48.0f, 56.0f, // 0x54
+        64.0f, 80.0f, 96.0f, 112.0f, // 0x58
+        128.0f, 160.0f, 192.0f, 224.0f, // 0x5c
+        256.0f, 320.0f, 384.0f, 448.0f, // 0x60
+        512.0f, 640.0f, 768.0f, 896.0f, // 0x64
+        1024.0f, 1280.0f, 1536.0f, 1792.0f, // 0x68
+        2048.0f, 2560.0f, 3072.0f, 3584.0f, // 0x6c
+        4096.0f, 5120.0f, 6144.0f, 7168.0f, // 0x70
+        8192.0f, 10240.0f, 12288.0f, 14336.0f, // 0x74
+        16384.0f, 20480.0f, 24576.0f, 28672.0f, // 0x78
+        32768.0f, 40960.0f, 49152.0f, 57344.0f, // 0x7c
+        // second half: index 0x80 holds NaN, the rest mirrors 0x01-0x7f with a negative sign
+        qnan, -7.62939453125e-06f, -1.52587890625e-05f, -2.288818359375e-05f, // 0x80
+        -3.0517578125e-05f, -3.814697265625e-05f, -4.57763671875e-05f, -5.340576171875e-05f, // 0x84
+        -6.103515625e-05f, -7.62939453125e-05f, -9.1552734375e-05f, -0.0001068115234375f, // 0x88
+        -0.0001220703125f, -0.000152587890625f, -0.00018310546875f, -0.000213623046875f, // 0x8c
+        -0.000244140625f, -0.00030517578125f, -0.0003662109375f, -0.00042724609375f, // 0x90
+        -0.00048828125f, -0.0006103515625f, -0.000732421875f, -0.0008544921875f, // 0x94
+        -0.0009765625f, -0.001220703125f, -0.00146484375f, -0.001708984375f, // 0x98
+        -0.001953125f, -0.00244140625f, -0.0029296875f, -0.00341796875f, // 0x9c
+        -0.00390625f, -0.0048828125f, -0.005859375f, -0.0068359375f, // 0xa0
+        -0.0078125f, -0.009765625f, -0.01171875f, -0.013671875f, // 0xa4
+        -0.015625f, -0.01953125f, -0.0234375f, -0.02734375f, // 0xa8
+        -0.03125f, -0.0390625f, -0.046875f, -0.0546875f, // 0xac
+        -0.0625f, -0.078125f, -0.09375f, -0.109375f, // 0xb0
+        -0.125f, -0.15625f, -0.1875f, -0.21875f, // 0xb4
+        -0.25f, -0.3125f, -0.375f, -0.4375f, // 0xb8
+        -0.5f, -0.625f, -0.75f, -0.875f, // 0xbc
+        -1.0f, -1.25f, -1.5f, -1.75f, // 0xc0
+        -2.0f, -2.5f, -3.0f, -3.5f, // 0xc4
+        -4.0f, -5.0f, -6.0f, -7.0f, // 0xc8
+        -8.0f, -10.0f, -12.0f, -14.0f, // 0xcc
+        -16.0f, -20.0f, -24.0f, -28.0f, // 0xd0
+        -32.0f, -40.0f, -48.0f, -56.0f, // 0xd4
+        -64.0f, -80.0f, -96.0f, -112.0f, // 0xd8
+        -128.0f, -160.0f, -192.0f, -224.0f, // 0xdc
+        -256.0f, -320.0f, -384.0f, -448.0f, // 0xe0
+        -512.0f, -640.0f, -768.0f, -896.0f, // 0xe4
+        -1024.0f, -1280.0f, -1536.0f, -1792.0f, // 0xe8
+        -2048.0f, -2560.0f, -3072.0f, -3584.0f, // 0xec
+        -4096.0f, -5120.0f, -6144.0f, -7168.0f, // 0xf0
+        -8192.0f, -10240.0f, -12288.0f, -14336.0f, // 0xf4
+        -16384.0f, -20480.0f, -24576.0f, -28672.0f, // 0xf8
+        -32768.0f, -40960.0f, -49152.0f, -57344.0f, // 0xfc
+    };
+    // clang-format on
+    return e5m2fnuz_lut[input];
+}
+TEST_CASE(test_fp8_cast_to_float)
+{
+    // Decode each of the 256 bit patterns and compare the resulting float with the
+    // value reported by fp8e5m2fnuz_to_fp32_value for the same bits.
+    using fp8 = migraphx::fp8::fp8e5m2fnuz;
+    std::vector<uint8_t> all_bits(256);
+    std::iota(all_bits.begin(), all_bits.end(), 0);
+    const auto matches_reference = [](uint8_t bits) {
+        fp8 decoded(bits, fp8::from_bits());
+        const float actual  = float(decoded);
+        const auto expected = fp8e5m2fnuz_to_fp32_value(bits);
+        // The both-NaN case is accepted explicitly rather than left to float_equal.
+        const bool both_nan = std::isnan(actual) and std::isnan(expected);
+        return both_nan or migraphx::float_equal(actual, expected);
+    };
+    const bool all_match = std::all_of(all_bits.begin(), all_bits.end(), matches_reference);
+    EXPECT(all_match);
+}
+TEST_CASE(test_fp8_cast_from_float)
+{
+    // Each entry pairs a float input with the bit pattern that constructing an
+    // fp8e5m2fnuz from that float is expected to produce.
+    using fp8 = migraphx::fp8::fp8e5m2fnuz;
+    const std::unordered_map<float, uint8_t> expected_bits = {
+        {57344, 0x7f},      {-57344, 0xff},       {60000, 0x7f},      {-60000, 0xff},
+        {448, 0x63},        {-448, 0xe3},         {256, 0x60},        {-256, 0xe0},
+        {240, 0x60},        {-240, 0xe0},         {3.05176e-05, 0x4}, {-3.05176e-05, 0x84},
+        {1.52588e-05, 0x2}, {-1.52588e-05, 0x82}, {7.62939e-06, 0x1}, {-7.62939e-06, 0x81},
+        {3.81469e-06, 0x0}, {-3.81469e-06, 0x0},  {1e+07, 0x7f},      {1, 0x40},
+        {-1, 0xc0},         {0.1, 0x32},          {0.11, 0x33},       {0.111, 0x33},
+        {0.1111, 0x33},     {-0.1, 0xb2},         {-0.11, 0xb3},      {-0.111, 0xb3},
+        {-0.1111, 0xb3},    {0.2, 0x36},          {2, 0x44},          {20, 0x51},
+        {200, 0x5e},        {-0.2, 0xb6},         {-2, 0xc4},         {-20, 0xd1},
+        {-200, 0xde},       {0.5, 0x3c},          {-0.5, 0xbc},       {1.17549e-38, 0x0},
+        {1.4013e-45, 0x0},
+    };
+    // Compare the value built from the float against the value built from the raw bits.
+    const auto converts_as_expected = [](const auto& entry) {
+        fp8 converted(entry.first);
+        fp8 reference(entry.second, fp8::from_bits());
+        return migraphx::float_equal(converted, reference);
+    };
+    const bool all_match =
+        std::all_of(expected_bits.begin(), expected_bits.end(), converts_as_expected);
+    EXPECT(all_match);
+}
+TEST_CASE(test_positive_zero)
+{
+    // Positive zero must be reported as zero and convert back to a float equal to zero.
+    const float pzero = 0.0;
+    migraphx::fp8::fp8e5m2fnuz converted(pzero);
+    EXPECT(converted.is_zero());
+    const float round_trip = float(converted);
+    EXPECT(migraphx::float_equal(pzero, round_trip));
+}
+TEST_CASE(test_negative_zero)
+{
+    const float negative_zero = -0.0;
+    const float positive_zero = 0.0;
+    migraphx::fp8::fp8e5m2fnuz converted(negative_zero);
+    EXPECT(converted.is_zero());
+    // Converting back to float is expected to compare equal to positive zero.
+    const float round_trip = float(converted);
+    EXPECT(migraphx::float_equal(positive_zero, round_trip));
+}
+TEST_CASE(test_nan_1)
+{
+    // A float quiet NaN must still be NaN after conversion to fp8e5m2fnuz.
+    using fp8             = migraphx::fp8::fp8e5m2fnuz;
+    const float float_nan = std::numeric_limits<float>::quiet_NaN();
+    fp8 converted(float_nan);
+    EXPECT(converted.is_nan());
+    EXPECT(std::isnan(converted));
+}
+TEST_CASE(test_nan_2)
+{
+    // Rebuild the type's quiet NaN from its raw bits and check every NaN query agrees.
+    using fp8      = migraphx::fp8::fp8e5m2fnuz;
+    auto quiet_nan = std::numeric_limits<fp8>::quiet_NaN();
+    fp8 rebuilt(quiet_nan.data, fp8::from_bits());
+    EXPECT(rebuilt.is_nan());
+    EXPECT(std::isnan(rebuilt));
+    const float as_float = float(rebuilt);
+    EXPECT(std::isnan(as_float));
+}
+TEST_CASE(test_infinity_1)
+{
+    // fp8e5m2fnuz has no infinity: positive infinity is expected to convert to NaN.
+    const float positive_inf = std::numeric_limits<float>::infinity();
+    migraphx::fp8::fp8e5m2fnuz converted(positive_inf);
+    EXPECT(converted.is_nan());
+    const float as_float = float(converted);
+    EXPECT(std::isnan(as_float));
+}
+TEST_CASE(test_infinity_2)
+{
+    // fp8e5m2fnuz has no infinity: negative infinity is expected to convert to NaN.
+    const float negative_inf = -std::numeric_limits<float>::infinity();
+    migraphx::fp8::fp8e5m2fnuz converted(negative_inf);
+    EXPECT(converted.is_nan());
+    const float as_float = float(converted);
+    EXPECT(std::isnan(as_float));
+}
+TEST_CASE(test_numeric_max_1)
+{
+    // The largest finite float is expected to convert to the largest fp8e5m2fnuz value.
+    using fp8             = migraphx::fp8::fp8e5m2fnuz;
+    const float float_max = std::numeric_limits<float>::max();
+    fp8 converted(float_max);
+    auto expected = std::numeric_limits<fp8>::max();
+    EXPECT(converted == expected);
+}
+TEST_CASE(test_numeric_max_2)
+{
+    // Twice the fp8 maximum is out of range and is expected to be clipped to the maximum.
+    using fp8              = migraphx::fp8::fp8e5m2fnuz;
+    const float above_max = 2 * std::numeric_limits<fp8>::max();
+    fp8 converted(above_max);
+    auto expected = std::numeric_limits<fp8>::max();
+    EXPECT(converted == expected);
+}
+TEST_CASE(test_numeric_lowest_1)
+{
+    // The lowest finite float is expected to convert to the lowest fp8e5m2fnuz value.
+    using fp8                = migraphx::fp8::fp8e5m2fnuz;
+    const float float_lowest = std::numeric_limits<float>::lowest();
+    fp8 converted(float_lowest);
+    auto expected = std::numeric_limits<fp8>::lowest();
+    EXPECT(converted == expected);
+}
+TEST_CASE(test_numeric_lowest_2)
+{
+    // Twice the fp8 lowest value is out of range and is expected to be clipped to lowest.
+    using fp8                = migraphx::fp8::fp8e5m2fnuz;
+    const float below_lowest = 2.0 * std::numeric_limits<fp8>::lowest();
+    fp8 converted(below_lowest);
+    auto expected = std::numeric_limits<fp8>::lowest();
+    EXPECT(converted == expected);
+}
+TEST_CASE(test_max_eq_lowest)
+{
+    // The representable range is symmetric: lowest() must equal the negated max().
+    using limits     = std::numeric_limits<migraphx::fp8::fp8e5m2fnuz>;
+    auto lowest      = limits::lowest();
+    auto negated_max = -1 * limits::max();
+    EXPECT(migraphx::float_equal(lowest, negated_max));
+}
+TEST_CASE(test_isfinite)
+{
+    // Values built from 0.0 and -0.0 are finite; the quiet NaN value is not.
+    using fp8 = migraphx::fp8::fp8e5m2fnuz;
+    auto from_pos_zero = fp8(0.0);
+    auto from_neg_zero = fp8(-0.0);
+    auto nan_value     = fp8(std::numeric_limits<fp8>::quiet_NaN());
+    EXPECT(std::isfinite(from_pos_zero));
+    EXPECT(std::isfinite(from_neg_zero));
+    EXPECT(not std::isfinite(nan_value));
+}
+TEST_CASE(test_no_infinity)
+{
+    // numeric_limits must report that fp8e5m2fnuz has no infinity representation.
+    using limits            = std::numeric_limits<migraphx::fp8::fp8e5m2fnuz>;
+    const bool has_infinity = limits::has_infinity;
+    EXPECT(not has_infinity);
+}
+TEST_CASE(test_binary_ops)
+{
+    using fp8 = migraphx::fp8::fp8e5m2fnuz;
+    // Addition: summing the two zeros, or +1 and -1, must equal both zero operands.
+    auto neg_one  = fp8(-1.0);
+    auto pos_one  = fp8(1.0);
+    auto pos_zero = fp8(0.0);
+    auto neg_zero = fp8(-0.0);
+    auto zero_sum = pos_zero + neg_zero;
+    EXPECT(migraphx::float_equal(zero_sum, pos_zero));
+    EXPECT(migraphx::float_equal(zero_sum, neg_zero));
+    auto cancelled = neg_one + pos_one;
+    EXPECT(migraphx::float_equal(cancelled, pos_zero));
+    EXPECT(migraphx::float_equal(cancelled, neg_zero));
+    // Ordering: every relational operator on a positive/negative pair and on equal operands.
+    auto pos_ten = fp8(10.0);
+    auto neg_ten = fp8(-10.0);
+    EXPECT(bool{pos_ten > neg_ten});
+    EXPECT(bool{neg_ten < pos_ten});
+    EXPECT(bool{neg_ten <= pos_ten});
+    EXPECT(bool{pos_ten >= neg_ten});
+    EXPECT(bool{pos_ten <= pos_ten});
+    EXPECT(bool{neg_ten >= neg_ten});
+    EXPECT(not migraphx::float_equal(neg_ten, pos_ten));
+}
+TEST_CASE(test_fabs)
+{
+    // fabs of -1 must equal +1.
+    using fp8     = migraphx::fp8::fp8e5m2fnuz;
+    auto neg_one  = fp8(-1.0);
+    auto pos_one  = fp8(1.0);
+    auto absolute = migraphx::fp8::fabs(neg_one);
+    EXPECT(migraphx::float_equal(pos_one, absolute));
+}
+TEST_CASE(test_stream_op)
+{
+    // Streaming -1.0 must produce "-1"; streaming the quiet NaN must produce "nan".
+    using fp8    = migraphx::fp8::fp8e5m2fnuz;
+    auto neg_one = fp8(-1.0);
+    std::stringstream value_out;
+    value_out << neg_one;
+    EXPECT(std::string("-1") == value_out.str());
+    // A second, empty stream is used for the NaN check instead of resetting the first.
+    auto nan_value = std::numeric_limits<fp8>::quiet_NaN();
+    std::stringstream nan_out;
+    nan_out << nan_value;
+    EXPECT(std::string("nan") == nan_out.str());
+}
+// Entry point of the test binary: passes the command-line arguments to test::run.
+int main(int argc, const char* argv[])
+{
+    test::run(argc, argv);
+}
--- a/test/fuse_pointwise.cpp
+++ b/test/fuse_pointwise.cpp
@@ -414,8 +414,8 @@ TEST_CASE(add_reshape_add_nonstandard)
        auto y       = mm->add_parameter("y", s1);
        auto z       = mm->add_parameter("z", s2);
        auto add1    = mm->add_instruction(migraphx::make_op("add"), x, y);
-        auto c       = mm->add_instruction(migraphx::make_op("contiguous"), add1);
+        auto reshape =
-        auto reshape = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), c);
+            mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), add1);
        auto add2    = mm->add_instruction(migraphx::make_op("add"), reshape, z);
        mm->add_return({add2});
    }
@@ -426,10 +426,8 @@ TEST_CASE(add_reshape_add_nonstandard)
        auto x   = mm->add_parameter("x", s1);
        auto y   = mm->add_parameter("y", s1);
        auto z   = mm->add_parameter("z", s2);
-        auto cx  = mm->add_instruction(migraphx::make_op("contiguous"), x);
+        auto x2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), x);
-        auto cy  = mm->add_instruction(migraphx::make_op("contiguous"), y);
+        auto y2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), y);
-        auto x2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), cx);
-        auto y2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), cy);
        auto z2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), z);
        auto fadd =
            add_pointwise(p2, "main:pointwise0", {x2, y2, z2}, [=](auto* pm, const auto& inputs) {
@@ -466,10 +464,8 @@ TEST_CASE(add_unsqueeze_add_nonstandard)
        auto x   = mm->add_parameter("x", s1);
        auto y   = mm->add_parameter("y", s1);
        auto z   = mm->add_parameter("z", s2);
-        auto cx  = mm->add_instruction(migraphx::make_op("contiguous"), x);
+        auto x2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), x);
-        auto cy  = mm->add_instruction(migraphx::make_op("contiguous"), y);
+        auto y2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), y);
-        auto x2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), cx);
-        auto y2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), cy);
        auto fadd =
            add_pointwise(p2, "main:pointwise0", {x2, y2, z}, [=](auto* pm, const auto& inputs) {
                auto add1 = pm->add_instruction(migraphx::make_op("add"), inputs[0], inputs[1]);

--- a/test/gpu/codegen_literal.cpp
+++ b/test/gpu/codegen_literal.cpp
@@ -64,7 +64,7 @@ TEST_CASE(mul_literal_round_test)
    auto l1 = mm->add_literal(1 / 0.00787402f);
    auto mul   = mm->add_instruction(migraphx::make_op("mul"), l0, l1);
-    auto round = mm->add_instruction(migraphx::make_op("round"), mul);
+    auto round = mm->add_instruction(migraphx::make_op("nearbyint"), mul);
    mm->add_return({round});

--- a/test/gpu/fuse_mlir.cpp
+++ b/test/gpu/fuse_mlir.cpp
@@ -144,10 +144,12 @@ TEST_CASE(int_quant_dot_tanh_fails)
        auto tanh = add_pointwise(p1, "main:pointwise0", {dot}, single_pointwise("tanh"));
        mm->add_return({tanh});
    }
-    migraphx::program p2(p1);
+    // This pass should not fuse as int32_t tanh isn't supported.
-    // This pass should do nothing as int32_t tanh isn't supported.
    run_pass(p1);
-    EXPECT(p1 == p2);
+    auto* mm = p1.get_main_module();
+    bool has_pointwise =
+        std::any_of(mm->begin(), mm->end(), [&](const auto& i) { return i.name() == "pointwise"; });
+    EXPECT(has_pointwise);
 }
 int main(int argc, const char* argv[])