Commit 4b0b327b authored by Umang Yadav
Browse files

merge migx-jit-lib-hiprtc branch

parent ba251e4a
...@@ -4,11 +4,26 @@ ...@@ -4,11 +4,26 @@
#pragma once #pragma once
namespace ck { namespace ck {
#ifdef __HIPCC_RTC__
// Hand-written counterpart of std::enable_if for the __HIPCC_RTC__ branch
// (the #else branch aliases the std:: version instead).
// Primary template: deliberately has no nested `type`, so naming
// enable_if<false, Result>::type fails substitution.
template <bool Condition, class Result = void>
struct enable_if
{
};

// Selected when the condition is true: publishes Result as `type`.
template <class Result>
struct enable_if<true, Result>
{
    typedef Result type;
};

// Shorthand for `typename enable_if<Condition, Result>::type`.
template <bool Condition, class Result = void>
using enable_if_t = typename enable_if<Condition, Result>::type;
#else
template <bool B, typename T = void> template <bool B, typename T = void>
using enable_if = std::enable_if<B, T>; using enable_if = std::enable_if<B, T>;
template <bool B, typename T = void> template <bool B, typename T = void>
using enable_if_t = typename std::enable_if<B, T>::type; using enable_if_t = typename std::enable_if<B, T>::type;
#endif
} // namespace ck } // namespace ck
...@@ -44,7 +44,7 @@ __host__ __device__ f8_t run_cast_to_f8(T x, uint32_t rng) ...@@ -44,7 +44,7 @@ __host__ __device__ f8_t run_cast_to_f8(T x, uint32_t rng)
constexpr uint32_t nan_mask = is_half ? 0x7C00 : 0x7F800000; constexpr uint32_t nan_mask = is_half ? 0x7C00 : 0x7F800000;
// convert to bitwise // convert to bitwise
typedef typename std::conditional<std::is_same<T, half_t>::value, uint16_t, uint32_t>::type typedef typename ck::conditional<std::is_same<T, half_t>::value, uint16_t, uint32_t>::type
T_bitwise; T_bitwise;
T_bitwise x_bitwise = *(reinterpret_cast<T_bitwise*>(&x)); T_bitwise x_bitwise = *(reinterpret_cast<T_bitwise*>(&x));
...@@ -180,7 +180,7 @@ __host__ __device__ T run_cast_from_f8(f8_t x) ...@@ -180,7 +180,7 @@ __host__ __device__ T run_cast_from_f8(f8_t x)
constexpr int exp_low_cutoff = constexpr int exp_low_cutoff =
(1 << (type_exp - 1)) - (1 << (f8_exp - 1)) + 1 - (negative_zero_nan ? 1 : 0); (1 << (type_exp - 1)) - (1 << (f8_exp - 1)) + 1 - (negative_zero_nan ? 1 : 0);
typename std::conditional<std::is_same<T, half_t>::value, uint16_t, uint32_t>::type retval; typename ck::conditional<std::is_same<T, half_t>::value, uint16_t, uint32_t>::type retval;
if constexpr(negative_zero_nan) if constexpr(negative_zero_nan)
{ {
......
...@@ -120,11 +120,11 @@ constexpr auto conditional_expr(X&& x, Y&& y) ...@@ -120,11 +120,11 @@ constexpr auto conditional_expr(X&& x, Y&& y)
{ {
if constexpr(predicate) if constexpr(predicate)
{ {
return std::forward<X>(x); return ck::forward<X>(x);
} }
else else
{ {
return std::forward<Y>(y); return ck::forward<Y>(y);
} }
} }
......
...@@ -21,7 +21,7 @@ struct unpack_impl<Sequence<Is...>> ...@@ -21,7 +21,7 @@ struct unpack_impl<Sequence<Is...>>
template <typename F, typename X> template <typename F, typename X>
__host__ __device__ constexpr auto operator()(F&& f, X&& x) const __host__ __device__ constexpr auto operator()(F&& f, X&& x) const
{ {
return std::forward<F>(f)(std::forward<X>(x).At(Number<Is>{})...); return ck::forward<F>(f)(ck::forward<X>(x).At(Number<Is>{})...);
} }
}; };
...@@ -35,8 +35,8 @@ struct unpack2_impl<Sequence<Is...>, Sequence<Js...>> ...@@ -35,8 +35,8 @@ struct unpack2_impl<Sequence<Is...>, Sequence<Js...>>
template <typename F, typename X, typename Y> template <typename F, typename X, typename Y>
__host__ __device__ constexpr auto operator()(F&& f, X&& x, Y&& y) const __host__ __device__ constexpr auto operator()(F&& f, X&& x, Y&& y) const
{ {
return std::forward<F>(f)(std::forward<X>(x).At(Number<Is>{})..., return ck::forward<F>(f)(ck::forward<X>(x).At(Number<Is>{})...,
std::forward<Y>(y).At(Number<Js>{})...); ck::forward<Y>(y).At(Number<Js>{})...);
} }
}; };
...@@ -47,7 +47,7 @@ __host__ __device__ constexpr auto unpack(F&& f, X&& x) ...@@ -47,7 +47,7 @@ __host__ __device__ constexpr auto unpack(F&& f, X&& x)
{ {
using X_ = remove_reference_t<X>; using X_ = remove_reference_t<X>;
return detail::unpack_impl<typename arithmetic_sequence_gen<0, X_::Size(), 1>::type>{}( return detail::unpack_impl<typename arithmetic_sequence_gen<0, X_::Size(), 1>::type>{}(
std::forward<F>(f), std::forward<X>(x)); ck::forward<F>(f), ck::forward<X>(x));
} }
// TODO: properly implement unpack that takes any number of containers // TODO: properly implement unpack that takes any number of containers
...@@ -58,7 +58,7 @@ __host__ __device__ constexpr auto unpack2(F&& f, X&& x, Y&& y) ...@@ -58,7 +58,7 @@ __host__ __device__ constexpr auto unpack2(F&& f, X&& x, Y&& y)
using Y_ = remove_reference_t<Y>; using Y_ = remove_reference_t<Y>;
return detail::unpack2_impl<typename arithmetic_sequence_gen<0, X_::Size(), 1>::type, return detail::unpack2_impl<typename arithmetic_sequence_gen<0, X_::Size(), 1>::type,
typename arithmetic_sequence_gen<0, Y_::Size(), 1>::type>{}( typename arithmetic_sequence_gen<0, Y_::Size(), 1>::type>{}(
std::forward<F>(f), std::forward<X>(x), std::forward<Y>(y)); ck::forward<F>(f), ck::forward<X>(x), ck::forward<Y>(y));
} }
} // namespace ck } // namespace ck
......
...@@ -48,4 +48,9 @@ __host__ __device__ constexpr auto operator%(integral_constant<TX, X>, integral_ ...@@ -48,4 +48,9 @@ __host__ __device__ constexpr auto operator%(integral_constant<TX, X>, integral_
return integral_constant<decltype(X % Y), X % Y>{}; return integral_constant<decltype(X % Y), X % Y>{};
} }
// Boolean flavour of integral_constant.
template <bool Value>
using bool_constant = integral_constant<bool, Value>;

// The two possible bool_constant types, named for convenience.
typedef bool_constant<true> true_type;
typedef bool_constant<false> false_type;
} // namespace ck } // namespace ck
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#include "type.hpp" #include "type.hpp"
#include "tuple.hpp" #include "tuple.hpp"
// Fallback definition of INT32_MAX (largest value of a 32-bit signed integer).
// Guarded so that it does not redefine the macro when <cstdint>/<stdint.h>
// has already provided it earlier in the translation unit.
#ifndef INT32_MAX
#define INT32_MAX 2147483647
#endif
namespace ck { namespace ck {
// magic number division // magic number division
......
...@@ -168,9 +168,11 @@ __device__ double exp<double>(double x) ...@@ -168,9 +168,11 @@ __device__ double exp<double>(double x)
return exp(x); return exp(x);
} }
#ifndef __HIPCC_RTC__
static inline __host__ float exp(float x) { return ::expf(x); } static inline __host__ float exp(float x) { return ::expf(x); }
static inline __host__ double exp(double x) { return std::exp(x); } static inline __host__ double exp(double x) { return std::exp(x); }
#endif
// greatest common divisor, aka highest common factor // greatest common divisor, aka highest common factor
__host__ __device__ constexpr index_t gcd(index_t x, index_t y) __host__ __device__ constexpr index_t gcd(index_t x, index_t y)
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
namespace ck { namespace ck {
namespace math { namespace math {
#ifndef __HIPCC_RTC__
// math functions for the host, some are implemented by calling C++ std functions // math functions for the host, some are implemented by calling C++ std functions
static inline __host__ float abs(float x) { return std::abs(x); }; static inline __host__ float abs(float x) { return std::abs(x); };
...@@ -100,7 +101,7 @@ static inline __host__ half_t tanh(half_t x) ...@@ -100,7 +101,7 @@ static inline __host__ half_t tanh(half_t x)
static inline __host__ float tanh(float x) { return std::tanh(x); }; static inline __host__ float tanh(float x) { return std::tanh(x); };
static inline __host__ double tanh(double x) { return std::tanh(x); }; static inline __host__ double tanh(double x) { return std::tanh(x); };
#endif
// math functions for the HIP kernel, some are implemented by calling hip builtin functions // math functions for the HIP kernel, some are implemented by calling hip builtin functions
static inline __device__ float abs(float x) { return ::abs(x); }; static inline __device__ float abs(float x) { return ::abs(x); };
......
...@@ -2,12 +2,13 @@ ...@@ -2,12 +2,13 @@
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <ck/utility/ignore.hpp>
namespace ck { namespace ck {
// Pseudo random number generator // Pseudo random number generator
// version for fp32 // version for fp32
template <typename T, uint32_t seed_t, std::enable_if_t<std::is_same<float, T>{}, bool> = false> template <typename T, uint32_t seed_t, ck::enable_if_t<std::is_same<float, T>{}, bool> = false>
__host__ __device__ uint32_t prand_generator(index_t id, T val, uint32_t seed = seed_t) __host__ __device__ uint32_t prand_generator(index_t id, T val, uint32_t seed = seed_t)
{ {
uint32_t x = *(reinterpret_cast<uint32_t*>(&val)); uint32_t x = *(reinterpret_cast<uint32_t*>(&val));
...@@ -23,7 +24,7 @@ __host__ __device__ uint32_t prand_generator(index_t id, T val, uint32_t seed = ...@@ -23,7 +24,7 @@ __host__ __device__ uint32_t prand_generator(index_t id, T val, uint32_t seed =
} }
// version for fp16 // version for fp16
template <typename T, uint32_t seed_t, std::enable_if_t<std::is_same<half_t, T>{}, bool> = false> template <typename T, uint32_t seed_t, ck::enable_if_t<std::is_same<half_t, T>{}, bool> = false>
__host__ __device__ uint32_t prand_generator(index_t id, T val, uint32_t seed = seed_t) __host__ __device__ uint32_t prand_generator(index_t id, T val, uint32_t seed = seed_t)
{ {
uint16_t x = *(reinterpret_cast<uint16_t*>(&val)); uint16_t x = *(reinterpret_cast<uint16_t*>(&val));
...@@ -40,12 +41,12 @@ __host__ __device__ uint32_t prand_generator(index_t id, T val, uint32_t seed = ...@@ -40,12 +41,12 @@ __host__ __device__ uint32_t prand_generator(index_t id, T val, uint32_t seed =
// return 0 if data is not fp16 or fp32 // return 0 if data is not fp16 or fp32
template <typename T, template <typename T,
uint32_t seed_t, uint32_t seed_t,
std::enable_if_t<!(std::is_same<float, T>{} || std::is_same<half_t, T>{}), bool> = false> ck::enable_if_t<!(std::is_same<float, T>{} || std::is_same<half_t, T>{}), bool> = false>
__host__ __device__ uint32_t prand_generator(int id, T val, uint32_t seed = seed_t) __host__ __device__ uint32_t prand_generator(int id, T val, uint32_t seed = seed_t)
{ {
std::ignore = id; ck::ignore = id;
std::ignore = val; ck::ignore = val;
std::ignore = seed; ck::ignore = seed;
return 0; return 0;
} }
......
...@@ -32,7 +32,7 @@ struct TupleElementKeyData ...@@ -32,7 +32,7 @@ struct TupleElementKeyData
template <typename T, template <typename T,
typename enable_if<!is_same<remove_cvref_t<T>, TupleElementKeyData>::value, typename enable_if<!is_same<remove_cvref_t<T>, TupleElementKeyData>::value,
bool>::type = false> bool>::type = false>
__host__ __device__ constexpr TupleElementKeyData(T&& v) : mData(std::forward<T>(v)) __host__ __device__ constexpr TupleElementKeyData(T&& v) : mData(ck::forward<T>(v))
{ {
} }
...@@ -67,7 +67,7 @@ get_tuple_element_data_reference(TupleElementKeyData<Key, Data>&& x) ...@@ -67,7 +67,7 @@ get_tuple_element_data_reference(TupleElementKeyData<Key, Data>&& x)
// Returns, as a value of type Data, the payload stored in the tuple element
// identified by Key.
template <typename Key, typename Data>
__host__ __device__ constexpr Data get_tuple_element_data(const TupleElementKeyData<Key, Data>& x)
{
    // forward() takes its type as an explicit template argument (its parameter
    // is a non-deduced context), so the former `forward(x.mData)` could not
    // compile once this function was instantiated. The return type is Data by
    // value, so returning the member directly is all that is needed.
    return x.mData;
}
template <typename Indices, typename... Xs> template <typename Indices, typename... Xs>
...@@ -83,13 +83,13 @@ struct TupleImpl<Sequence<Is...>, Xs...> : TupleElementKeyData<TupleElementKey<I ...@@ -83,13 +83,13 @@ struct TupleImpl<Sequence<Is...>, Xs...> : TupleElementKeyData<TupleElementKey<I
!is_same<remove_cvref_t<Y>, TupleImpl>::value, !is_same<remove_cvref_t<Y>, TupleImpl>::value,
bool>::type = false> bool>::type = false>
__host__ __device__ constexpr TupleImpl(Y&& y) __host__ __device__ constexpr TupleImpl(Y&& y)
: TupleElementKeyData<TupleElementKey<Is>, Xs>(std::forward<Y>(y))... : TupleElementKeyData<TupleElementKey<Is>, Xs>(ck::forward<Y>(y))...
{ {
} }
template <typename... Ys, typename enable_if<sizeof...(Ys) >= 2, bool>::type = false> template <typename... Ys, typename enable_if<sizeof...(Ys) >= 2, bool>::type = false>
__host__ __device__ constexpr TupleImpl(Ys&&... ys) __host__ __device__ constexpr TupleImpl(Ys&&... ys)
: TupleElementKeyData<TupleElementKey<Is>, Xs>(std::forward<Ys>(ys))... : TupleElementKeyData<TupleElementKey<Is>, Xs>(ck::forward<Ys>(ys))...
{ {
static_assert(sizeof...(Is) == sizeof...(Xs) && sizeof...(Is) == sizeof...(Ys), static_assert(sizeof...(Is) == sizeof...(Xs) && sizeof...(Is) == sizeof...(Ys),
"wrong! inconsistent size"); "wrong! inconsistent size");
...@@ -123,14 +123,14 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X ...@@ -123,14 +123,14 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
template <typename Y, template <typename Y,
typename enable_if<sizeof...(Xs) == 1 && !is_same<remove_cvref_t<Y>, Tuple>::value, typename enable_if<sizeof...(Xs) == 1 && !is_same<remove_cvref_t<Y>, Tuple>::value,
bool>::type = false> bool>::type = false>
__host__ __device__ constexpr Tuple(Y&& y) : base(std::forward<Y>(y)) __host__ __device__ constexpr Tuple(Y&& y) : base(ck::forward<Y>(y))
{ {
} }
template <typename... Ys, template <typename... Ys,
typename enable_if<sizeof...(Ys) == sizeof...(Xs) && sizeof...(Ys) >= 2, bool>::type = typename enable_if<sizeof...(Ys) == sizeof...(Xs) && sizeof...(Ys) >= 2, bool>::type =
false> false>
__host__ __device__ constexpr Tuple(Ys&&... ys) : base(std::forward<Ys>(ys)...) __host__ __device__ constexpr Tuple(Ys&&... ys) : base(ck::forward<Ys>(ys)...)
{ {
} }
...@@ -208,7 +208,7 @@ using tuple_element_t = typename tuple_element<I, TTuple>::type; ...@@ -208,7 +208,7 @@ using tuple_element_t = typename tuple_element<I, TTuple>::type;
template <typename... Xs> template <typename... Xs>
__host__ __device__ constexpr auto make_tuple(Xs&&... xs) __host__ __device__ constexpr auto make_tuple(Xs&&... xs)
{ {
return Tuple<remove_cvref_t<Xs>...>(std::forward<Xs>(xs)...); return Tuple<remove_cvref_t<Xs>...>(ck::forward<Xs>(xs)...);
} }
// https://en.cppreference.com/w/cpp/utility/tuple/tie // https://en.cppreference.com/w/cpp/utility/tuple/tie
......
...@@ -28,7 +28,7 @@ __host__ __device__ constexpr auto concat_tuple_of_reference(const Tuple<X&...>& ...@@ -28,7 +28,7 @@ __host__ __device__ constexpr auto concat_tuple_of_reference(const Tuple<X&...>&
const Tuple<Y&...>& ty) const Tuple<Y&...>& ty)
{ {
return unpack2( return unpack2(
[&](auto&&... zs) { return Tuple<decltype(zs)...>{std::forward<decltype(zs)>(zs)...}; }, [&](auto&&... zs) { return Tuple<decltype(zs)...>{ck::forward<decltype(zs)>(zs)...}; },
tx, tx,
ty); ty);
} }
......
...@@ -4,10 +4,122 @@ ...@@ -4,10 +4,122 @@
#pragma once #pragma once
#include "ck/ck.hpp"
#include "ck/utility/enable_if.hpp"
#include "ck/utility/integral_constant.hpp"
#ifndef __HIPCC_RTC__
#include <type_traits>
#include <utility>
#endif
namespace ck { namespace ck {
#ifdef __HIPCC_RTC__
// Helper macros for the __HIPCC_RTC__ branch: each one declares a trait struct
// called `name` that derives from bool_constant<__name(...)>. The `__##name`
// token paste turns e.g. `is_class` into `__is_class`, which is expected to be
// a compiler-provided builtin; this block declares none of them itself.

// One-argument trait: name<T> wraps __name(T).
// NOLINTNEXTLINE
#define CK_BUILTIN_TYPE_TRAIT1(name) \
template <class T> \
struct name : bool_constant<__##name(T)> \
{ \
}
// Two-argument trait: name<T, U> wraps __name(T, U).
// NOLINTNEXTLINE
#define CK_BUILTIN_TYPE_TRAIT2(name) \
template <class T, class U> \
struct name : bool_constant<__##name(T, U)> \
{ \
}
// Variadic trait: name<Ts...> wraps __name(Ts...).
// NOTE(review): not instantiated anywhere in the visible part of this file.
// NOLINTNEXTLINE
#define CK_BUILTIN_TYPE_TRAITN(name) \
template <class... Ts> \
struct name : bool_constant<__##name(Ts...)> \
{ \
}
// Traits generated from builtins. The trailing `;` completes each struct
// definition, because the macros end at the closing brace.
CK_BUILTIN_TYPE_TRAIT1(is_class);
CK_BUILTIN_TYPE_TRAIT1(is_pointer);
CK_BUILTIN_TYPE_TRAIT1(is_reference);
CK_BUILTIN_TYPE_TRAIT1(is_trivially_copyable);
CK_BUILTIN_TYPE_TRAIT1(is_unsigned);
CK_BUILTIN_TYPE_TRAIT2(is_base_of);
// remove_cv<T>::type is T with any top-level const and/or volatile qualifier
// removed; unqualified types are passed through unchanged.
template <class T>
struct remove_cv
{
    using type = T;
};
// Top-level const only.
template <class T>
struct remove_cv<const T> : remove_cv<T>
{
};
// Top-level volatile only.
template <class T>
struct remove_cv<volatile T> : remove_cv<T>
{
};
// Both qualifiers. This specialization is required: without it a
// `const volatile X` argument matches the `const T` and the `volatile T`
// specializations equally well, and the instantiation is rejected as
// ambiguous. Being more specialized than either, this one wins.
template <class T>
struct remove_cv<const volatile T> : remove_cv<T>
{
};
// remove_reference<T>::type names the type T refers to: lvalue and rvalue
// references are stripped, any other type is passed through unchanged.
template <class Referred>
struct remove_reference
{
    using type = Referred;
};
// Lvalue reference: Referred& -> Referred.
template <class Referred>
struct remove_reference<Referred&>
{
    using type = Referred;
};
// Rvalue reference: Referred&& -> Referred.
template <class Referred>
struct remove_reference<Referred&&>
{
    using type = Referred;
};
// remove_pointer<T>::type is the pointee type when T is a pointer (with or
// without top-level cv-qualifiers on the pointer itself), and T otherwise.
template <class NonPointer>
struct remove_pointer
{
    using type = NonPointer;
};
// Plain pointer.
template <class Pointee>
struct remove_pointer<Pointee*>
{
    using type = Pointee;
};
// const-qualified pointer.
template <class Pointee>
struct remove_pointer<Pointee* const>
{
    using type = Pointee;
};
// volatile-qualified pointer.
template <class Pointee>
struct remove_pointer<Pointee* volatile>
{
    using type = Pointee;
};
// const volatile-qualified pointer.
template <class Pointee>
struct remove_pointer<Pointee* const volatile>
{
    using type = Pointee;
};
// forward<T>(arg): casts arg to T&&, preserving the value category encoded in
// the explicitly supplied T. This overload accepts lvalues.
template <class T>
constexpr T&& forward(typename remove_reference<T>::type& arg) noexcept
{
    using result_type = T&&;
    return static_cast<result_type>(arg);
}
// Overload accepting rvalues; same cast to T&&.
template <class T>
constexpr T&& forward(typename remove_reference<T>::type&& arg) noexcept
{
    using result_type = T&&;
    return static_cast<result_type>(arg);
}
#else
// Non-RTC build: take forward and the type traits straight from the standard
// library. <utility> and <type_traits> are included at file scope at the top
// of this header and must not be included here: this point is inside
// namespace ck, and including a standard header here could declare its
// contents nested inside ck.
using std::forward;
using std::is_base_of;
using std::is_class;
using std::is_pointer;
using std::is_reference;
using std::is_trivially_copyable;
using std::is_unsigned;
using std::remove_cv;
using std::remove_pointer;
using std::remove_reference;
#endif
template <typename X, typename Y> template <typename X, typename Y>
struct is_same : public integral_constant<bool, false> struct is_same : public integral_constant<bool, false>
...@@ -19,25 +131,39 @@ struct is_same<X, X> : public integral_constant<bool, true> ...@@ -19,25 +131,39 @@ struct is_same<X, X> : public integral_constant<bool, true>
{ {
}; };
template <typename T>
inline constexpr bool is_reference_v = is_reference<T>::value;
template <typename X, typename Y> template <typename X, typename Y>
inline constexpr bool is_same_v = is_same<X, Y>::value; inline constexpr bool is_same_v = is_same<X, Y>::value;
template <typename X, typename Y>
inline constexpr bool is_base_of_v = is_base_of<X, Y>::value;
template <typename T>
inline constexpr bool is_unsigned_v = is_unsigned<T>::value;
template <typename T>
using remove_reference_t = typename remove_reference<T>::type;
template <typename T> template <typename T>
using remove_reference_t = typename std::remove_reference<T>::type; using remove_reference_t = typename remove_reference<T>::type;
template <typename T> template <typename T>
using remove_cv_t = typename std::remove_cv<T>::type; using remove_cv_t = typename remove_cv<T>::type;
template <typename T> template <typename T>
using remove_cvref_t = remove_cv_t<std::remove_reference_t<T>>; using remove_cvref_t = remove_cv_t<remove_reference_t<T>>;
template <typename T> template <typename T>
using remove_pointer_t = typename std::remove_pointer<T>::type; using remove_pointer_t = typename remove_pointer<T>::type;
template <typename T> template <typename T>
inline constexpr bool is_pointer_v = std::is_pointer<T>::value; inline constexpr bool is_pointer_v = is_pointer<T>::value;
template <typename Y, typename X, typename enable_if<sizeof(X) == sizeof(Y), bool>::type = false> template <typename Y,
typename X,
typename ck::enable_if<sizeof(X) == sizeof(Y), bool>::type = false>
__host__ __device__ constexpr Y bit_cast(const X& x) __host__ __device__ constexpr Y bit_cast(const X& x)
{ {
#if CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST #if CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST
......
...@@ -13,7 +13,7 @@ namespace ck { ...@@ -13,7 +13,7 @@ namespace ck {
template <typename Y, typename X> template <typename Y, typename X>
__host__ __device__ constexpr Y type_convert(X x) __host__ __device__ constexpr Y type_convert(X x)
{ {
static_assert(!std::is_reference_v<Y> && !std::is_reference_v<X>); static_assert(!ck::is_reference_v<Y> && !ck::is_reference_v<X>);
return static_cast<Y>(x); return static_cast<Y>(x);
} }
...@@ -190,7 +190,7 @@ inline __host__ __device__ f8_t f8_convert_sr<f8_t, float>(float x) ...@@ -190,7 +190,7 @@ inline __host__ __device__ f8_t f8_convert_sr<f8_t, float>(float x)
constexpr f8_rounding_mode rm = f8_rounding_mode::stochastic; constexpr f8_rounding_mode rm = f8_rounding_mode::stochastic;
constexpr int seed = 42; constexpr int seed = 42;
// as thread id is not available on host, use 0 for prn generation // as thread id is not available on host, use 0 for prn generation
uint32_t rng = prand_generator<float, seed>(reinterpret_cast<uintptr_t>(&x), x); uint32_t rng = prand_generator<float, seed>(reinterpret_cast<size_t>(&x), x);
return utils::cast_to_f8<float, negative_zero_nan, clip, (rm == f8_rounding_mode::stochastic)>( return utils::cast_to_f8<float, negative_zero_nan, clip, (rm == f8_rounding_mode::stochastic)>(
x, rng); x, rng);
} }
...@@ -204,7 +204,7 @@ inline __host__ __device__ f8_t f8_convert_sr<f8_t, half_t>(half_t x) ...@@ -204,7 +204,7 @@ inline __host__ __device__ f8_t f8_convert_sr<f8_t, half_t>(half_t x)
constexpr f8_rounding_mode rm = f8_rounding_mode::stochastic; constexpr f8_rounding_mode rm = f8_rounding_mode::stochastic;
constexpr int seed = 42; constexpr int seed = 42;
// as thread id is not available on host, use 0 for prn generation // as thread id is not available on host, use 0 for prn generation
uint32_t rng = prand_generator<half_t, seed>(reinterpret_cast<uintptr_t>(&x), x); uint32_t rng = prand_generator<half_t, seed>(reinterpret_cast<size_t>(&x), x);
return utils::cast_to_f8<half_t, negative_zero_nan, clip, (rm == f8_rounding_mode::stochastic)>( return utils::cast_to_f8<half_t, negative_zero_nan, clip, (rm == f8_rounding_mode::stochastic)>(
x, rng); x, rng);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment