Codegen hipRTC compilation (#1579)

* updating codegen build for MIOpen access: adding .cmake for codegen component * updating CMake * adding in header guards for some headers due to issues with hiprtc compilation in MIOpen * some more header guards * putting env file in header guard * cleaning up some includes * updated types file for hiprtc purposes * fixed types file: bit-wise/memcpy issue * updating multiple utility files to deal with standard header inclusion for hiprtc * added some more header guards in the utility files, replacing some standard header functionality * added some more header guards * fixing some conflicts in utility files, another round of header guards * fixing errors in data type file * resolved conflict errors in a few utility files * added header guards/replicated functionality in device files * resolved issues with standard headers in device files: device_base and device_grouped_conv_fwd_multiple_abd * resolved issues with standard headers in device files: device_base.hpp, device_grouped_conv_fwd_multiple_abd.hpp, device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp * added header guards for gridwise gemm files: gridwise_gemm_multiple_abd_xdl_cshuffle.hpp and gridwise_gemm_multiple_d_xdl_cshuffle.hpp * fixed issue with numerics header, removed from transform_conv_fwd_to_gemm and added to device_column_to_image_impl, device_grouped_conv_fwd_multiple_abd_xdl_cshuffle, device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3, device_image_to_column_impl * replaced standard header usage and added header guards in block to ctile map and gridwise_gemm_pipeline_selector * resolved errors in device_gemm_xdl_splitk_c_shuffle files in regards to replacement of standard headers in previous commit * added replicated functionality for standard header methods in utility files * replaced standard header functionality in threadwise tensor slice transfer files and added header guards in element_wise_operation.hpp * temp fix for namespace error in MIOpen * remove standard header usage in codegen device op * removed standard header usage in elementwise files, resolved namespace errors * formatting fix * changed codegen argument to ON for testing * temporarily removing codegen compiler flag for testing purposes * added codegen flag again, set default to ON * set codegen flag default back to OFF * replaced enable_if_t standard header usage in data_type.hpp * added some debug prints to pinpoint issues in MIOpen * added print outs to debug in MIOpen * removed debug print outs from device op * resolved stdexcept include error * formatting fix * adding includes to new fp8 file to resolve ck::enable_if_t errors * made changes to amd_wave_read_first_lane * updated functionality in type utility file * fixed end of file issue * resovled errors in type utility file, added functionality to array utility file * fixed standard header usage replication in data_type file, resolves error with failing examples on navi3x * formatting fix * replaced standard header usage in amd_ck_fp8 file * added include to random_gen file * removed and replicated standard header usage from data_type and type_convert files for fp8 changes * replicated standard unsigned integer types in random_gen * resolved comments from review: put calls to reinterpret_cast for size_t in header guards * updated/added copyright headers * removed duplicate header * fixed typo in header guard * updated copyright headers --------- Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>

Codegen hipRTC compilation (#1579)
* updating codegen build for MIOpen access: adding .cmake for codegen component * updating CMake * adding in header guards for some headers due to issues with hiprtc compilation in MIOpen * some more header guards * putting env file in header guard * cleaning up some includes * updated types file for hiprtc purposes * fixed types file: bit-wise/memcpy issue * updating multiple utility files to deal with standard header inclusion for hiprtc * added some more header guards in the utility files, replacing some standard header functionality * added some more header guards * fixing some conflicts in utility files, another round of header guards * fixing errors in data type file * resolved conflict errors in a few utility files * added header guards/replicated functionality in device files * resolved issues with standard headers in device files: device_base and device_grouped_conv_fwd_multiple_abd * resolved issues with standard headers in device files: device_base.hpp, device_grouped_conv_fwd_multiple_abd.hpp, device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp * added header guards for gridwise gemm files: gridwise_gemm_multiple_abd_xdl_cshuffle.hpp and gridwise_gemm_multiple_d_xdl_cshuffle.hpp * fixed issue with numerics header, removed from transform_conv_fwd_to_gemm and added to device_column_to_image_impl, device_grouped_conv_fwd_multiple_abd_xdl_cshuffle, device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3, device_image_to_column_impl * replaced standard header usage and added header guards in block to ctile map and gridwise_gemm_pipeline_selector * resolved errors in device_gemm_xdl_splitk_c_shuffle files in regards to replacement of standard headers in previous commit * added replicated functionality for standard header methods in utility files * replaced standard header functionality in threadwise tensor slice transfer files and added header guards in element_wise_operation.hpp * temp fix for namespace error in MIOpen * remove standard header usage in codegen device op * removed standard header usage in elementwise files, resolved namespace errors * formatting fix * changed codegen argument to ON for testing * temporarily removing codegen compiler flag for testing purposes * added codegen flag again, set default to ON * set codegen flag default back to OFF * replaced enable_if_t standard header usage in data_type.hpp * added some debug prints to pinpoint issues in MIOpen * added print outs to debug in MIOpen * removed debug print outs from device op * resolved stdexcept include error * formatting fix * adding includes to new fp8 file to resolve ck::enable_if_t errors * made changes to amd_wave_read_first_lane * updated functionality in type utility file * fixed end of file issue * resovled errors in type utility file, added functionality to array utility file * fixed standard header usage replication in data_type file, resolves error with failing examples on navi3x * formatting fix * replaced standard header usage in amd_ck_fp8 file * added include to random_gen file * removed and replicated standard header usage from data_type and type_convert files for fp8 changes * replicated standard unsigned integer types in random_gen * resolved comments from review: put calls to reinterpret_cast for size_t in header guards * updated/added copyright headers * removed duplicate header * fixed typo in header guard * updated copyright headers --------- Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
2e3183af · arai713 · GitHub · 2ab8bf4c · 2e3183af · 2e3183af
Unverified Commit 2e3183af authored Jan 31, 2025 by arai713 Committed by GitHub Jan 31, 2025
5 changed files
--- a/include/ck/utility/statically_indexed_array_multi_index.hpp
+++ b/include/ck/utility/statically_indexed_array_multi_index.hpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

 #ifndef CK_STATICALLY_INDEXED_ARRAY_MULTI_INDEX_HPP
 #define CK_STATICALLY_INDEXED_ARRAY_MULTI_INDEX_HPP
@@ -35,10 +35,9 @@ __host__ __device__ constexpr auto to_multi_index(const T& x)
 // is the alias of the latter. This is because compiler cannot infer the NSize if
 // using MultiIndex<NSize>
 // TODO: how to fix this?
-template <
-    typename... Ys,
-    typename X,
-    enable_if_t<!std::is_integral<X>::value && !std::is_floating_point<X>::value, bool> = false>
+template <typename... Ys,
+          typename X,
+          enable_if_t<!ck::is_integral<X>::value && !ck::is_floating_point<X>::value, bool> = false>
 __host__ __device__ constexpr auto operator+=(Tuple<Ys...>& y, const X& x)
 {
    static_assert(X::Size() == sizeof...(Ys), "wrong! size not the same");
@@ -47,10 +46,9 @@ __host__ __device__ constexpr auto operator+=(Tuple<Ys...>& y, const X& x)
    return y;
 }

-template <
-    typename... Ys,
-    typename X,
-    enable_if_t<!std::is_integral<X>::value && !std::is_floating_point<X>::value, bool> = false>
+template <typename... Ys,
+          typename X,
+          enable_if_t<!ck::is_integral<X>::value && !ck::is_floating_point<X>::value, bool> = false>
 __host__ __device__ constexpr auto operator-=(Tuple<Ys...>& y, const X& x)
 {
    static_assert(X::Size() == sizeof...(Ys), "wrong! size not the same");
@@ -59,10 +57,9 @@ __host__ __device__ constexpr auto operator-=(Tuple<Ys...>& y, const X& x)
    return y;
 }

-template <
-    typename... Xs,
-    typename Y,
-    enable_if_t<!std::is_integral<Y>::value && !std::is_floating_point<Y>::value, bool> = false>
+template <typename... Xs,
+          typename Y,
+          enable_if_t<!ck::is_integral<Y>::value && !ck::is_floating_point<Y>::value, bool> = false>
 __host__ __device__ constexpr auto operator+(const Tuple<Xs...>& x, const Y& y)
 {
    static_assert(Y::Size() == sizeof...(Xs), "wrong! size not the same");
@@ -73,10 +70,9 @@ __host__ __device__ constexpr auto operator+(const Tuple<Xs...>& x, const Y& y)
    return r;
 }

-template <
-    typename... Xs,
-    typename Y,
-    enable_if_t<!std::is_integral<Y>::value && !std::is_floating_point<Y>::value, bool> = false>
+template <typename... Xs,
+          typename Y,
+          enable_if_t<!ck::is_integral<Y>::value && !ck::is_floating_point<Y>::value, bool> = false>
 __host__ __device__ constexpr auto operator-(const Tuple<Xs...>& x, const Y& y)
 {
    static_assert(Y::Size() == sizeof...(Xs), "wrong! size not the same");
@@ -87,10 +83,9 @@ __host__ __device__ constexpr auto operator-(const Tuple<Xs...>& x, const Y& y)
    return r;
 }

-template <
-    typename... Xs,
-    typename Y,
-    enable_if_t<!std::is_integral<Y>::value && !std::is_floating_point<Y>::value, bool> = false>
+template <typename... Xs,
+          typename Y,
+          enable_if_t<!ck::is_integral<Y>::value && !ck::is_floating_point<Y>::value, bool> = false>
 __host__ __device__ constexpr auto operator*(const Tuple<Xs...>& x, const Y& y)
 {
    static_assert(Y::Size() == sizeof...(Xs), "wrong! size not the same");
@@ -104,7 +99,7 @@ __host__ __device__ constexpr auto operator*(const Tuple<Xs...>& x, const Y& y)
 // MultiIndex = scalar * MultiIndex
 template <typename... Xs,
          typename Y,
-          enable_if_t<std::is_integral<Y>::value || std::is_floating_point<Y>::value, bool> = false>
+          enable_if_t<ck::is_integral<Y>::value || ck::is_floating_point<Y>::value, bool> = false>
 __host__ __device__ constexpr auto operator*(Y a, const Tuple<Xs...>& x)
 {
    constexpr index_t NSize = sizeof...(Xs);
@@ -117,7 +112,7 @@ __host__ __device__ constexpr auto operator*(Y a, const Tuple<Xs...>& x)
 // MultiIndex = MultiIndex * scalar
 template <typename... Xs,
          typename Y,
-          enable_if_t<std::is_integral<Y>::value || std::is_floating_point<Y>::value, bool> = false>
+          enable_if_t<ck::is_integral<Y>::value || ck::is_floating_point<Y>::value, bool> = false>
 __host__ __device__ constexpr auto operator*(const Tuple<Xs...>& x, Y a)
 {
    return a * x;

--- a/include/ck/utility/tuple.hpp
+++ b/include/ck/utility/tuple.hpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

 #pragma once

@@ -32,7 +32,7 @@ struct TupleElementKeyData
    template <typename T,
              typename enable_if<!is_same<remove_cvref_t<T>, TupleElementKeyData>::value,
                                 bool>::type = false>
-    __host__ __device__ constexpr TupleElementKeyData(T&& v) : mData(std::forward<T>(v))
+    __host__ __device__ constexpr TupleElementKeyData(T&& v) : mData(ck::forward<T>(v))
    {
    }

@@ -67,7 +67,7 @@ get_tuple_element_data_reference(TupleElementKeyData<Key, Data>&& x)
 template <typename Key, typename Data>
 __host__ __device__ constexpr Data get_tuple_element_data(const TupleElementKeyData<Key, Data>& x)
 {
-    return std::forward(x.mData);
+    return ck::forward(x.mData);
 }

 template <typename Indices, typename... Xs>
@@ -83,13 +83,13 @@ struct TupleImpl<Sequence<Is...>, Xs...> : TupleElementKeyData<TupleElementKey<I
                                     !is_same<remove_cvref_t<Y>, TupleImpl>::value,
                                 bool>::type = false>
    __host__ __device__ constexpr TupleImpl(Y&& y)
-        : TupleElementKeyData<TupleElementKey<Is>, Xs>(std::forward<Y>(y))...
+        : TupleElementKeyData<TupleElementKey<Is>, Xs>(ck::forward<Y>(y))...
    {
    }

    template <typename... Ys, typename enable_if<sizeof...(Ys) >= 2, bool>::type = false>
    __host__ __device__ constexpr TupleImpl(Ys&&... ys)
-        : TupleElementKeyData<TupleElementKey<Is>, Xs>(std::forward<Ys>(ys))...
+        : TupleElementKeyData<TupleElementKey<Is>, Xs>(ck::forward<Ys>(ys))...
    {
        static_assert(sizeof...(Is) == sizeof...(Xs) && sizeof...(Is) == sizeof...(Ys),
                      "wrong! inconsistent size");
@@ -123,14 +123,14 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
    template <typename Y,
              typename enable_if<sizeof...(Xs) == 1 && !is_same<remove_cvref_t<Y>, Tuple>::value,
                                 bool>::type = false>
-    __host__ __device__ constexpr Tuple(Y&& y) : base(std::forward<Y>(y))
+    __host__ __device__ constexpr Tuple(Y&& y) : base(ck::forward<Y>(y))
    {
    }

    template <typename... Ys,
              typename enable_if<sizeof...(Ys) == sizeof...(Xs) && sizeof...(Ys) >= 2, bool>::type =
                  false>
-    __host__ __device__ constexpr Tuple(Ys&&... ys) : base(std::forward<Ys>(ys)...)
+    __host__ __device__ constexpr Tuple(Ys&&... ys) : base(ck::forward<Ys>(ys)...)
    {
    }

@@ -210,7 +210,7 @@ using tuple_element_t = typename tuple_element<I, TTuple>::type;
 template <typename... Xs>
 __host__ __device__ constexpr auto make_tuple(Xs&&... xs)
 {
-    return Tuple<remove_cvref_t<Xs>...>(std::forward<Xs>(xs)...);
+    return Tuple<remove_cvref_t<Xs>...>(ck::forward<Xs>(xs)...);
 }

 // https://en.cppreference.com/w/cpp/utility/tuple/tie

--- a/include/ck/utility/tuple_helper.hpp
+++ b/include/ck/utility/tuple_helper.hpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

 #pragma once

 #include "functional4.hpp"
 #include "tuple.hpp"
+#ifndef CK_CODE_GEN_RTC
 #include "is_detected.hpp"
+#endif

 namespace ck {

@@ -29,7 +31,7 @@ __host__ __device__ constexpr auto concat_tuple_of_reference(const Tuple<X&...>&
                                                             const Tuple<Y&...>& ty)
 {
    return unpack2(
-        [&](auto&&... zs) { return Tuple<decltype(zs)...>{std::forward<decltype(zs)>(zs)...}; },
+        [&](auto&&... zs) { return Tuple<decltype(zs)...>{ck::forward<decltype(zs)>(zs)...}; },
        tx,
        ty);
 }
@@ -38,7 +40,7 @@ template <typename... X, typename... Y>
 __host__ __device__ constexpr auto concat_tuple(const Tuple<X...>& tx, const Tuple<Y...>& ty)
 {
    return unpack2(
-        [&](auto... zs) { return Tuple<decltype(zs)...>{std::forward<decltype(zs)>(zs)...}; },
+        [&](auto... zs) { return Tuple<decltype(zs)...>{ck::forward<decltype(zs)>(zs)...}; },
        tx,
        ty);
 }
@@ -157,13 +159,17 @@ __host__ __device__ constexpr auto TupleReduce(F&& f, const Tuple<Ts...>& tuple)
    }
 }

+#ifndef CK_CODE_GEN_RTC
 template <typename T>
-using is_tuple = decltype(std::declval<T&>().IsTuple());
+using is_tuple = decltype(ck::declval<T&>().IsTuple());
+#endif

 template <typename... Ts>
 __host__ __device__ constexpr auto IsNestedTuple(const Tuple<Ts...>&)
 {
+#ifndef CK_CODE_GEN_RTC
    return (is_detected<is_tuple, Ts>::value || ...);
+#endif
 }

 template <index_t depth = 0, typename T>

--- a/include/ck/utility/type.hpp
+++ b/include/ck/utility/type.hpp
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
-
-#pragma once
-
-#include "ck/ck.hpp"
-#include "ck/utility/integral_constant.hpp"
-#include "ck/utility/enable_if.hpp"
-
-namespace ck {
-
-template <typename X, typename Y>
-struct is_same : public integral_constant<bool, false>
-{
-};
-
-template <typename X>
-struct is_same<X, X> : public integral_constant<bool, true>
-{
-};
-
-template <typename X, typename Y>
-inline constexpr bool is_same_v = is_same<X, Y>::value;
-
-template <typename T>
-using remove_reference_t = typename std::remove_reference<T>::type;
-
-template <typename T>
-using remove_cv_t = typename std::remove_cv<T>::type;
-
-template <typename T>
-using remove_cvref_t = remove_cv_t<std::remove_reference_t<T>>;
-
-template <typename T>
-using remove_pointer_t = typename std::remove_pointer<T>::type;
-
-template <typename T>
-inline constexpr bool is_pointer_v = std::is_pointer<T>::value;
-
-template <typename Y, typename X, typename enable_if<sizeof(X) == sizeof(Y), bool>::type = false>
-__host__ __device__ constexpr Y bit_cast(const X& x)
-{
-    static_assert(__has_builtin(__builtin_bit_cast), "");
-    static_assert(sizeof(X) == sizeof(Y), "Do not support cast between different size of type");
-
-    return __builtin_bit_cast(Y, x);
-}
-
-} // namespace ck
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+#pragma once
+
+#include "ck/ck.hpp"
+#include "ck/utility/enable_if.hpp"
+#include "ck/utility/integral_constant.hpp"
+
+namespace ck {
+#ifdef CK_CODE_GEN_RTC
+// NOLINTNEXTLINE
+#define CK_BUILTIN_TYPE_TRAIT1(name)         \
+    template <class T>                       \
+    struct name : bool_constant<__##name(T)> \
+    {                                        \
+    }
+
+// NOLINTNEXTLINE
+#define CK_BUILTIN_TYPE_TRAIT2(name)            \
+    template <class T, class U>                 \
+    struct name : bool_constant<__##name(T, U)> \
+    {                                           \
+    }
+
+// NOLINTNEXTLINE
+#define CK_BUILTIN_TYPE_TRAITN(name)             \
+    template <class... Ts>                       \
+    struct name : bool_constant<__##name(Ts...)> \
+    {                                            \
+    }
+
+CK_BUILTIN_TYPE_TRAIT1(is_class);
+CK_BUILTIN_TYPE_TRAIT1(is_pointer);
+CK_BUILTIN_TYPE_TRAIT1(is_reference);
+CK_BUILTIN_TYPE_TRAIT1(is_trivially_copyable);
+CK_BUILTIN_TYPE_TRAIT1(is_unsigned);
+CK_BUILTIN_TYPE_TRAIT2(is_base_of);
+
+template <class T>
+struct remove_cv
+{
+    using type = T;
+};
+
+template <class T>
+struct remove_cv<const T> : remove_cv<T>
+{
+};
+
+template <class T>
+struct remove_cv<volatile T> : remove_cv<T>
+{
+};
+
+template <class T>
+struct remove_reference
+{
+    typedef T type;
+};
+template <class T>
+struct remove_reference<T&>
+{
+    typedef T type;
+};
+template <class T>
+struct remove_reference<T&&>
+{
+    typedef T type;
+};
+template <class T>
+struct remove_pointer
+{
+    typedef T type;
+};
+template <class T>
+struct remove_pointer<T*>
+{
+    typedef T type;
+};
+template <class T>
+struct remove_pointer<T* const>
+{
+    typedef T type;
+};
+template <class T>
+struct remove_pointer<T* volatile>
+{
+    typedef T type;
+};
+template <class T>
+struct remove_pointer<T* const volatile>
+{
+    typedef T type;
+};
+
+template <typename T>
+constexpr T&& forward(typename remove_reference<T>::type& t_) noexcept
+{
+    return static_cast<T&&>(t_);
+}
+template <typename T>
+constexpr T&& forward(typename remove_reference<T>::type&& t_) noexcept
+{
+    return static_cast<T&&>(t_);
+}
+
+template <class T>
+struct is_const : public integral_constant<bool, false>
+{
+};
+template <class T>
+struct is_const<const T> : public integral_constant<bool, true>
+{
+};
+template <class T>
+inline constexpr bool is_const_v = is_const<T>::value;
+
+template <typename T>
+inline constexpr bool is_reference_v = is_reference<T>::value;
+
+template <class T>
+struct remove_const
+{
+    typedef T type;
+};
+template <class T>
+struct remove_const<const T>
+{
+    typedef T type;
+};
+template <class T>
+using remove_const_t = typename remove_const<T>::type;
+template <class T>
+inline constexpr bool is_class_v = is_class<T>::value;
+
+template <class T>
+inline constexpr bool is_trivially_copyable_v = is_trivially_copyable<T>::value;
+// template <typename T>
+// T&& declval() noexcept;
+
+template <class T, class U = T&&>
+U private_declval(int);
+
+template <class T>
+T private_declval(long);
+
+template <class T>
+auto declval() noexcept -> decltype(private_declval<T>(0));
+
+template <class...>
+using void_t = void;
+#else
+#include <utility>
+#include <type_traits>
+using std::declval;
+using std::forward;
+using std::is_base_of;
+using std::is_class;
+using std::is_class_v;
+using std::is_const_v;
+using std::is_pointer;
+using std::is_reference;
+using std::is_reference_v;
+using std::is_trivially_copyable;
+using std::is_trivially_copyable_v;
+using std::is_unsigned;
+using std::remove_const_t;
+using std::remove_cv;
+using std::remove_pointer;
+using std::remove_reference;
+using std::void_t;
+#endif
+
+template <typename X, typename Y>
+struct is_same : public integral_constant<bool, false>
+{
+};
+
+template <typename X>
+struct is_same<X, X> : public integral_constant<bool, true>
+{
+};
+
+template <typename X>
+struct is_floating_point : public integral_constant<bool, false>
+{
+};
+
+template <>
+struct is_floating_point<float> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_floating_point<double> : public integral_constant<bool, true>
+{
+};
+template <>
+struct is_floating_point<long double> : public integral_constant<bool, true>
+{
+};
+
+template <typename X>
+struct is_integral : public integral_constant<bool, false>
+{
+};
+
+template <>
+struct is_integral<int> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<unsigned int> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<long> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<unsigned long> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<short> : public integral_constant<bool, true>
+{
+};
+template <>
+struct is_integral<unsigned short> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<long long> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<unsigned long long> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<char> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<signed char> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<unsigned char> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<wchar_t> : public integral_constant<bool, true>
+{
+};
+template <>
+struct is_integral<char16_t> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<char32_t> : public integral_constant<bool, true>
+{
+};
+
+template <>
+struct is_integral<bool> : public integral_constant<bool, true>
+{
+};
+
+template <typename X, typename Y>
+inline constexpr bool is_same_v = is_same<X, Y>::value;
+
+template <typename X, typename Y>
+inline constexpr bool is_base_of_v = is_base_of<X, Y>::value;
+
+template <typename T>
+inline constexpr bool is_unsigned_v = is_unsigned<T>::value;
+
+template <typename T>
+using remove_reference_t = typename remove_reference<T>::type;
+
+template <typename T>
+using remove_reference_t = typename remove_reference<T>::type;
+
+template <typename T>
+using remove_cv_t = typename remove_cv<T>::type;
+template <typename T>
+using remove_cvref_t = remove_cv_t<remove_reference_t<T>>;
+
+template <typename T>
+using remove_pointer_t = typename remove_pointer<T>::type;
+
+template <typename T>
+inline constexpr bool is_pointer_v = is_pointer<T>::value;
+
+template <typename Y, typename X, typename enable_if<sizeof(X) == sizeof(Y), bool>::type = false>
+__host__ __device__ constexpr Y bit_cast(const X& x)
+{
+    static_assert(__has_builtin(__builtin_bit_cast), "");
+    static_assert(sizeof(X) == sizeof(Y), "Do not support cast between different size of type");
+
+    return __builtin_bit_cast(Y, x);
+}
+} // namespace ck
--- a/include/ck/utility/type_convert.hpp
+++ b/include/ck/utility/type_convert.hpp
@@ -52,10 +52,10 @@ inline __host__ __device__ constexpr bhalf_t bf16_convert_rtn<bhalf_t, half_t>(h
 // Convert X to Y, both X and Y are non-const data types.
 template <typename Y,
          typename X,
-          std::enable_if_t<!(std::is_const_v<Y> || std::is_const_v<X>), bool> = false>
+          ck::enable_if_t<!(ck::is_const_v<Y> || ck::is_const_v<X>), bool> = false>
 __host__ __device__ constexpr Y type_convert(X x)
 {
-    static_assert(!std::is_reference_v<Y> && !std::is_reference_v<X>);
+    static_assert(!ck::is_reference_v<Y> && !ck::is_reference_v<X>);

    return static_cast<Y>(x);
 }
@@ -63,13 +63,13 @@ __host__ __device__ constexpr Y type_convert(X x)
 // Convert X to Y, either X or Y is a const data type.
 template <typename Y,
          typename X,
-          std::enable_if_t<std::is_const_v<Y> || std::is_const_v<X>, bool> = false>
+          ck::enable_if_t<ck::is_const_v<Y> || ck::is_const_v<X>, bool> = false>
 __host__ __device__ constexpr Y type_convert(X x)
 {
-    static_assert(!std::is_reference_v<Y> && !std::is_reference_v<X>);
+    static_assert(!ck::is_reference_v<Y> && !ck::is_reference_v<X>);

-    using NonConstY = std::remove_const_t<Y>;
-    using NonConstX = std::remove_const_t<X>;
+    using NonConstY = ck::remove_const_t<Y>;
+    using NonConstX = ck::remove_const_t<X>;
    return static_cast<Y>(type_convert<NonConstY, NonConstX>(x));
 }

@@ -149,7 +149,7 @@ inline __host__ __device__ constexpr bf8_ocp_t type_convert<bf8_ocp_t, int>(int
 template <typename Y, typename X>
 __host__ __device__ constexpr Y type_convert_sp(X x)
 {
-    static_assert(!std::is_reference_v<Y> && !std::is_reference_v<X>);
+    static_assert(!ck::is_reference_v<Y> && !ck::is_reference_v<X>);

    return static_cast<Y>(x);
 }
@@ -211,7 +211,11 @@ template <>
 inline __host__ __device__ f8_fnuz_t f8_convert_sr<f8_fnuz_t, float>(float x)
 {
    constexpr int seed = 1254739;
-    uint32_t rng       = prand_generator<float, seed>(reinterpret_cast<uintptr_t>(&x), x);
+#ifndef CK_CODE_GEN_RTC
+    uint32_t rng = prand_generator<float, seed>(reinterpret_cast<uintptr_t>(&x), x);
+#else
+    uint32_t rng = prand_generator<float, seed>(reinterpret_cast<size_t>(&x), x);
+#endif
 #if defined(__gfx94__)
    union
    {
@@ -251,7 +255,12 @@ inline __host__ __device__ f8_fnuz_t f8_convert_sr<f8_fnuz_t, half_t>(half_t x)
    constexpr bool clip              = true;
    constexpr f8_rounding_mode rm    = f8_rounding_mode::stochastic;
    constexpr int seed               = 1254739;
+
+#ifndef CK_CODE_GEN_RTC
    uint32_t rng = prand_generator<half_t, seed>(reinterpret_cast<uintptr_t>(&x), x);
+#else
+    uint32_t rng = prand_generator<half_t, seed>(reinterpret_cast<size_t>(&x), x);
+#endif
    return utils::cast_to_f8<half_t,
                             f8_fnuz_t,
                             negative_zero_nan,
@@ -265,7 +274,11 @@ template <>
 inline __host__ __device__ bf8_fnuz_t f8_convert_sr<bf8_fnuz_t, float>(float x)
 {
    constexpr int seed = 1254739;
-    uint32_t rng       = prand_generator<float, seed>(reinterpret_cast<uintptr_t>(&x), x);
+#ifndef CK_CODE_GEN_RTC
+    uint32_t rng = prand_generator<float, seed>(reinterpret_cast<uintptr_t>(&x), x);
+#else
+    uint32_t rng = prand_generator<float, seed>(reinterpret_cast<size_t>(&x), x);
+#endif
 #if defined(__gfx94__)
    union
    {
@@ -307,7 +320,12 @@ inline __host__ __device__ bf8_fnuz_t f8_convert_sr<bf8_fnuz_t, half_t>(half_t x
    constexpr bool clip              = true;
    constexpr f8_rounding_mode rm    = f8_rounding_mode::stochastic;
    constexpr int seed               = 1254739;
+
+#ifndef CK_CODE_GEN_RTC
    uint32_t rng = prand_generator<half_t, seed>(reinterpret_cast<uintptr_t>(&x), x);
+#else
+    uint32_t rng = prand_generator<half_t, seed>(reinterpret_cast<size_t>(&x), x);
+#endif
    return utils::cast_to_f8<half_t,
                             bf8_fnuz_t,
                             negative_zero_nan,
@@ -629,20 +647,22 @@ inline __host__ __device__ half_t type_convert<half_t, bf8_fnuz_t>(bf8_fnuz_t x)
 #endif
 }

-template <typename Y, typename X, std::size_t NumElems>
+#ifndef CK_CODE_GEN_RTC
+template <typename Y, typename X, size_t NumElems>
 inline __host__ __device__ void array_convert(std::array<Y, NumElems>& y,
                                              const std::array<X, NumElems>& x)
 {
-    for(std::size_t i = 0; i < NumElems; i++)
+    for(size_t i = 0; i < NumElems; i++)
    {
        y[i] = type_convert<Y>(x[i]);
    }
 }
+#endif

 template <typename Y, typename X, index_t NumElems>
 inline __host__ __device__ void array_convert(Array<Y, NumElems>& y, const Array<X, NumElems>& x)
 {
-    for(std::size_t i = 0; i < NumElems; i++)
+    for(size_t i = 0; i < NumElems; i++)
    {
        y[i] = type_convert<Y>(x[i]);
    }