added some more header guards in the utility files, replacing some standard header functionality

b05d9fab · Astha Rai · 24961297 · b05d9fab · b05d9fab · b05d9fab
Commit b05d9fab authored Sep 25, 2024 by Astha Rai
4 changed files
--- a/include/ck/utility/amd_wave_read_first_lane.hpp
+++ b/include/ck/utility/amd_wave_read_first_lane.hpp
@@ -7,10 +7,12 @@
 #include "ck/utility/functional2.hpp"
 #include "ck/utility/math.hpp"

+#ifndef CK_CODE_GEN_RTC
 #include <array>
 #include <cstddef>
 #include <cstdint>
 #include <type_traits>
+#endif

 namespace ck {
 namespace detail {
@@ -37,7 +39,7 @@ struct get_carrier<3>
    {
        using value_type = uint32_t;

-        std::array<std::byte, 3> bytes;
+        ck::byte bytes[3];
        static_assert(sizeof(bytes) <= sizeof(value_type));

        // replacement of host std::copy_n()
@@ -59,24 +61,21 @@ struct get_carrier<3>
        }

        // method to trigger template substitution failure
-        __device__ carrier(const carrier& other) noexcept
-        {
-            copy_n(other.bytes.begin(), bytes.size(), bytes.begin());
-        }
+        __device__ carrier(const carrier& other) noexcept { copy_n(&other.bytes[0], 3, &bytes[0]); }

        public:
        __device__ carrier& operator=(value_type value) noexcept
        {
-            copy_n(reinterpret_cast<const std::byte*>(&value), bytes.size(), bytes.begin());
+            copy_n(reinterpret_cast<const ck::byte*>(&value), 3, &bytes[0]);

            return *this;
        }

        __device__ operator value_type() const noexcept
        {
-            std::byte result[sizeof(value_type)];
+            ck::byte result[sizeof(value_type)];

-            copy_n(bytes.begin(), bytes.size(), result);
+            copy_n(&bytes[0], 3, result);

            return *reinterpret_cast<const value_type*>(result);
        }
@@ -109,8 +108,8 @@ __device__ inline int64_t amd_wave_read_first_lane(int64_t value)
 {
    constexpr unsigned object_size        = sizeof(int64_t);
    constexpr unsigned second_part_offset = object_size / 2;
-    auto* const from_obj                  = reinterpret_cast<const std::byte*>(&value);
-    alignas(int64_t) std::byte to_obj[object_size];
+    auto* const from_obj                  = reinterpret_cast<const ck::byte*>(&value);
+    alignas(int64_t) ck::byte to_obj[object_size];

    using Sgpr = uint32_t;

@@ -122,17 +121,17 @@ __device__ inline int64_t amd_wave_read_first_lane(int64_t value)
    return *reinterpret_cast<int64_t*>(to_obj);
 }

-template <
-    typename Object,
-    typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>>
+template <typename Object,
+          typename = ck::enable_if_t<ck::is_class<Object>::value &&
+                                     ck::is_trivially_copyable<Object>::value>>
 __device__ auto amd_wave_read_first_lane(const Object& obj)
 {
    using Size                = unsigned;
    constexpr Size SgprSize   = 4;
    constexpr Size ObjectSize = sizeof(Object);

-    auto* const from_obj = reinterpret_cast<const std::byte*>(&obj);
-    alignas(Object) std::byte to_obj[ObjectSize];
+    auto* const from_obj = reinterpret_cast<const ck::byte*>(&obj);
+    alignas(Object) ck::byte to_obj[ObjectSize];

    constexpr Size RemainedSize             = ObjectSize % SgprSize;
    constexpr Size CompleteSgprCopyBoundary = ObjectSize - RemainedSize;

--- a/include/ck/utility/data_type.hpp
+++ b/include/ck/utility/data_type.hpp
@@ -4,9 +4,21 @@
 #pragma once

 #include "ck/utility/statically_indexed_array.hpp"
-
+#ifdef CK_CODE_GEN_RTC
+using int8_t   = signed char;
+using uint8_t  = unsigned char;
+using int16_t  = signed short;
+using uint16_t = unsigned short;
+using float_t  = float;
+#endif
 namespace ck {

+#ifdef __HIPCC_RTC__
+using byte = unsigned char;
+#else
+using std::byte;
+#endif
+
 using bhalf_t = ushort;
 using half_t  = _Float16;
 using int4_t  = _BitInt(4);
@@ -1060,6 +1072,9 @@ using uint8x16_t = typename vector_type<uint8_t, 16>::type;
 using uint8x32_t = typename vector_type<uint8_t, 32>::type;
 using uint8x64_t = typename vector_type<uint8_t, 64>::type;

+template <typename T>
+struct NumericLimits;
+
 template <typename T>
 struct NumericLimits
 {
@@ -1077,6 +1092,95 @@ struct NumericLimits
    __host__ __device__ static constexpr T Infinity() { return std::numeric_limits<T>::infinity(); }
 };

+template <>
+struct NumericLimits<int32_t>
+{
+    __host__ __device__ static constexpr int32_t Lowest() noexcept { return -2147483647 - 1; }
+
+    __host__ __device__ static constexpr int32_t Min() noexcept { return -2147483647 - 1; }
+
+    __host__ __device__ static constexpr int32_t Max() noexcept { return 2147483647; }
+
+    __host__ __device__ static constexpr int32_t Infinity() noexcept { return 0; }
+
+    __host__ __device__ static constexpr int32_t QuietNaN() { return 0; }
+};
+template <>
+struct NumericLimits<int16_t>
+{
+    __host__ __device__ static constexpr int16_t Lowest() noexcept { return -32768; }
+
+    __host__ __device__ static constexpr int16_t Min() noexcept { return -32768; }
+
+    __host__ __device__ static constexpr int16_t Max() noexcept { return 32767; }
+
+    __host__ __device__ static constexpr int16_t Infinity() noexcept { return 0; }
+
+    __host__ __device__ static constexpr int16_t QuietNaN() { return 0; }
+};
+
+template <>
+struct NumericLimits<int8_t>
+{
+    __host__ __device__ static constexpr int8_t Lowest() noexcept { return -128; }
+
+    __host__ __device__ static constexpr int8_t Min() noexcept { return -128; }
+
+    __host__ __device__ static constexpr int8_t Max() noexcept { return 127; }
+
+    __host__ __device__ static constexpr int8_t Infinity() noexcept { return 0; }
+
+    __host__ __device__ static constexpr int8_t QuietNaN() { return 0; }
+};
+
+template <>
+struct NumericLimits<uint32_t>
+{
+    __host__ __device__ static constexpr uint32_t Lowest() noexcept { return 0; }
+
+    __host__ __device__ static constexpr uint32_t Min() noexcept { return 0; }
+
+    __host__ __device__ static constexpr uint32_t Max() noexcept { return 4294967295U; }
+
+    __host__ __device__ static constexpr uint32_t Infinity() noexcept { return 0; }
+
+    __host__ __device__ static constexpr uint32_t QuietNaN() { return 0; }
+};
+
+template <>
+struct NumericLimits<uint16_t>
+{
+    __host__ __device__ static constexpr uint16_t Lowest() noexcept { return 0; }
+
+    __host__ __device__ static constexpr uint16_t Min() noexcept { return 0; }
+
+    __host__ __device__ static constexpr uint16_t Max() noexcept { return 65535U; }
+
+    __host__ __device__ static constexpr uint16_t Infinity() noexcept { return 0; }
+
+    __host__ __device__ static constexpr uint16_t QuietNaN() { return 0; }
+};
+
+template <>
+struct NumericLimits<float>
+{
+    static constexpr unsigned int binary_min    = 0x00800000;
+    static constexpr unsigned int binary_max    = 0x7F7FFFFF;
+    static constexpr unsigned int binary_lowest = 0xFF7FFFFF;
+    static constexpr unsigned int binary_qnan   = 0xFFC00001;
+    static constexpr unsigned int binary_inf    = 0x7F8000000;
+
+    __host__ __device__ static constexpr float Min() { return bit_cast<float>(binary_min); }
+
+    __host__ __device__ static constexpr float Max() { return bit_cast<float>(binary_max); }
+
+    __host__ __device__ static constexpr float Lowest() { return bit_cast<float>(binary_lowest); }
+
+    __host__ __device__ static constexpr float QuietNaN() { return bit_cast<float>(binary_qnan); }
+
+    __host__ __device__ static constexpr float Infinity() { return bit_cast<float>(binary_inf); }
+};
+
 template <>
 struct NumericLimits<half_t>
 {

--- a/include/ck/utility/loop_scheduler.hpp
+++ b/include/ck/utility/loop_scheduler.hpp
 // SPDX-License-Identifier: MIT
 // Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
+
+#ifndef CK_CODE_GEN_RTC
 #include <ostream>
+#endif

 #pragma once

@@ -26,6 +29,7 @@ constexpr LoopScheduler make_default_loop_scheduler()

 } // namespace ck

+#ifndef CK_CODE_GEN_RTC
 inline std::ostream& operator<<(std::ostream& os, const ck::LoopScheduler& s)
 {
    switch(s)
@@ -36,3 +40,4 @@ inline std::ostream& operator<<(std::ostream& os, const ck::LoopScheduler& s)
    }
    return os;
 }
+#endif
--- a/include/ck/utility/sequence.hpp
+++ b/include/ck/utility/sequence.hpp
@@ -3,7 +3,9 @@

 #pragma once

+#ifndef CK_CODE_GEN_RTC
 #include <ostream>
+#endif

 #include "ck/utility/integral_constant.hpp"
 #include "ck/utility/type.hpp"
@@ -900,6 +902,7 @@ using uniform_sequence_gen_t = typename uniform_sequence_gen<NSize, I>::type;

 } // namespace ck

+#ifndef CK_CODE_GEN_RTC
 template <ck::index_t... Is>
 std::ostream& operator<<(std::ostream& os, const ck::Sequence<Is...>)
 {
@@ -910,3 +913,4 @@ std::ostream& operator<<(std::ostream& os, const ck::Sequence<Is...>)
    os << S::At(S::Size() - ck::Number<1>{}).value << "}";
    return os;
 }
+#endif