Commit b05d9fab authored by Astha Rai's avatar Astha Rai
Browse files

added some more header guards in the utility files, replacing some standard header functionality

parent 24961297
......@@ -7,10 +7,12 @@
#include "ck/utility/functional2.hpp"
#include "ck/utility/math.hpp"
#ifndef CK_CODE_GEN_RTC
#include <array>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#endif
namespace ck {
namespace detail {
......@@ -37,7 +39,7 @@ struct get_carrier<3>
{
using value_type = uint32_t;
std::array<std::byte, 3> bytes;
ck::byte bytes[3];
static_assert(sizeof(bytes) <= sizeof(value_type));
// replacement of host std::copy_n()
......@@ -59,24 +61,21 @@ struct get_carrier<3>
}
// method to trigger template substitution failure
__device__ carrier(const carrier& other) noexcept
{
copy_n(other.bytes.begin(), bytes.size(), bytes.begin());
}
__device__ carrier(const carrier& other) noexcept { copy_n(&other.bytes[0], 3, &bytes[0]); }
public:
__device__ carrier& operator=(value_type value) noexcept
{
copy_n(reinterpret_cast<const std::byte*>(&value), bytes.size(), bytes.begin());
copy_n(reinterpret_cast<const ck::byte*>(&value), 3, &bytes[0]);
return *this;
}
__device__ operator value_type() const noexcept
{
std::byte result[sizeof(value_type)];
ck::byte result[sizeof(value_type)];
copy_n(bytes.begin(), bytes.size(), result);
copy_n(&bytes[0], 3, result);
return *reinterpret_cast<const value_type*>(result);
}
......@@ -109,8 +108,8 @@ __device__ inline int64_t amd_wave_read_first_lane(int64_t value)
{
constexpr unsigned object_size = sizeof(int64_t);
constexpr unsigned second_part_offset = object_size / 2;
auto* const from_obj = reinterpret_cast<const std::byte*>(&value);
alignas(int64_t) std::byte to_obj[object_size];
auto* const from_obj = reinterpret_cast<const ck::byte*>(&value);
alignas(int64_t) ck::byte to_obj[object_size];
using Sgpr = uint32_t;
......@@ -122,17 +121,17 @@ __device__ inline int64_t amd_wave_read_first_lane(int64_t value)
return *reinterpret_cast<int64_t*>(to_obj);
}
template <
typename Object,
typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>>
template <typename Object,
typename = ck::enable_if_t<ck::is_class<Object>::value &&
ck::is_trivially_copyable<Object>::value>>
__device__ auto amd_wave_read_first_lane(const Object& obj)
{
using Size = unsigned;
constexpr Size SgprSize = 4;
constexpr Size ObjectSize = sizeof(Object);
auto* const from_obj = reinterpret_cast<const std::byte*>(&obj);
alignas(Object) std::byte to_obj[ObjectSize];
auto* const from_obj = reinterpret_cast<const ck::byte*>(&obj);
alignas(Object) ck::byte to_obj[ObjectSize];
constexpr Size RemainedSize = ObjectSize % SgprSize;
constexpr Size CompleteSgprCopyBoundary = ObjectSize - RemainedSize;
......
......@@ -4,9 +4,21 @@
#pragma once
#include "ck/utility/statically_indexed_array.hpp"
#ifdef CK_CODE_GEN_RTC
using int8_t = signed char;
using uint8_t = unsigned char;
using int16_t = signed short;
using uint16_t = unsigned short;
using float_t = float;
#endif
namespace ck {
#ifdef __HIPCC_RTC__
using byte = unsigned char;
#else
using std::byte;
#endif
using bhalf_t = ushort;
using half_t = _Float16;
using int4_t = _BitInt(4);
......@@ -1060,6 +1072,9 @@ using uint8x16_t = typename vector_type<uint8_t, 16>::type;
using uint8x32_t = typename vector_type<uint8_t, 32>::type;
using uint8x64_t = typename vector_type<uint8_t, 64>::type;
template <typename T>
struct NumericLimits;
template <typename T>
struct NumericLimits
{
......@@ -1077,6 +1092,95 @@ struct NumericLimits
__host__ __device__ static constexpr T Infinity() { return std::numeric_limits<T>::infinity(); }
};
template <>
struct NumericLimits<int32_t>
{
__host__ __device__ static constexpr int32_t Lowest() noexcept { return -2147483647 - 1; }
__host__ __device__ static constexpr int32_t Min() noexcept { return -2147483647 - 1; }
__host__ __device__ static constexpr int32_t Max() noexcept { return 2147483647; }
__host__ __device__ static constexpr int32_t Infinity() noexcept { return 0; }
__host__ __device__ static constexpr int32_t QuietNaN() { return 0; }
};
template <>
struct NumericLimits<int16_t>
{
__host__ __device__ static constexpr int16_t Lowest() noexcept { return -32768; }
__host__ __device__ static constexpr int16_t Min() noexcept { return -32768; }
__host__ __device__ static constexpr int16_t Max() noexcept { return 32767; }
__host__ __device__ static constexpr int16_t Infinity() noexcept { return 0; }
__host__ __device__ static constexpr int16_t QuietNaN() { return 0; }
};
template <>
struct NumericLimits<int8_t>
{
__host__ __device__ static constexpr int8_t Lowest() noexcept { return -128; }
__host__ __device__ static constexpr int8_t Min() noexcept { return -128; }
__host__ __device__ static constexpr int8_t Max() noexcept { return 127; }
__host__ __device__ static constexpr int8_t Infinity() noexcept { return 0; }
__host__ __device__ static constexpr int8_t QuietNaN() { return 0; }
};
template <>
struct NumericLimits<uint32_t>
{
__host__ __device__ static constexpr uint32_t Lowest() noexcept { return 0; }
__host__ __device__ static constexpr uint32_t Min() noexcept { return 0; }
__host__ __device__ static constexpr uint32_t Max() noexcept { return 4294967295U; }
__host__ __device__ static constexpr uint32_t Infinity() noexcept { return 0; }
__host__ __device__ static constexpr uint32_t QuietNaN() { return 0; }
};
template <>
struct NumericLimits<uint16_t>
{
__host__ __device__ static constexpr uint16_t Lowest() noexcept { return 0; }
__host__ __device__ static constexpr uint16_t Min() noexcept { return 0; }
__host__ __device__ static constexpr uint16_t Max() noexcept { return 65535U; }
__host__ __device__ static constexpr uint16_t Infinity() noexcept { return 0; }
__host__ __device__ static constexpr uint16_t QuietNaN() { return 0; }
};
template <>
struct NumericLimits<float>
{
static constexpr unsigned int binary_min = 0x00800000;
static constexpr unsigned int binary_max = 0x7F7FFFFF;
static constexpr unsigned int binary_lowest = 0xFF7FFFFF;
static constexpr unsigned int binary_qnan = 0xFFC00001;
static constexpr unsigned int binary_inf = 0x7F8000000;
__host__ __device__ static constexpr float Min() { return bit_cast<float>(binary_min); }
__host__ __device__ static constexpr float Max() { return bit_cast<float>(binary_max); }
__host__ __device__ static constexpr float Lowest() { return bit_cast<float>(binary_lowest); }
__host__ __device__ static constexpr float QuietNaN() { return bit_cast<float>(binary_qnan); }
__host__ __device__ static constexpr float Infinity() { return bit_cast<float>(binary_inf); }
};
template <>
struct NumericLimits<half_t>
{
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_CODE_GEN_RTC
#include <ostream>
#endif
#pragma once
......@@ -26,6 +29,7 @@ constexpr LoopScheduler make_default_loop_scheduler()
} // namespace ck
#ifndef CK_CODE_GEN_RTC
inline std::ostream& operator<<(std::ostream& os, const ck::LoopScheduler& s)
{
switch(s)
......@@ -36,3 +40,4 @@ inline std::ostream& operator<<(std::ostream& os, const ck::LoopScheduler& s)
}
return os;
}
#endif
......@@ -3,7 +3,9 @@
#pragma once
#ifndef CK_CODE_GEN_RTC
#include <ostream>
#endif
#include "ck/utility/integral_constant.hpp"
#include "ck/utility/type.hpp"
......@@ -900,6 +902,7 @@ using uniform_sequence_gen_t = typename uniform_sequence_gen<NSize, I>::type;
} // namespace ck
#ifndef CK_CODE_GEN_RTC
template <ck::index_t... Is>
std::ostream& operator<<(std::ostream& os, const ck::Sequence<Is...>)
{
......@@ -910,3 +913,4 @@ std::ostream& operator<<(std::ostream& os, const ck::Sequence<Is...>)
os << S::At(S::Size() - ck::Number<1>{}).value << "}";
return os;
}
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment