#pragma once
#include "constant_integral.hip.hpp"
#include "Sequence.hip.hpp"
#include "Array.hip.hpp"
#include "functional.hip.hpp"

// NOTE(review): the template parameter lists and the explicit-specialization
// argument lists in this header were reconstructed from the bodies of the
// declarations (element type and element count inferred from each VectorType).
// Confirm the parameter order/kinds against the call sites before merging.

// Returns the x-component of the calling thread's index within its block.
// `inline` so that including this header from several translation units does
// not produce multiple definitions under separate (relocatable) compilation.
__device__ inline unsigned get_thread_local_1d_id() { return threadIdx.x; }

// Returns the x-component of the calling thread's block index within the grid.
// `inline` for the same reason as get_thread_local_1d_id().
__device__ inline unsigned get_block_1d_id() { return blockIdx.x; }

// Compile-time type equality: the primary template reports false ...
template <class T1, class T2>
struct is_same
{
    static const bool value = false;
};

// ... and the partial specialization for two identical types reports true.
template <class T>
struct is_same<T, T>
{
    static const bool value = true;
};

// Maps a scalar type T and an element count N to a type alias `VectorType`.
// The primary template is intentionally empty, so an unsupported (T, N) pair
// fails to compile at the first use of `VectorType`.
template <class T, unsigned N>
struct vector_type
{
};

template <>
struct vector_type<float, 1>
{
    using VectorType = float;
};

template <>
struct vector_type<float, 2>
{
    using VectorType = float2;
};

template <>
struct vector_type<float, 4>
{
    using VectorType = float4;
};

#if 0
// Disabled variant built on half_float::half; kept for reference.
template <>
struct vector_type<half_float::half, 1>
{
    using VectorType = half_float::half;
};

template <>
struct vector_type<half_float::half, 2>
{
    using VectorType = float;
};

template <>
struct vector_type<half_float::half, 4>
{
    using VectorType = float2;
};

template <>
struct vector_type<half_float::half, 8>
{
    using VectorType = float4;
};
#endif

#if 1
template <>
struct vector_type<half, 1>
{
    using VectorType = half;

    // A single element needs no packing; the scalar is returned unchanged.
    __host__ __device__ static VectorType pack(half s) { return s; }
};

template <>
struct vector_type<half, 2>
{
    using VectorType = half2;

    // Overlays one half2 with two half scalars so pack() can fill it per element.
    union Data
    {
        VectorType vector;
        half scalar[2];
    };

    // Builds a half2 with s0 in scalar slot 0 and s1 in scalar slot 1.
    // NOTE(review): this writes `scalar` and then reads `vector`, i.e. it relies
    // on the compiler tolerating type punning through a union.
    __host__ __device__ static VectorType pack(half s0, half s1)
    {
        Data data;
        data.scalar[0] = s0;
        data.scalar[1] = s1;
        return data.vector;
    }
};

// NOTE(review): presumably float2/float4 serve here as same-sized carriers for
// 4 and 8 half values respectively -- confirm against the code that uses them.
template <>
struct vector_type<half, 4>
{
    using VectorType = float2;
};

template <>
struct vector_type<half, 8>
{
    using VectorType = float4;
};
#endif

// Returns a if a > b, otherwise b (so b is returned when the two compare equal).
template <class T>
__host__ __device__ constexpr T max(T a, T b)
{
    return a > b ? a : b;
}

// Returns a if a < b, otherwise b (so b is returned when the two compare equal).
template <class T>
__host__ __device__ constexpr T min(T a, T b)
{
    return a < b ? a : b;
}

// Returns ceil(a / b) for unsigned operands; b must be non-zero.
// Written as quotient plus a remainder test rather than (a + b - 1) / b,
// because the sum wraps around when a is close to the maximum unsigned value.
__host__ __device__ constexpr unsigned integer_divide_ceil(unsigned a, unsigned b)
{
    return a / b + (a % b != 0 ? 1u : 0u);
}