#pragma once

// Small compile-time and device-side utilities: thread/block id accessors,
// a type-equality trait, a scalar->vector type map, integral constants,
// a compile-time unsigned sequence, and a few constexpr arithmetic helpers.
//
// NOTE(review): the template parameter/argument lists in this header were
// missing from the text as received and have been reconstructed from how each
// entity is used below (e.g. `mData[I]`, `sizeof...(Is)`, `value = N`).
// Please confirm them against the callers, especially the vector_type widths.

// Returns the x-component of the calling thread's index within its block.
__device__ unsigned get_thread_local_1d_id() { return threadIdx.x; }

// Returns the x-component of the calling thread's block index within the grid.
__device__ unsigned get_block_1d_id() { return blockIdx.x; }

// Type-equality trait: `value` is false in the primary template ...
template <class T1, class T2>
struct is_same
{
    static const bool value = false;
};

// ... and true in the partial specialization where both arguments match.
template <class T>
struct is_same<T, T>
{
    static const bool value = true;
};

// Maps (element type, element count) to the type used to hold that many
// elements. The primary template is intentionally empty, so an unsupported
// combination has no `type` member and fails at the point of use.
template <class T, unsigned N>
struct vector_type
{
};

// NOTE(review): widths 1/2/4 are inferred from the aliased types
// float/float2/float4 - confirm.
template <>
struct vector_type<float, 1>
{
    using type = float;
};

template <>
struct vector_type<float, 2>
{
    using type = float2;
};

template <>
struct vector_type<float, 4>
{
    using type = float4;
};

// Disabled half-precision mappings, kept as found.
// NOTE(review): element types and widths (1/2/4/8) below are a best-effort
// reconstruction; verify before re-enabling either block.
#if 0
template <>
struct vector_type<half_float::half, 1>
{
    using type = half_float::half;
};

template <>
struct vector_type<half_float::half, 2>
{
    using type = float;
};

template <>
struct vector_type<half_float::half, 4>
{
    using type = float2;
};

template <>
struct vector_type<half_float::half, 8>
{
    using type = float4;
};
#endif

#if 0
template <>
struct vector_type<half, 1>
{
    using type = half;
};

template <>
struct vector_type<half, 2>
{
    using type = half2;
};

template <>
struct vector_type<half, 4>
{
    using type = float2;
};

template <>
struct vector_type<half, 8>
{
    using type = float4;
};
#endif

// Wraps the compile-time constant N of type T as a distinct type.
template <class T, T N>
struct integral_constant
{
    static const T value = N;

    // Returns the wrapped constant.
    __host__ __device__ constexpr T Get() const { return value; }
};

// Shorthand for an unsigned integral constant; used as a tag argument so an
// index can be passed to a function as a compile-time value.
template <unsigned N>
using Number = integral_constant<unsigned, N>;

// Compile-time list of unsigned values.
template <unsigned... Is>
struct Sequence
{
    using Type = Sequence<Is...>;

    // Number of values in the sequence.
    static constexpr unsigned nDim = sizeof...(Is);

    // The values themselves, in declaration order.
    const unsigned mData[nDim] = {Is...};

    // Returns the I-th value. I is not range-checked here.
    template <unsigned I>
    __host__ __device__ constexpr unsigned Get(Number<I>) const
    {
        return mData[I];
    }

    // Returns the 4-element Sequence whose k-th value is this sequence's
    // Ik-th value, i.e. new[k] = old[Ik]. The argument is used only for its
    // type; its runtime value is ignored.
    template <unsigned I0, unsigned I1, unsigned I2, unsigned I3>
    __host__ __device__ constexpr auto ReorderByGetNewFromOld(Sequence<I0, I1, I2, I3>) const
    {
        // A fresh constexpr instance is used instead of *this so the lookups
        // below are constant expressions usable as template arguments.
        constexpr auto old_sequence = Type{};

        constexpr unsigned NR0 = old_sequence.mData[I0];
        constexpr unsigned NR1 = old_sequence.mData[I1];
        constexpr unsigned NR2 = old_sequence.mData[I2];
        constexpr unsigned NR3 = old_sequence.mData[I3];

        return Sequence<NR0, NR1, NR2, NR3>{};
    }

    // Returns the 4-element Sequence in which this sequence's k-th value is
    // placed at position Ik, i.e. new[Ik] = old[k]. This is the inverse of
    // ReorderByGetNewFromOld for the same index map.
    //
    // Previously this was a stub that printed "not implemented" and asserted.
    // NOTE(review): the "old -> new" reading of the name is inferred from the
    // sibling function - confirm it matches what callers expect.
    template <unsigned I0, unsigned I1, unsigned I2, unsigned I3>
    __host__ __device__ constexpr auto ReorderByPutOldToNew(Sequence<I0, I1, I2, I3>) const
    {
        static_assert(nDim == 4, "ReorderByPutOldToNew requires a 4-element Sequence");
        static_assert(I0 < 4 && I1 < 4 && I2 < 4 && I3 < 4,
                      "old-to-new map entries must be in [0, 4)");
        static_assert(I0 != I1 && I0 != I2 && I0 != I3 && I1 != I2 && I1 != I3 && I2 != I3,
                      "old-to-new map must be a permutation");

        // Invert the map: Pk is the old position whose value lands in new
        // slot k. The asserts above guarantee exactly one Ij equals k, so the
        // trailing "3" is only reached when I3 == k.
        constexpr unsigned P0 = (I0 == 0) ? 0 : (I1 == 0) ? 1 : (I2 == 0) ? 2 : 3;
        constexpr unsigned P1 = (I0 == 1) ? 0 : (I1 == 1) ? 1 : (I2 == 1) ? 2 : 3;
        constexpr unsigned P2 = (I0 == 2) ? 0 : (I1 == 2) ? 1 : (I2 == 2) ? 2 : 3;
        constexpr unsigned P3 = (I0 == 3) ? 0 : (I1 == 3) ? 1 : (I2 == 3) ? 2 : 3;

        constexpr auto old_sequence = Type{};

        constexpr unsigned NR0 = old_sequence.mData[P0];
        constexpr unsigned NR1 = old_sequence.mData[P1];
        constexpr unsigned NR2 = old_sequence.mData[P2];
        constexpr unsigned NR3 = old_sequence.mData[P3];

        return Sequence<NR0, NR1, NR2, NR3>{};
    }
};

// Generic constexpr max/min, compiled only when DEVICE_BACKEND_CUDA is set.
// NOTE(review): presumably other backends already provide these - confirm.
#if DEVICE_BACKEND_CUDA
// Returns the larger of a and b (b when they compare equal).
template <typename T>
__host__ __device__ constexpr T max(T a, T b)
{
    return a > b ? a : b;
}

// Returns the smaller of a and b (b when they compare equal).
template <typename T>
__host__ __device__ constexpr T min(T a, T b)
{
    return a < b ? a : b;
}
#endif

// Returns a / b rounded up to the next integer.
// Written as quotient-plus-remainder-test rather than (a + b - 1) / b, because
// the latter wraps around for large `a` and silently yields a too-small result.
// b must be non-zero; b == 0 is a division by zero.
__host__ __device__ constexpr unsigned integer_divide_ceil(unsigned a, unsigned b)
{
    return a / b + ((a % b != 0) ? 1u : 0u);
}