"...composable_kernel_rocm.git" did not exist on "e7dce4d247d2aad9afc7695b29b4c35eaf62b9cc"
Commit f000fe32 authored by Umang Yadav's avatar Umang Yadav
Browse files

remove unnecessary changes

parent 795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -124,5 +121,3 @@ __device__ float2_t atomic_max<float2_t>(float2_t* p_dst, const float2_t& x) ...@@ -124,5 +121,3 @@ __device__ float2_t atomic_max<float2_t>(float2_t* p_dst, const float2_t& x)
} }
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -29,5 +26,3 @@ __device__ index_t get_grid_size() { return gridDim.x; } ...@@ -29,5 +26,3 @@ __device__ index_t get_grid_size() { return gridDim.x; }
__device__ index_t get_block_size() { return blockDim.x; } __device__ index_t get_block_size() { return blockDim.x; }
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -23,5 +20,3 @@ struct ignore_t ...@@ -23,5 +20,3 @@ struct ignore_t
inline constexpr detail::ignore_t ignore; inline constexpr detail::ignore_t ignore;
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -237,5 +234,3 @@ inner_product<int8x16_t, int8x16_t, int32_t>(const int8x16_t& a, const int8x16_t ...@@ -237,5 +234,3 @@ inner_product<int8x16_t, int8x16_t, int32_t>(const int8x16_t& a, const int8x16_t
} }
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -8,50 +5,47 @@ ...@@ -8,50 +5,47 @@
namespace ck { namespace ck {
template <class T, T v> struct integral_constant { template <class T, T v>
static constexpr T value = v; struct integral_constant
typedef T value_type; {
typedef integral_constant type; static constexpr T value = v;
__host__ __device__ constexpr operator value_type() const noexcept { typedef T value_type;
return value; typedef integral_constant type;
} __host__ __device__ constexpr operator value_type() const noexcept { return value; }
__host__ __device__ constexpr value_type operator()() const noexcept { __host__ __device__ constexpr value_type operator()() const noexcept { return value; }
return value;
}
}; };
template <typename TX, TX X, typename TY, TY Y> template <typename TX, TX X, typename TY, TY Y>
__host__ __device__ constexpr auto operator+(integral_constant<TX, X>, __host__ __device__ constexpr auto operator+(integral_constant<TX, X>, integral_constant<TY, Y>)
integral_constant<TY, Y>) { {
return integral_constant<decltype(X + Y), X + Y>{}; return integral_constant<decltype(X + Y), X + Y>{};
} }
template <typename TX, TX X, typename TY, TY Y> template <typename TX, TX X, typename TY, TY Y>
__host__ __device__ constexpr auto operator-(integral_constant<TX, X>, __host__ __device__ constexpr auto operator-(integral_constant<TX, X>, integral_constant<TY, Y>)
integral_constant<TY, Y>) { {
static_assert(Y <= X, "wrong!"); static_assert(Y <= X, "wrong!");
return integral_constant<decltype(X - Y), X - Y>{}; return integral_constant<decltype(X - Y), X - Y>{};
} }
template <typename TX, TX X, typename TY, TY Y> template <typename TX, TX X, typename TY, TY Y>
__host__ __device__ constexpr auto operator*(integral_constant<TX, X>, __host__ __device__ constexpr auto operator*(integral_constant<TX, X>, integral_constant<TY, Y>)
integral_constant<TY, Y>) { {
return integral_constant<decltype(X * Y), X * Y>{}; return integral_constant<decltype(X * Y), X * Y>{};
} }
template <typename TX, TX X, typename TY, TY Y> template <typename TX, TX X, typename TY, TY Y>
__host__ __device__ constexpr auto operator/(integral_constant<TX, X>, __host__ __device__ constexpr auto operator/(integral_constant<TX, X>, integral_constant<TY, Y>)
integral_constant<TY, Y>) { {
static_assert(Y > 0, "wrong!"); static_assert(Y > 0, "wrong!");
return integral_constant<decltype(X / Y), X / Y>{}; return integral_constant<decltype(X / Y), X / Y>{};
} }
template <typename TX, TX X, typename TY, TY Y> template <typename TX, TX X, typename TY, TY Y>
__host__ __device__ constexpr auto operator%(integral_constant<TX, X>, __host__ __device__ constexpr auto operator%(integral_constant<TX, X>, integral_constant<TY, Y>)
integral_constant<TY, Y>) { {
static_assert(Y > 0, "wrong!"); static_assert(Y > 0, "wrong!");
return integral_constant<decltype(X % Y), X % Y>{}; return integral_constant<decltype(X % Y), X % Y>{};
} }
} // namespace ck
#pragma clang diagnostic pop } // namespace ck
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -57,5 +54,3 @@ struct is_known_at_compile_time<Tuple<Ts...>> ...@@ -57,5 +54,3 @@ struct is_known_at_compile_time<Tuple<Ts...>>
}; };
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -18,135 +15,148 @@ namespace ck { ...@@ -18,135 +15,148 @@ namespace ck {
// magic number division // magic number division
// Caution: // Caution:
// 1. For uint32_t as dividend: magic number division implementation being // 1. For uint32_t as dividend: magic number division implementation being used would produce
// used would produce correct result if the dividend is uint32_t and its value // correct result if the dividend is uint32_t and its value is within 31-bit value range.
// is within 31-bit value range. // 2. For int32_t as dividend: magic number division for int32_t dividend has not been
// 2. For int32_t as dividend: magic number division for int32_t dividend // implemented, the int32_t dividend would be bit-wise interpreted as uint32_t and magic number
// has not been implemented, the int32_t dividend would be bit-wise // division implementation for uint32_t is then used. Therefore, dividend value needs to be
// interpreted as uint32_t and magic number division implementation for // non-negative.
// uint32_t is then used. Therefore, dividend value needs to be non-negative.
// TODO: // TODO:
// 1. Implement magic number division for int32_t // 1. Implement magic number division for int32_t
// 2. Implement magic number division for uint32_t with 32-bit value range // 2. Implement magic number division for uint32_t with 32-bit value range
struct MagicDivision { struct MagicDivision
// uint32_t {
__host__ __device__ static constexpr auto // uint32_t
CalculateMagicNumbers(uint32_t divisor) { __host__ __device__ static constexpr auto CalculateMagicNumbers(uint32_t divisor)
// WARNING: magic division is only applicable for division inside this {
// range. You should not use the return value of CalculateMagicNumbers if // WARNING: magic division is only applicable for division inside this range.
// the divisor is not inside this range. The "else" logic below is to quiet // You should not use the return value of CalculateMagicNumbers if the divisor is not inside this
// down run-time error. // range. The "else" logic below is to quiet down run-time error.
if (divisor >= 1 && divisor <= INT32_MAX) { if(divisor >= 1 && divisor <= INT32_MAX)
uint32_t shift = 0; {
for (shift = 0; shift < 32; ++shift) { uint32_t shift = 0;
if ((1U << shift) >= divisor) { for(shift = 0; shift < 32; ++shift)
break; {
if((1U << shift) >= divisor)
{
break;
}
}
uint64_t one = 1;
uint64_t multiplier = ((one << 32) * ((one << shift) - divisor)) / divisor + 1;
// assert(multiplier <= 0xffffffffUL);
return make_tuple(uint32_t(multiplier), shift);
} }
} else
{
uint64_t one = 1; return make_tuple(uint32_t(0), uint32_t(0));
uint64_t multiplier = }
((one << 32) * ((one << shift) - divisor)) / divisor + 1; }
// assert(multiplier <= 0xffffffffUL);
__host__ __device__ static constexpr uint32_t CalculateMagicMultiplier(uint32_t divisor)
return make_tuple(uint32_t(multiplier), shift); {
} else { auto tmp = CalculateMagicNumbers(divisor);
return make_tuple(uint32_t(0), uint32_t(0));
} return tmp[Number<0>{}];
} }
__host__ __device__ static constexpr uint32_t __host__ __device__ static constexpr uint32_t CalculateMagicShift(uint32_t divisor)
CalculateMagicMultiplier(uint32_t divisor) { {
auto tmp = CalculateMagicNumbers(divisor); auto tmp = CalculateMagicNumbers(divisor);
return tmp[Number<0>{}]; return tmp[Number<1>{}];
} }
__host__ __device__ static constexpr uint32_t // integral_constant<uint32_t, .>
CalculateMagicShift(uint32_t divisor) { template <uint32_t Divisor>
auto tmp = CalculateMagicNumbers(divisor); __host__ __device__ static constexpr auto
CalculateMagicNumbers(integral_constant<uint32_t, Divisor>)
return tmp[Number<1>{}]; {
} constexpr auto tmp = CalculateMagicNumbers(uint32_t{Divisor});
// integral_constant<uint32_t, .> constexpr uint32_t multiplier = tmp[Number<0>{}];
template <uint32_t Divisor> constexpr uint32_t shift = tmp[Number<1>{}];
__host__ __device__ static constexpr auto
CalculateMagicNumbers(integral_constant<uint32_t, Divisor>) { return make_tuple(integral_constant<uint32_t, multiplier>{},
constexpr auto tmp = CalculateMagicNumbers(uint32_t{Divisor}); integral_constant<uint32_t, shift>{});
}
constexpr uint32_t multiplier = tmp[Number<0>{}];
constexpr uint32_t shift = tmp[Number<1>{}]; template <uint32_t Divisor>
__host__ __device__ static constexpr auto
return make_tuple(integral_constant<uint32_t, multiplier>{}, CalculateMagicMultiplier(integral_constant<uint32_t, Divisor>)
integral_constant<uint32_t, shift>{}); {
} constexpr uint32_t multiplier = CalculateMagicMultiplier(uint32_t{Divisor});
template <uint32_t Divisor> return integral_constant<uint32_t, multiplier>{};
__host__ __device__ static constexpr auto }
CalculateMagicMultiplier(integral_constant<uint32_t, Divisor>) {
constexpr uint32_t multiplier = CalculateMagicMultiplier(uint32_t{Divisor}); template <uint32_t Divisor>
__host__ __device__ static constexpr auto
return integral_constant<uint32_t, multiplier>{}; CalculateMagicShift(integral_constant<uint32_t, Divisor>)
} {
constexpr uint32_t shift = CalculateMagicShift(uint32_t{Divisor});
template <uint32_t Divisor>
__host__ __device__ static constexpr auto return integral_constant<uint32_t, shift>{};
CalculateMagicShift(integral_constant<uint32_t, Divisor>) { }
constexpr uint32_t shift = CalculateMagicShift(uint32_t{Divisor});
// integral_constant<int32_t, .>
return integral_constant<uint32_t, shift>{}; template <int32_t Divisor>
} __host__ __device__ static constexpr auto
CalculateMagicNumbers(integral_constant<int32_t, Divisor>)
// integral_constant<int32_t, .> {
template <int32_t Divisor> return CalculateMagicNumbers(integral_constant<uint32_t, Divisor>{});
__host__ __device__ static constexpr auto }
CalculateMagicNumbers(integral_constant<int32_t, Divisor>) {
return CalculateMagicNumbers(integral_constant<uint32_t, Divisor>{}); template <int32_t Divisor>
} __host__ __device__ static constexpr auto
CalculateMagicMultiplier(integral_constant<int32_t, Divisor>)
template <int32_t Divisor> {
__host__ __device__ static constexpr auto return CalculateMagicMultiplier(integral_constant<uint32_t, Divisor>{});
CalculateMagicMultiplier(integral_constant<int32_t, Divisor>) { }
return CalculateMagicMultiplier(integral_constant<uint32_t, Divisor>{});
} template <int32_t Divisor>
__host__ __device__ static constexpr auto
template <int32_t Divisor> CalculateMagicShift(integral_constant<int32_t, Divisor>)
__host__ __device__ static constexpr auto {
CalculateMagicShift(integral_constant<int32_t, Divisor>) { return CalculateMagicShift(integral_constant<uint32_t, Divisor>{});
return CalculateMagicShift(integral_constant<uint32_t, Divisor>{}); }
}
// magic division for uint32_t
// magic division for uint32_t __device__ static constexpr uint32_t
__device__ static constexpr uint32_t DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift)
DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift) { {
uint32_t tmp = __umulhi(dividend, multiplier); uint32_t tmp = __umulhi(dividend, multiplier);
return (tmp + dividend) >> shift; return (tmp + dividend) >> shift;
} }
__host__ static constexpr uint32_t __host__ static constexpr uint32_t
DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift) { DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift)
uint32_t tmp = static_cast<uint64_t>(dividend) * multiplier >> 32; {
return (tmp + dividend) >> shift; uint32_t tmp = static_cast<uint64_t>(dividend) * multiplier >> 32;
} return (tmp + dividend) >> shift;
}
// magic division for int32_t
// HACK: use dividend_i32 as if it's uint32_t, dividend_i32 need to be // magic division for int32_t
// non-negative for result to be correct // HACK: use dividend_i32 as if it's uint32_t, dividend_i32 need to be
// TODO: figure out how to do magic number division for int32_t as dividend // non-negative for result to be correct
__device__ static constexpr int32_t // TODO: figure out how to do magic number division for int32_t as dividend
DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift) { __device__ static constexpr int32_t
uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32); DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
uint32_t tmp = __umulhi(dividend_u32, multiplier); {
return (tmp + dividend_u32) >> shift; uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
} uint32_t tmp = __umulhi(dividend_u32, multiplier);
return (tmp + dividend_u32) >> shift;
__host__ static constexpr int32_t }
DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift) {
uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32); __host__ static constexpr int32_t
uint32_t tmp = static_cast<uint64_t>(dividend_u32) * multiplier >> 32; DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
return (tmp + dividend_u32) >> shift; {
} uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
uint32_t tmp = static_cast<uint64_t>(dividend_u32) * multiplier >> 32;
return (tmp + dividend_u32) >> shift;
}
}; };
struct MDiv struct MDiv
...@@ -222,5 +232,3 @@ struct MDiv2 ...@@ -222,5 +232,3 @@ struct MDiv2
}; };
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "enable_if.hpp"
#include "integral_constant.hpp" #include "integral_constant.hpp"
#include "number.hpp" #include "number.hpp"
#include "type.hpp" #include "type.hpp"
#include "enable_if.hpp"
namespace ck { namespace ck {
namespace math { namespace math {
template <typename T, T s> struct scales { template <typename T, T s>
__host__ __device__ constexpr T operator()(T a) const { return s * a; } struct scales
{
__host__ __device__ constexpr T operator()(T a) const { return s * a; }
}; };
template <typename T> struct plus { template <typename T>
__host__ __device__ constexpr T operator()(T a, T b) const { return a + b; } struct plus
{
__host__ __device__ constexpr T operator()(T a, T b) const { return a + b; }
}; };
template <typename T> struct minus { template <typename T>
__host__ __device__ constexpr T operator()(T a, T b) const { return a - b; } struct minus
{
__host__ __device__ constexpr T operator()(T a, T b) const { return a - b; }
}; };
struct multiplies { struct multiplies
template <typename A, typename B> {
__host__ __device__ constexpr auto operator()(const A &a, const B &b) const { template <typename A, typename B>
return a * b; __host__ __device__ constexpr auto operator()(const A& a, const B& b) const
} {
return a * b;
}
}; };
template <typename T> struct maximize { template <typename T>
__host__ __device__ constexpr T operator()(T a, T b) const { struct maximize
return a >= b ? a : b; {
} __host__ __device__ constexpr T operator()(T a, T b) const { return a >= b ? a : b; }
}; };
template <typename T> struct minimize { template <typename T>
__host__ __device__ constexpr T operator()(T a, T b) const { struct minimize
return a <= b ? a : b; {
} __host__ __device__ constexpr T operator()(T a, T b) const { return a <= b ? a : b; }
}; };
template <typename T> struct integer_divide_ceiler { template <typename T>
__host__ __device__ constexpr T operator()(T a, T b) const { struct integer_divide_ceiler
static_assert(is_same<T, index_t>{} || is_same<T, int>{}, "wrong type"); {
__host__ __device__ constexpr T operator()(T a, T b) const
{
static_assert(is_same<T, index_t>{} || is_same<T, int>{}, "wrong type");
return (a + b - Number<1>{}) / b; return (a + b - Number<1>{}) / b;
} }
}; };
template <typename X, typename Y> template <typename X, typename Y>
__host__ __device__ constexpr auto integer_divide_floor(X x, Y y) { __host__ __device__ constexpr auto integer_divide_floor(X x, Y y)
return x / y; {
return x / y;
} }
template <typename X, typename Y> template <typename X, typename Y>
__host__ __device__ constexpr auto integer_divide_ceil(X x, Y y) { __host__ __device__ constexpr auto integer_divide_ceil(X x, Y y)
return (x + y - Number<1>{}) / y; {
return (x + y - Number<1>{}) / y;
} }
template <typename X, typename Y> template <typename X, typename Y>
__host__ __device__ constexpr auto integer_least_multiple(X x, Y y) { __host__ __device__ constexpr auto integer_least_multiple(X x, Y y)
return y * integer_divide_ceil(x, y); {
return y * integer_divide_ceil(x, y);
} }
template <typename T> __host__ __device__ constexpr T max(T x) { return x; } template <typename T>
__host__ __device__ constexpr T max(T x)
{
return x;
}
template <typename T> __host__ __device__ constexpr T max(T x, T y) { template <typename T>
return x > y ? x : y; __host__ __device__ constexpr T max(T x, T y)
{
return x > y ? x : y;
} }
template <index_t X> template <index_t X>
__host__ __device__ constexpr index_t max(Number<X>, index_t y) { __host__ __device__ constexpr index_t max(Number<X>, index_t y)
return X > y ? X : y; {
return X > y ? X : y;
} }
template <index_t Y> template <index_t Y>
__host__ __device__ constexpr index_t max(index_t x, Number<Y>) { __host__ __device__ constexpr index_t max(index_t x, Number<Y>)
return x > Y ? x : Y; {
return x > Y ? x : Y;
} }
template <typename X, typename... Ys> template <typename X, typename... Ys>
__host__ __device__ constexpr auto max(X x, Ys... ys) { __host__ __device__ constexpr auto max(X x, Ys... ys)
static_assert(sizeof...(Ys) > 0, "not enough argument"); {
static_assert(sizeof...(Ys) > 0, "not enough argument");
return max(x, max(ys...)); return max(x, max(ys...));
} }
template <typename T> __host__ __device__ constexpr T min(T x) { return x; } template <typename T>
__host__ __device__ constexpr T min(T x)
{
return x;
}
template <typename T> __host__ __device__ constexpr T min(T x, T y) { template <typename T>
return x < y ? x : y; __host__ __device__ constexpr T min(T x, T y)
{
return x < y ? x : y;
} }
template <index_t X> template <index_t X>
__host__ __device__ constexpr index_t min(Number<X>, index_t y) { __host__ __device__ constexpr index_t min(Number<X>, index_t y)
return X < y ? X : y; {
return X < y ? X : y;
} }
template <index_t Y> template <index_t Y>
__host__ __device__ constexpr index_t min(index_t x, Number<Y>) { __host__ __device__ constexpr index_t min(index_t x, Number<Y>)
return x < Y ? x : Y; {
return x < Y ? x : Y;
} }
template <typename X, typename... Ys> template <typename X, typename... Ys>
__host__ __device__ constexpr auto min(X x, Ys... ys) { __host__ __device__ constexpr auto min(X x, Ys... ys)
static_assert(sizeof...(Ys) > 0, "not enough argument"); {
static_assert(sizeof...(Ys) > 0, "not enough argument");
return min(x, min(ys...)); return min(x, min(ys...));
} }
template <typename T> template <typename T>
__host__ __device__ constexpr T clamp(const T &x, const T &lowerbound, __host__ __device__ constexpr T clamp(const T& x, const T& lowerbound, const T& upperbound)
const T &upperbound) { {
return min(max(x, lowerbound), upperbound); return min(max(x, lowerbound), upperbound);
} }
// disallow implicit type casting // disallow implicit type casting
template <typename T> __device__ T exp(T x); template <typename T>
__device__ T exp(T x);
// TODO: add f16 support using v_exp_f16 // TODO: add f16 support using v_exp_f16
template <> __device__ float exp<float>(float x) { return __expf(x); } template <>
__device__ float exp<float>(float x)
{
return __expf(x);
}
template <> __device__ double exp<double>(double x) { return exp(x); } template <>
__device__ double exp<double>(double x)
{
return exp(x);
}
// static inline __host__ float exp(float x) { return ::expf(x); } static inline __host__ float exp(float x) { return ::expf(x); }
// static inline __host__ double exp(double x) { return std::exp(x); } static inline __host__ double exp(double x) { return std::exp(x); }
// greatest common divisor, aka highest common factor // greatest common divisor, aka highest common factor
__host__ __device__ constexpr index_t gcd(index_t x, index_t y) { __host__ __device__ constexpr index_t gcd(index_t x, index_t y)
if (x < 0) { {
return gcd(-x, y); if(x < 0)
} else if (y < 0) { {
return gcd(x, -y); return gcd(-x, y);
} else if (x == y || x == 0) { }
return y; else if(y < 0)
} else if (y == 0) { {
return x; return gcd(x, -y);
} else if (x > y) { }
return gcd(x % y, y); else if(x == y || x == 0)
} else { {
return gcd(x, y % x); return y;
} }
else if(y == 0)
{
return x;
}
else if(x > y)
{
return gcd(x % y, y);
}
else
{
return gcd(x, y % x);
}
} }
template <index_t X, index_t Y> template <index_t X, index_t Y>
__host__ __device__ constexpr auto gcd(Number<X>, Number<Y>) { __host__ __device__ constexpr auto gcd(Number<X>, Number<Y>)
constexpr auto r = gcd(X, Y); {
constexpr auto r = gcd(X, Y);
return Number<r>{}; return Number<r>{};
} }
template <typename X, typename... Ys, template <typename X, typename... Ys, typename enable_if<sizeof...(Ys) >= 2, bool>::type = false>
typename enable_if<sizeof...(Ys) >= 2, bool>::type = false> __host__ __device__ constexpr auto gcd(X x, Ys... ys)
__host__ __device__ constexpr auto gcd(X x, Ys... ys) { {
return gcd(x, gcd(ys...)); return gcd(x, gcd(ys...));
} }
// least common multiple // least common multiple
template <typename X, typename Y> template <typename X, typename Y>
__host__ __device__ constexpr auto lcm(X x, Y y) { __host__ __device__ constexpr auto lcm(X x, Y y)
return (x * y) / gcd(x, y); {
return (x * y) / gcd(x, y);
} }
template <typename X, typename... Ys, template <typename X, typename... Ys, typename enable_if<sizeof...(Ys) >= 2, bool>::type = false>
typename enable_if<sizeof...(Ys) >= 2, bool>::type = false> __host__ __device__ constexpr auto lcm(X x, Ys... ys)
__host__ __device__ constexpr auto lcm(X x, Ys... ys) { {
return lcm(x, lcm(ys...)); return lcm(x, lcm(ys...));
} }
template <typename T> struct equal { template <typename T>
__host__ __device__ constexpr bool operator()(T x, T y) const { struct equal
return x == y; {
} __host__ __device__ constexpr bool operator()(T x, T y) const { return x == y; }
}; };
template <typename T> struct less { template <typename T>
__host__ __device__ constexpr bool operator()(T x, T y) const { struct less
return x < y; {
} __host__ __device__ constexpr bool operator()(T x, T y) const { return x < y; }
}; };
template <index_t X> template <index_t X>
...@@ -206,5 +258,3 @@ __host__ __device__ constexpr auto next_power_of_two(Number<X> x) ...@@ -206,5 +258,3 @@ __host__ __device__ constexpr auto next_power_of_two(Number<X> x)
} // namespace math } // namespace math
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -16,169 +13,177 @@ ...@@ -16,169 +13,177 @@
namespace ck { namespace ck {
namespace math { namespace math {
// math functions for the host, some are implemented by calling C++ std // math functions for the host, some are implemented by calling C++ std functions
// functions
static inline __host__ float abs(float x) { return x < 0 ? x * -1.0 : x; }; static inline __host__ float abs(float x) { return std::abs(x); };
static inline __host__ double abs(double x) { return x < 0 ? x * -1.0 : x; }; static inline __host__ double abs(double x) { return std::abs(x); };
static inline __host__ int8_t abs(int8_t x) { static inline __host__ int8_t abs(int8_t x)
int8_t sgn = x >> (8 - 1); {
int8_t sgn = x >> (8 - 1);
return (x ^ sgn) - sgn; return (x ^ sgn) - sgn;
}; };
static inline __host__ int32_t abs(int32_t x) { static inline __host__ int32_t abs(int32_t x)
int32_t sgn = x >> (32 - 1); {
int32_t sgn = x >> (32 - 1);
return (x ^ sgn) - sgn; return (x ^ sgn) - sgn;
}; };
static inline __host__ half_t abs(half_t x) { static inline __host__ half_t abs(half_t x)
uint16_t xx = ck::bit_cast<uint16_t>(x); {
uint16_t xx = ck::bit_cast<uint16_t>(x);
uint16_t abs_xx = xx & 0x7fff; uint16_t abs_xx = xx & 0x7fff;
half_t abs_x = ck::bit_cast<half_t>(abs_xx); half_t abs_x = ck::bit_cast<half_t>(abs_xx);
return abs_x; return abs_x;
}; };
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 #ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
static inline __host__ int4_t abs(int4_t x) { static inline __host__ int4_t abs(int4_t x)
int4_t sgn = x >> (4 - 1); {
return (x ^ sgn) - sgn; int4_t sgn = x >> (4 - 1);
return (x ^ sgn) - sgn;
} }
#endif #endif
// TODO: to bit arithmetic to figure it out static inline __host__ bool isnan(float x) { return std::isnan(x); };
static inline __host__ bool isnan(float x) {
(void)x;
return false;
};
static inline __host__ bool isnan(double x) { static inline __host__ bool isnan(double x) { return std::isnan(x); };
(void)x;
return false;
};
static inline __host__ bool isnan(int8_t x) { static inline __host__ bool isnan(int8_t x)
(void)x; {
return false; (void)x;
return false;
}; };
static inline __host__ bool isnan(int32_t x) { static inline __host__ bool isnan(int32_t x)
(void)x; {
return false; (void)x;
return false;
}; };
static inline __host__ bool isnan(half_t x) { static inline __host__ bool isnan(half_t x)
uint16_t xx = ck::bit_cast<uint16_t>(x); {
uint16_t xx = ck::bit_cast<uint16_t>(x);
return (xx & 0x7FFF) > 0x7C00; return (xx & 0x7FFF) > 0x7C00;
}; };
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 #ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
static inline __host__ bool isnan(int4_t x) { static inline __host__ bool isnan(int4_t x)
(void)x; {
return false; (void)x;
return false;
}; };
#endif #endif
// MIGRAPHX doesn't care about host compilation, just return identity values for static inline __host__ half_t sqrt(half_t x)
// now {
return static_cast<half_t>(std::sqrt(static_cast<float>(x)));
static inline __host__ half_t sqrt(half_t x) { return x; }; };
static inline __host__ float sqrt(float x) { return x; }; static inline __host__ float sqrt(float x) { return std::sqrt(x); };
static inline __host__ double sqrt(double x) { return x; }; static inline __host__ double sqrt(double x) { return std::sqrt(x); };
static inline __host__ half_t tanh(half_t x) { return x; }; static inline __host__ half_t tanh(half_t x)
{
return static_cast<half_t>(std::tanh(static_cast<float>(x)));
};
static inline __host__ float tanh(float x) { return x; }; static inline __host__ float tanh(float x) { return std::tanh(x); };
static inline __host__ double tanh(double x) { return x; }; static inline __host__ double tanh(double x) { return std::tanh(x); };
// math functions for the HIP kernel, some are implemented by calling hip // math functions for the HIP kernel, some are implemented by calling hip builtin functions
// builtin functions
static inline __device__ float abs(float x) { return ::abs(x); }; static inline __device__ float abs(float x) { return ::abs(x); };
static inline __device__ double abs(double x) { return ::abs(x); }; static inline __device__ double abs(double x) { return ::abs(x); };
static inline __device__ int8_t abs(int8_t x) { static inline __device__ int8_t abs(int8_t x)
int8_t sgn = x >> (8 - 1); {
int8_t sgn = x >> (8 - 1);
return (x ^ sgn) - sgn; return (x ^ sgn) - sgn;
}; };
static inline __device__ int32_t abs(int32_t x) { static inline __device__ int32_t abs(int32_t x)
int32_t sgn = x >> (32 - 1); {
int32_t sgn = x >> (32 - 1);
return (x ^ sgn) - sgn; return (x ^ sgn) - sgn;
}; };
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 #ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
static inline __device__ int4_t abs(int4_t x) { static inline __device__ int4_t abs(int4_t x)
int4_t sgn = x >> (4 - 1); {
int4_t sgn = x >> (4 - 1);
return (x ^ sgn) - sgn; return (x ^ sgn) - sgn;
}; };
#endif #endif
static inline __device__ half_t abs(half_t x) { static inline __device__ half_t abs(half_t x)
uint16_t xx = ck::bit_cast<uint16_t>(x); {
uint16_t xx = ck::bit_cast<uint16_t>(x);
uint16_t abs_xx = xx & 0x7fff; uint16_t abs_xx = xx & 0x7fff;
half_t abs_x = ck::bit_cast<half_t>(abs_xx); half_t abs_x = ck::bit_cast<half_t>(abs_xx);
return abs_x; return abs_x;
}; };
static inline __device__ bool isnan(float x) { return ::isnan(x); }; static inline __device__ bool isnan(float x) { return ::isnan(x); };
static inline __device__ bool isnan(double x) { return ::isnan(x); }; static inline __device__ bool isnan(double x) { return ::isnan(x); };
static inline __device__ bool isnan(int8_t x) { static inline __device__ bool isnan(int8_t x)
(void)x; {
return false; (void)x;
return false;
}; };
static inline __device__ bool isnan(int32_t x) { static inline __device__ bool isnan(int32_t x)
(void)x; {
return false; (void)x;
return false;
}; };
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 #ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
static inline __device__ bool isnan(int4_t x) { static inline __device__ bool isnan(int4_t x)
(void)x; {
return false; (void)x;
return false;
}; };
#endif #endif
static inline __device__ bool isnan(half_t x) { static inline __device__ bool isnan(half_t x)
uint16_t xx = ck::bit_cast<uint16_t>(x); {
uint16_t xx = ck::bit_cast<uint16_t>(x);
return (xx & 0x7FFF) > 0x7C00; return (xx & 0x7FFF) > 0x7C00;
}; };
static inline __device__ half_t sqrt(half_t x) { static inline __device__ half_t sqrt(half_t x)
return static_cast<half_t>(__builtin_amdgcn_sqrtf(static_cast<float>(x))); {
return static_cast<half_t>(__builtin_amdgcn_sqrtf(static_cast<float>(x)));
}; };
static inline __device__ float sqrt(float x) { static inline __device__ float sqrt(float x) { return __builtin_amdgcn_sqrtf(x); };
return __builtin_amdgcn_sqrtf(x);
};
static inline __device__ double sqrt(double x) { static inline __device__ double sqrt(double x) { return __builtin_amdgcn_sqrt(x); };
return __builtin_amdgcn_sqrt(x);
};
static inline __device__ half_t tanh(half_t x) { static inline __device__ half_t tanh(half_t x)
return static_cast<half_t>(::tanhf(static_cast<float>(x))); {
return static_cast<half_t>(::tanhf(static_cast<float>(x)));
}; };
static inline __device__ float tanh(float x) { return ::tanhf(x); }; static inline __device__ float tanh(float x) { return ::tanhf(x); };
...@@ -187,5 +192,3 @@ static inline __device__ double tanh(double x) { return ::tanh(x); }; ...@@ -187,5 +192,3 @@ static inline __device__ double tanh(double x) { return ::tanh(x); };
} // namespace math } // namespace math
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -13,5 +10,3 @@ ...@@ -13,5 +10,3 @@
#else #else
#include "statically_indexed_array_multi_index.hpp" #include "statically_indexed_array_multi_index.hpp"
#endif #endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -19,5 +16,3 @@ using LongNumber = integral_constant<long_index_t, N>; ...@@ -19,5 +16,3 @@ using LongNumber = integral_constant<long_index_t, N>;
} // namespace ck } // namespace ck
#endif #endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -29,5 +26,3 @@ struct float_equal_zero ...@@ -29,5 +26,3 @@ struct float_equal_zero
}; };
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -42,5 +39,3 @@ enum struct IndicesType ...@@ -42,5 +39,3 @@ enum struct IndicesType
}; };
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -116,5 +113,3 @@ struct AccumulateWithIndexAndNanCheck<true, ReduceOperation, AccDataType, IndexD ...@@ -116,5 +113,3 @@ struct AccumulateWithIndexAndNanCheck<true, ReduceOperation, AccDataType, IndexD
} // namespace detail } // namespace detail
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -294,5 +291,3 @@ struct InMemoryDataOperationSupportedOnDataType<InMemoryDataOperationEnum::Add, ...@@ -294,5 +291,3 @@ struct InMemoryDataOperationSupportedOnDataType<InMemoryDataOperationEnum::Add,
} // namespace reduce } // namespace reduce
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -900,5 +897,3 @@ template <index_t NSize, index_t I> ...@@ -900,5 +897,3 @@ template <index_t NSize, index_t I>
using uniform_sequence_gen_t = typename uniform_sequence_gen<NSize, I>::type; using uniform_sequence_gen_t = typename uniform_sequence_gen<NSize, I>::type;
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -38,5 +35,3 @@ __host__ __device__ constexpr auto to_sequence(Tuple<Number<Is>...>) ...@@ -38,5 +35,3 @@ __host__ __device__ constexpr auto to_sequence(Tuple<Number<Is>...>)
} }
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -196,5 +193,3 @@ __host__ __device__ constexpr auto make_static_buffer(LongNumber<N>) ...@@ -196,5 +193,3 @@ __host__ __device__ constexpr auto make_static_buffer(LongNumber<N>)
} }
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -106,5 +103,3 @@ struct StaticallyIndexedArray_v2 ...@@ -106,5 +103,3 @@ struct StaticallyIndexedArray_v2
} // namespace ck } // namespace ck
#endif #endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -163,5 +160,3 @@ __host__ __device__ void print_multi_index(const Tuple<Xs...>& x) ...@@ -163,5 +160,3 @@ __host__ __device__ void print_multi_index(const Tuple<Xs...>& x)
} // namespace ck } // namespace ck
#endif #endif
#pragma clang diagnostic pop
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment