Commit f1541994 authored by Chao Liu

change clang-format to 5.0

parent 863e069b
@@ -316,8 +316,7 @@ struct DynamicTransformedTensorDescriptor
constexpr bool is_valid_up_always_mapped_to_valid_low =
decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex();
-if
-constexpr(!is_valid_up_always_mapped_to_valid_low)
+if constexpr(!is_valid_up_always_mapped_to_valid_low)
{
const auto up_dims_part = UpDimensionIds{}.At(itran);
const auto idx_up_part = pick_array_element(idx_up, up_dims_part);
......
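The hunk above is the headline fix: clang-format 5.0 parses C++17's `if constexpr` as a single unit, where the older configuration broke the line after `if`. For readers unfamiliar with the construct, a minimal self-contained sketch (illustrative only, not code from this repository):

```cpp
#include <iostream>
#include <type_traits>

// C++17 `if constexpr`: the untaken branch is discarded at compile time,
// so each instantiation of the template compiles only one side.
template <typename T>
auto describe(T value)
{
    if constexpr(std::is_integral<T>::value)
        return value * 2; // instantiated only for integral T
    else
        return value + 0.5; // instantiated only for non-integral T
}

int main()
{
    std::cout << describe(21) << ' ' << describe(1.0) << '\n'; // prints: 42 1.5
}
```

Statements in the discarded branch are not instantiated for the current template arguments, which is why the construct shows up throughout template-heavy code like these descriptor transforms.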
@@ -64,7 +64,7 @@ template <typename LowerTensorDescriptor,
index_t... LowerDimensionIds,
index_t... UpperDimensionIds>
__host__ __device__ constexpr auto
-reorder_transformed_tensor_descriptor_impl(LowerTensorDescriptor,
+reorder_transformed_tensor_descriptor_impl(LowerTensorDescriptor,
Sequence<LowerLengths...>,
Sequence<LowerDimensionIds...>,
Sequence<UpperDimensionIds...>)
@@ -78,7 +78,7 @@ __host__ __device__ constexpr auto
// reorder a NativeTensorDescriptor
template <typename... Ts, typename MapLower2Upper>
__host__ __device__ constexpr auto
-reorder_tensor_descriptor_given_lower2upper(NativeTensorDescriptor<Ts...>, MapLower2Upper)
+reorder_tensor_descriptor_given_lower2upper(NativeTensorDescriptor<Ts...>, MapLower2Upper)
{
static_assert(is_valid_sequence_map<MapLower2Upper>{},
"wrong! MapLower2Upper is not a valid map");
@@ -96,7 +96,7 @@ __host__ __device__ constexpr auto
// reorder a TransformedTensorDescriptor
template <typename... Ts, typename MapLower2Upper>
__host__ __device__ constexpr auto
-reorder_tensor_descriptor_given_lower2upper(TransformedTensorDescriptor<Ts...>, MapLower2Upper)
+reorder_tensor_descriptor_given_lower2upper(TransformedTensorDescriptor<Ts...>, MapLower2Upper)
{
static_assert(is_valid_sequence_map<MapLower2Upper>{},
"wrong! MapLower2Upper is not a valid map");
......
@@ -210,15 +210,14 @@ struct BlockwiseBatchGemmBlockABlockBThreadCTransANormalBNormalC_V2
#pragma unroll
for(index_t m_repeat = 0; m_repeat < MRepeat; ++m_repeat)
{
-threadwise_matrix_copy(
-a_block_mtx,
+threadwise_matrix_copy(a_block_mtx,
p_a_block +
-a_block_mtx.GetOffsetFromMultiIndex(k_begin,
-m_repeat * MPerLevel1Cluster) +
+a_block_mtx.GetOffsetFromMultiIndex(
+k_begin, m_repeat * MPerLevel1Cluster) +
ib * BlockMatrixStrideA + mMyThreadOffsetA,
a_thread_mtx,
-p_a_thread +
-a_thread_mtx.GetOffsetFromMultiIndex(0, m_repeat * MPerThreadSubC),
+p_a_thread + a_thread_mtx.GetOffsetFromMultiIndex(
+0, m_repeat * MPerThreadSubC),
a_thread_sub_mtx.GetLengths(),
Number<DataPerReadA>{});
}
@@ -229,15 +228,14 @@ struct BlockwiseBatchGemmBlockABlockBThreadCTransANormalBNormalC_V2
#pragma unroll
for(index_t n_repeat = 0; n_repeat < NRepeat; ++n_repeat)
{
-threadwise_matrix_copy(
-b_block_mtx,
+threadwise_matrix_copy(b_block_mtx,
p_b_block +
-b_block_mtx.GetOffsetFromMultiIndex(k_begin,
-n_repeat * NPerLevel1Cluster) +
+b_block_mtx.GetOffsetFromMultiIndex(
+k_begin, n_repeat * NPerLevel1Cluster) +
ib * BlockMatrixStrideB + mMyThreadOffsetB,
b_thread_mtx,
-p_b_thread +
-b_thread_mtx.GetOffsetFromMultiIndex(0, n_repeat * NPerThreadSubC),
+p_b_thread + b_thread_mtx.GetOffsetFromMultiIndex(
+0, n_repeat * NPerThreadSubC),
b_thread_sub_mtx.GetLengths(),
Number<DataPerReadB>{});
}
@@ -391,9 +389,8 @@ struct BlockwiseBatchGemmBlockABlockBThreadCTransANormalBNormalC_V2
{
threadwise_matrix_copy(
c_thread_sub_mtx,
-p_c_thread +
-c_thread_sub_mtx.GetOffsetFromMultiIndex(m_repeat * MPerLevel1Cluster,
-n_repeat * NPerLevel1Cluster),
+p_c_thread + c_thread_sub_mtx.GetOffsetFromMultiIndex(
+m_repeat * MPerLevel1Cluster, n_repeat * NPerLevel1Cluster),
c_block_mtx,
p_c_block +
c_block_mtx.GetOffsetFromMultiIndex(m_repeat * MPerLevel1Cluster,
@@ -405,5 +402,5 @@ struct BlockwiseBatchGemmBlockABlockBThreadCTransANormalBNormalC_V2
}
};
-} // namespace
+} // namespace ck
#endif
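All of the rewrapped `threadwise_matrix_copy` calls above share one shape: source matrix descriptor, source pointer plus an offset computed by `GetOffsetFromMultiIndex`, destination descriptor and pointer, the sub-tile lengths, and a vector width. A much-simplified sketch of the underlying idea, with hypothetical signatures (the repo's version is descriptor-driven and vectorized):

```cpp
#include <cstdio>

// Simplified sketch: copy a rows-by-cols tile between two row-major
// matrices. For a packed 2D matrix, GetOffsetFromMultiIndex(r, c) reduces
// to r * leading_dimension + c, which is what the offsets above compute.
template <typename T>
void copy_tile(const T* src, int src_ld, T* dst, int dst_ld, int rows, int cols)
{
    for(int r = 0; r < rows; ++r)
        for(int c = 0; c < cols; ++c)
            dst[r * dst_ld + c] = src[r * src_ld + c];
}

int main()
{
    float a[4 * 4] = {}; // 4x4 source, leading dimension 4
    float b[2 * 8] = {}; // 2x8 destination, leading dimension 8
    a[1 * 4 + 2] = 3.5f;
    copy_tile(a + (0 * 4 + 1), 4, b, 8, 2, 3); // copy a 2x3 tile from (0,1)
    std::printf("%g\n", b[1 * 8 + 1]);         // a(1,2) landed at b(1,1): 3.5
}
```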
@@ -336,9 +336,9 @@ struct BlockwiseGemmBlockABlockBThreadCTransANormalBNormalC_v2
constexpr index_t MRepeat = MPerThread / MPerThreadSubC;
constexpr index_t NRepeat = NPerThread / NPerThreadSubC;
-static_if<MRepeat == 2 && NRepeat == 2>{}([&](auto) {
-Run_pipelined_2x2(p_a_block, p_b_block, p_c_thread);
-}).Else([&](auto) { Run_naive(p_a_block, p_b_block, p_c_thread); });
+static_if<MRepeat == 2 && NRepeat == 2>{}(
+[&](auto) { Run_pipelined_2x2(p_a_block, p_b_block, p_c_thread); })
+.Else([&](auto) { Run_naive(p_a_block, p_b_block, p_c_thread); });
#else
Run_naive(p_a_block, p_b_block, p_c_thread);
#endif
......
@@ -153,9 +153,8 @@ struct ThreadwiseGemmTransANormalBNormalC
(is_same<FloatA, half2_t>{} && is_same<FloatB, half2_t>{}) ||
(is_same<FloatA, half4_t>{} && is_same<FloatB, half4_t>{}));
-static_if<has_amd_asm>{}([&](auto fwd) {
-Run_amd_asm(p_a, p_b, fwd(p_c));
-}).Else([&](auto) { Run_source(p_a, p_b, p_c); });
+static_if<has_amd_asm>{}([&](auto fwd) { Run_amd_asm(p_a, p_b, fwd(p_c)); })
+.Else([&](auto) { Run_source(p_a, p_b, p_c); });
#else
Run_source(p_a, p_b, p_c);
#endif
......
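Both hunks above reflow the `static_if<...>{}(...).Else(...)` idiom, a C++14-compatible stand-in for `if constexpr`: only the selected lambda runs, and the `auto` parameter keeps each body dependent so the untaken branch is not instantiated. A minimal sketch of how such a dispatcher can be built (assumed structure, not the repo's exact implementation):

```cpp
#include <cstdio>

// Identity functor used to make expressions in the lambda bodies dependent.
struct forwarder
{
    template <typename T>
    T&& operator()(T&& t) const { return static_cast<T&&>(t); }
};

template <bool Predicate>
struct static_if;

template <>
struct static_if<true>
{
    // Predicate is true: run the first lambda, ignore Else.
    template <typename F>
    const static_if& operator()(F f) const { f(forwarder{}); return *this; }

    template <typename F>
    void Else(F) const {}
};

template <>
struct static_if<false>
{
    // Predicate is false: skip the first lambda, run the Else lambda.
    template <typename F>
    const static_if& operator()(F) const { return *this; }

    template <typename F>
    void Else(F f) const { f(forwarder{}); }
};

int main()
{
    constexpr bool use_fast_path = true;
    static_if<use_fast_path>{}([&](auto) { std::puts("fast path"); })
        .Else([&](auto) { std::puts("fallback"); });
}
```

The `fwd` parameter seen in `Run_amd_asm(p_a, p_b, fwd(p_c))` above plays the forwarder's role: passing a value through it makes the expression type-dependent, so that branch is only compiled when actually selected.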
@@ -82,8 +82,8 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
constexpr auto long_vector_access_lengths = SliceLengths::Modify(
vector_access_dim, SliceLengths::Get(vector_access_dim) / long_vector_size);
-ford<decltype(long_vector_access_lengths), SrcDstDimAccessOrder>{}([&](
-auto long_vector_access_id) {
+ford<decltype(long_vector_access_lengths), SrcDstDimAccessOrder>{}(
+[&](auto long_vector_access_id) {
// data id w.r.t slicing-window
auto long_vector_data_begin_id = long_vector_access_id;
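`ford` visits every multi-dimensional index inside `long_vector_access_lengths`, in a configurable dimension order. A rough runtime analogue, assuming row-major order and ignoring the compile-time machinery (sketch only, not the repo's implementation):

```cpp
#include <array>
#include <cstddef>
#include <cstdio>

// Rough runtime analogue of ford<>: call f on every multi-index in
// [0, lengths[0]) x ... x [0, lengths[N-1]), last dimension fastest.
template <std::size_t N, typename F>
void ford_like(const std::array<int, N>& lengths, F f)
{
    std::array<int, N> id{}; // current multi-index, starts at all zeros
    for(;;)
    {
        f(id);
        // odometer-style increment: bump the last dimension, carry leftwards
        std::size_t d = N;
        for(;;)
        {
            if(d == 0) return; // carried past dimension 0: done
            --d;
            if(++id[d] < lengths[d]) break;
            id[d] = 0;
        }
    }
}

int main()
{
    ford_like<2>({2, 3}, [](const std::array<int, 2>& id) {
        std::printf("(%d, %d)\n", id[0], id[1]); // visits (0,0) .. (1,2)
    });
}
```

The real `ford` also takes an access-order sequence (`SrcDstDimAccessOrder` above) that permutes which dimension varies fastest; the sketch hard-codes row-major order.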
@@ -109,9 +109,11 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
const index_t buffer_offset = i * src_data_per_access;
-const auto src_coord = mSrcSliceOrigin + (long_vector_data_begin_id + scalar_id);
+const auto src_coord =
+mSrcSliceOrigin + (long_vector_data_begin_id + scalar_id);
-// Check src data's valid mapping situation, only check the first data in this src
+// Check src data's valid mapping situation, only check the first data in this
+// src
// vector. It's the user's responsibility to make sure all data in the src vector
// has the valid/invalid mapping situation
transfer_data<SrcData,
@@ -146,9 +148,11 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
const index_t buffer_offset = i * dst_data_per_access;
-const auto dst_coord = mDstSliceOrigin + (long_vector_data_begin_id + scalar_id);
+const auto dst_coord =
+mDstSliceOrigin + (long_vector_data_begin_id + scalar_id);
-// Check dst data's valid mapping situation, only check the first data in this dst
+// Check dst data's valid mapping situation, only check the first data in this
+// dst
// vector. It's the user's responsibility to make sure all data in the dst vector
// has the valid/invalid mapping situation
transfer_data<DstData,
@@ -175,9 +179,8 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
{
const auto step_sizes = to_array(step_sizes_);
-static_if<PositiveDirection>{}([&](auto) {
-mSrcSliceOrigin += to_array(step_sizes);
-}).Else([&](auto) { mSrcSliceOrigin -= step_sizes; });
+static_if<PositiveDirection>{}([&](auto) { mSrcSliceOrigin += to_array(step_sizes); })
+.Else([&](auto) { mSrcSliceOrigin -= step_sizes; });
}
template <typename T, bool PositiveDirection>
@@ -186,9 +189,8 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
{
const auto step_sizes = to_array(step_sizes_);
-static_if<PositiveDirection>{}([&](auto) {
-mDstSliceOrigin += step_sizes;
-}).Else([&](auto) { mDstSliceOrigin -= step_sizes; });
+static_if<PositiveDirection>{}([&](auto) { mDstSliceOrigin += step_sizes; })
+.Else([&](auto) { mDstSliceOrigin -= step_sizes; });
}
private:
......
@@ -508,8 +508,8 @@ template <bool B>
struct bool_type : std::integral_constant<bool, B>
{
};
-using std::true_type;
using std::false_type;
+using std::true_type;
/// Type traits for floating-point types.
template <typename T>
@@ -854,8 +854,8 @@ inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y,
((x & 0x7FFF) > 0x7C00 && !(x & 0x200)) || ((y & 0x7FFF) > 0x7C00 && !(y & 0x200)) ||
((z & 0x7FFF) > 0x7C00 && !(z & 0x200)));
#endif
-return ((x & 0x7FFF) > 0x7C00) ? (x | 0x200) : ((y & 0x7FFF) > 0x7C00) ? (y | 0x200)
-: (z | 0x200);
+return ((x & 0x7FFF) > 0x7C00) ? (x | 0x200)
+: ((y & 0x7FFF) > 0x7C00) ? (y | 0x200) : (z | 0x200);
}
/// Select value or signaling NaN.
@@ -1756,9 +1756,9 @@ uint32 mulhi(uint32 x, uint32 y)
uint32 xy = (x >> 16) * (y & 0xFFFF), yx = (x & 0xFFFF) * (y >> 16),
c = (xy & 0xFFFF) + (yx & 0xFFFF) + (((x & 0xFFFF) * (y & 0xFFFF)) >> 16);
return (x >> 16) * (y >> 16) + (xy >> 16) + (yx >> 16) + (c >> 16) +
-((R == std::round_to_nearest) ? ((c >> 15) & 1) : (R == std::round_toward_infinity)
-? ((c & 0xFFFF) != 0)
-: 0);
+((R == std::round_to_nearest)
+? ((c >> 15) & 1)
+: (R == std::round_toward_infinity) ? ((c & 0xFFFF) != 0) : 0);
}
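For context on the hunk above: `mulhi` returns the high 32 bits of a 32x32-bit product, built from 16-bit partial products so no 64-bit type is required, with the carry term `c` doubling as the rounding input. A reference version using a 64-bit intermediate, for comparison only (it omits the rounding modes):

```cpp
#include <cstdint>
#include <cstdio>

// High 32 bits of a 32x32-bit unsigned multiply, via a 64-bit intermediate.
std::uint32_t mulhi_ref(std::uint32_t x, std::uint32_t y)
{
    return static_cast<std::uint32_t>((static_cast<std::uint64_t>(x) * y) >> 32);
}

int main()
{
    std::printf("%#x\n", mulhi_ref(0x80000000u, 0x80000000u)); // 0x40000000
}
```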
/// 64-bit multiplication.
@@ -2379,10 +2379,12 @@ unsigned int erf(unsigned int arg)
t /
((x2.exp < 0) ? f31(exp2((x2.exp > -32) ? (x2.m >> -x2.exp) : 0, 30), 0)
: f31(exp2((x2.m << x2.exp) & 0x7FFFFFFF, 22), x2.m >> (31 - x2.exp)));
-return (!C || sign) ? fixed2half<R, 31, false, true, true>(
+return (!C || sign)
+? fixed2half<R, 31, false, true, true>(
0x80000000 - (e.m >> (C - e.exp)), 14 + C, sign & (C - 1U))
-: (e.exp < -25) ? underflow<R>() : fixed2half<R, 30, false, false, true>(
-e.m >> 1, e.exp + 14, 0, e.m & 1);
+: (e.exp < -25)
+? underflow<R>()
+: fixed2half<R, 30, false, false, true>(e.m >> 1, e.exp + 14, 0, e.m & 1);
}
/// Gamma function and postprocessing.
@@ -2402,8 +2404,7 @@ unsigned int gamma(unsigned int arg)
for(unsigned int i=0; i<5; ++i)
s += p[i+1] / (arg+i);
return std::log(s) + (arg-0.5)*std::log(t) - t;
-*/ static const f31
-pi(0xC90FDAA2, 1),
+*/ static const f31 pi(0xC90FDAA2, 1),
lbe(0xB8AA3B29, 0);
unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
bool bsign = sign != 0;
@@ -2789,7 +2790,7 @@ inline half operator"" _h(long double value)
{
return half(detail::binary, detail::float2half<half::round_style>(value));
}
-}
+} // namespace literal
#endif
namespace detail {
@@ -2837,8 +2838,8 @@ struct half_caster<half, half, R>
{
static half cast(half arg) { return arg; }
};
-}
-}
+} // namespace detail
+} // namespace half_float
/// Extensions to the C++ standard library.
namespace std {
@@ -3003,7 +3004,7 @@ struct hash<half_float::half>
}
};
#endif
-}
+} // namespace std
namespace half_float {
/// \anchor compop
@@ -3122,10 +3123,11 @@ inline half operator+(half x, half y)
return half(detail::binary,
(absx > 0x7C00 || absy > 0x7C00)
? detail::signal(x.data_, y.data_)
-: (absy != 0x7C00) ? x.data_ : (sub && absx == 0x7C00) ? detail::invalid()
-: y.data_);
+: (absy != 0x7C00) ? x.data_
+: (sub && absx == 0x7C00) ? detail::invalid() : y.data_);
if(!absx)
-return absy ? y : half(detail::binary,
+return absy ? y
+: half(detail::binary,
(half::round_style == std::round_toward_neg_infinity)
? (x.data_ | y.data_)
: (x.data_ & y.data_));
@@ -3449,9 +3451,10 @@ inline half fma(half x, half y, half z)
: (sign | 0x7C00))
: z;
if(!absx || !absy)
-return absz ? z : half(detail::binary,
-(half::round_style == std::round_toward_neg_infinity)
-? (z.data_ | sign)
+return absz
+? z
+: half(detail::binary,
+(half::round_style == std::round_toward_neg_infinity) ? (z.data_ | sign)
: (z.data_ & sign));
for(; absx < 0x400; absx <<= 1, --exp)
;
@@ -3516,8 +3519,7 @@ inline half fma(half x, half y, half z)
inline HALF_CONSTEXPR_NOERR half fmax(half x, half y)
{
return half(detail::binary,
-(!isnan(y) && (isnan(x) ||
-(x.data_ ^ (0x8000 | (0x8000 - (x.data_ >> 15)))) <
+(!isnan(y) && (isnan(x) || (x.data_ ^ (0x8000 | (0x8000 - (x.data_ >> 15)))) <
(y.data_ ^ (0x8000 | (0x8000 - (y.data_ >> 15))))))
? detail::select(y.data_, x.data_)
: detail::select(x.data_, y.data_));
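The XOR expression being rewrapped here converts a sign-magnitude half-precision bit pattern into an unsigned key whose integer order matches the numeric order: negative values get all bits flipped, non-negative values just get the sign bit set. A standalone sketch of the trick (illustrative, extracted from the expression above):

```cpp
#include <cstdint>
#include <cstdio>

// Map a sign-magnitude 16-bit float pattern to an unsigned key whose
// integer order matches the numeric order of the floats.
// 0x8000 | (0x8000 - sign) is 0xFFFF when the sign bit is set, else 0x8000.
std::uint16_t ordered_key(std::uint16_t bits)
{
    return bits ^ (0x8000 | (0x8000 - (bits >> 15)));
}

int main()
{
    // -1.0 (0xBC00) < -0.0 (0x8000) <= +0.0 (0x0000) < 1.0 (0x3C00)
    std::printf("%u %u %u %u\n",
                ordered_key(0xBC00),  // 0x43FF = 17407
                ordered_key(0x8000),  // 0x7FFF = 32767
                ordered_key(0x0000),  // 0x8000 = 32768
                ordered_key(0x3C00)); // 0xBC00 = 48128
}
```

With the keys in hand, `fmax` and `fmin` reduce to an integer comparison plus NaN handling, which is exactly what the rewrapped condition expresses.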
@@ -3533,8 +3535,7 @@ inline HALF_CONSTEXPR_NOERR half fmax(half x, half y)
inline HALF_CONSTEXPR_NOERR half fmin(half x, half y)
{
return half(detail::binary,
-(!isnan(y) && (isnan(x) ||
-(x.data_ ^ (0x8000 | (0x8000 - (x.data_ >> 15)))) >
+(!isnan(y) && (isnan(x) || (x.data_ ^ (0x8000 | (0x8000 - (x.data_ >> 15)))) >
(y.data_ ^ (0x8000 | (0x8000 - (y.data_ >> 15))))))
? detail::select(y.data_, x.data_)
: detail::select(x.data_, y.data_));
@@ -3886,9 +3887,9 @@ inline half log1p(half arg)
#else
if(arg.data_ >= 0xBC00)
return half(detail::binary,
-(arg.data_ == 0xBC00) ? detail::pole(0x8000) : (arg.data_ <= 0xFC00)
-? detail::invalid()
-: detail::signal(arg.data_));
+(arg.data_ == 0xBC00)
+? detail::pole(0x8000)
+: (arg.data_ <= 0xFC00) ? detail::invalid() : detail::signal(arg.data_));
int abs = arg.data_ & 0x7FFF, exp = -15;
if(!abs || abs >= 0x7C00)
return (abs > 0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
@@ -5354,13 +5355,13 @@ inline HALF_CONSTEXPR half copysign(half x, half y)
/// \retval FP_NORMAL for all other (normal) values
inline HALF_CONSTEXPR int fpclassify(half arg)
{
-return !(arg.data_ & 0x7FFF) ? FP_ZERO : ((arg.data_ & 0x7FFF) < 0x400)
+return !(arg.data_ & 0x7FFF)
+? FP_ZERO
+: ((arg.data_ & 0x7FFF) < 0x400)
? FP_SUBNORMAL
: ((arg.data_ & 0x7FFF) < 0x7C00)
? FP_NORMAL
-: ((arg.data_ & 0x7FFF) == 0x7C00)
-? FP_INFINITE
-: FP_NAN;
+: ((arg.data_ & 0x7FFF) == 0x7C00) ? FP_INFINITE : FP_NAN;
}
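The ternary chain above classifies an IEEE binary16 value purely from its bit pattern: after masking off the sign (`& 0x7FFF`), anything below 0x0400 has a zero exponent field, 0x7C00 is the all-ones exponent (infinity), and anything above it is a NaN. A standalone sketch over a raw 16-bit pattern (the same logic as the diff, restated with early returns):

```cpp
#include <cmath> // FP_ZERO, FP_SUBNORMAL, FP_NORMAL, FP_INFINITE, FP_NAN
#include <cstdint>
#include <cstdio>

// Classify an IEEE-754 binary16 value from its raw bit pattern.
// Layout: 1 sign bit | 5 exponent bits | 10 mantissa bits.
int classify_half_bits(std::uint16_t bits)
{
    const std::uint16_t mag = bits & 0x7FFF; // drop the sign bit
    if(mag == 0)
        return FP_ZERO;
    if(mag < 0x0400) // exponent field == 0, mantissa != 0
        return FP_SUBNORMAL;
    if(mag < 0x7C00) // exponent field in 1..30
        return FP_NORMAL;
    if(mag == 0x7C00) // exponent all ones, mantissa zero
        return FP_INFINITE;
    return FP_NAN; // exponent all ones, mantissa nonzero
}

int main()
{
    std::printf("%d %d\n",
                classify_half_bits(0x3C00) == FP_NORMAL, // 1.0  -> 1
                classify_half_bits(0x7E00) == FP_NAN);   // qNaN -> 1
}
```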
/// Check if finite number.
@@ -5652,7 +5653,7 @@ inline void fethrowexcept(int excepts, const char* msg = "")
throw std::range_error(msg);
}
/// \}
-}
+} // namespace half_float
#undef HALF_UNUSED_NOERR
#undef HALF_CONSTEXPR
......