Commit f000fe32 authored by Umang Yadav's avatar Umang Yadav
Browse files

remove unnecesssary changes

parent 795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -46,5 +43,3 @@ __host__ __device__ T CK_CONSTANT_ADDRESS_SPACE* cast_pointer_to_constant_addres
}
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -1315,5 +1312,3 @@ amd_buffer_atomic_max(const typename vector_type_maker<T, N>::type::type src_thr
}
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -372,5 +369,3 @@ __device__ void amd_assembly_wmma_f32_16x16x16_f16_w32(half16_t a, half16_t b, f
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
......@@ -20,65 +17,79 @@
namespace ck {
namespace detail {
template <unsigned SizeInBytes> struct get_carrier;
template <unsigned SizeInBytes>
struct get_carrier;
template <> struct get_carrier<1> {
using type = uint8_t;
template <>
struct get_carrier<1>
{
using type = uint8_t;
};
template <> struct get_carrier<2> {
using type = uint16_t;
template <>
struct get_carrier<2>
{
using type = uint16_t;
};
template <> struct get_carrier<3> {
using type = class carrier {
using value_type = uint32_t;
// std::array<std::byte, 3> bytes;
std::byte bytes[3];
static_assert(sizeof(bytes) <= sizeof(value_type));
// replacement of host std::copy_n()
template <typename InputIterator, typename Size, typename OutputIterator>
__device__ static OutputIterator copy_n(InputIterator from, Size size,
OutputIterator to) {
if (0 < size) {
*to = *from;
++to;
for (Size count = 1; count < size; ++count) {
*to = *++from;
++to;
template <>
struct get_carrier<3>
{
using type = class carrier
{
using value_type = uint32_t;
std::array<std::byte, 3> bytes;
static_assert(sizeof(bytes) <= sizeof(value_type));
// replacement of host std::copy_n()
template <typename InputIterator, typename Size, typename OutputIterator>
__device__ static OutputIterator copy_n(InputIterator from, Size size, OutputIterator to)
{
if(0 < size)
{
*to = *from;
++to;
for(Size count = 1; count < size; ++count)
{
*to = *++from;
++to;
}
}
return to;
}
}
return to;
}
// method to trigger template substitution failure
__device__ carrier(const carrier &other) noexcept {
copy_n(&other.bytes[0], 3, &bytes[0]);
}
// method to trigger template substitution failure
__device__ carrier(const carrier& other) noexcept
{
copy_n(other.bytes.begin(), bytes.size(), bytes.begin());
}
public:
__device__ carrier &operator=(value_type value) noexcept {
copy_n(reinterpret_cast<const std::byte *>(&value), 3, &bytes[0]);
public:
__device__ carrier& operator=(value_type value) noexcept
{
copy_n(reinterpret_cast<const std::byte*>(&value), bytes.size(), bytes.begin());
return *this;
}
return *this;
}
__device__ operator value_type() const noexcept {
std::byte result[sizeof(value_type)];
__device__ operator value_type() const noexcept
{
std::byte result[sizeof(value_type)];
copy_n(&bytes[0], 3, result);
copy_n(bytes.begin(), bytes.size(), result);
return *reinterpret_cast<const value_type *>(result);
}
};
return *reinterpret_cast<const value_type*>(result);
}
};
};
static_assert(sizeof(get_carrier<3>::type) == 3);
template <> struct get_carrier<4> {
using type = uint32_t;
template <>
struct get_carrier<4>
{
using type = uint32_t;
};
template <unsigned SizeInBytes>
......@@ -86,43 +97,44 @@ using get_carrier_t = typename get_carrier<SizeInBytes>::type;
} // namespace detail
__device__ inline int32_t amd_wave_read_first_lane(int32_t value) {
return __builtin_amdgcn_readfirstlane(value);
__device__ inline int32_t amd_wave_read_first_lane(int32_t value)
{
return __builtin_amdgcn_readfirstlane(value);
}
template <typename Object, typename = std::enable_if_t<
std::is_class<Object>::value &&
std::is_trivially_copyable<Object>::value>>
__device__ auto amd_wave_read_first_lane(const Object &obj) {
using Size = unsigned;
constexpr Size SgprSize = 4;
constexpr Size ObjectSize = sizeof(Object);
auto *const from_obj = reinterpret_cast<const std::byte *>(&obj);
alignas(Object) std::byte to_obj[ObjectSize];
constexpr Size RemainedSize = ObjectSize % SgprSize;
constexpr Size CompleteSgprCopyBoundary = ObjectSize - RemainedSize;
for (Size offset = 0; offset < CompleteSgprCopyBoundary; offset += SgprSize) {
using Sgpr = detail::get_carrier_t<SgprSize>;
*reinterpret_cast<Sgpr *>(to_obj + offset) = amd_wave_read_first_lane(
*reinterpret_cast<const Sgpr *>(from_obj + offset));
}
if constexpr (0 < RemainedSize) {
using Carrier = detail::get_carrier_t<RemainedSize>;
*reinterpret_cast<Carrier *>(to_obj + CompleteSgprCopyBoundary) =
amd_wave_read_first_lane(*reinterpret_cast<const Carrier *>(
from_obj + CompleteSgprCopyBoundary));
}
/// NOTE: Implicitly start object lifetime. It's better to use
/// std::start_lifetime_at() in this scenario
return *reinterpret_cast<Object *>(to_obj);
template <
typename Object,
typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>>
__device__ auto amd_wave_read_first_lane(const Object& obj)
{
using Size = unsigned;
constexpr Size SgprSize = 4;
constexpr Size ObjectSize = sizeof(Object);
auto* const from_obj = reinterpret_cast<const std::byte*>(&obj);
alignas(Object) std::byte to_obj[ObjectSize];
constexpr Size RemainedSize = ObjectSize % SgprSize;
constexpr Size CompleteSgprCopyBoundary = ObjectSize - RemainedSize;
for(Size offset = 0; offset < CompleteSgprCopyBoundary; offset += SgprSize)
{
using Sgpr = detail::get_carrier_t<SgprSize>;
*reinterpret_cast<Sgpr*>(to_obj + offset) =
amd_wave_read_first_lane(*reinterpret_cast<const Sgpr*>(from_obj + offset));
}
if constexpr(0 < RemainedSize)
{
using Carrier = detail::get_carrier_t<RemainedSize>;
*reinterpret_cast<Carrier*>(to_obj + CompleteSgprCopyBoundary) = amd_wave_read_first_lane(
*reinterpret_cast<const Carrier*>(from_obj + CompleteSgprCopyBoundary));
}
/// NOTE: Implicitly start object lifetime. It's better to use std::start_lifetime_at() in this
/// scenario
return *reinterpret_cast<Object*>(to_obj);
}
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -259,5 +256,3 @@ struct intrin_wmma_i32_16x16x16_iu8_w64<16, 16, neg_a, neg_b, clamp>
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -422,5 +419,3 @@ struct intrin_mfma_f32_16x16x32f8f8<16, 16>
};
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -67,5 +64,3 @@ __host__ __device__ constexpr auto make_array()
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -81,5 +78,3 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -26,5 +23,3 @@ __host__ __device__ PY c_style_pointer_cast(PX p_x)
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -54,5 +51,3 @@
#ifdef CK_USE_AMD_MFMA
#include "ck/utility/amd_xdlops.hpp"
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -159,5 +156,3 @@ __host__ __device__ constexpr auto pick_container_element(const Arr& a, Picks)
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -394,5 +391,3 @@ __host__ __device__ constexpr auto sequence_to_tuple_of_number(Sequence<Is...>)
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -35,22 +32,21 @@ template <typename T, index_t N>
struct vector_type;
// Caution: DO NOT REMOVE
// intentionally have only declaration but no definition to cause compilation
// failure when trying to instantiate this template. The purpose is to catch
// user's mistake when trying to make "vector of vectors"
// intentionally have only declaration but no definition to cause compilation failure when trying to
// instantiate this template. The purpose is to catch user's mistake when trying to make "vector of
// vectors"
template <typename T, index_t V, index_t N>
struct vector_type<T __attribute__((ext_vector_type(V))), N>;
// Caution: DO NOT REMOVE
// intentionally have only declaration but no definition to cause compilation
// failure when trying to instantiate this template. The purpose is to catch
// user's mistake when trying to make "vector of vectors"
// intentionally have only declaration but no definition to cause compilation failure when trying to
// instantiate this template. The purpose is to catch user's mistake when trying to make "vector of
// vectors"
template <typename T, index_t V, index_t N>
struct vector_type<vector_type<T, V>, N>;
// vector_type_maker
// This is the right way to handle "vector of vectors": making a bigger vector
// instead
// This is the right way to handle "vector of vectors": making a bigger vector instead
template <typename T, index_t N>
struct vector_type_maker
{
......@@ -1253,5 +1249,3 @@ struct NumericLimits<f8_t>
};
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -11,76 +8,78 @@ namespace ck {
namespace debug {
namespace detail {
template <typename T, typename Enable = void> struct PrintAsType;
template <typename T, typename Enable = void>
struct PrintAsType;
template <typename T>
struct PrintAsType<
T, typename std::enable_if<std::is_floating_point<T>::value>::type> {
using type = float;
__host__ __device__ static void Print(const T &p) {
printf("%.3f ", static_cast<type>(p));
}
struct PrintAsType<T, typename std::enable_if<std::is_floating_point<T>::value>::type>
{
using type = float;
__host__ __device__ static void Print(const T& p) { printf("%.3f ", static_cast<type>(p)); }
};
template <> struct PrintAsType<ck::half_t, void> {
using type = float;
__host__ __device__ static void Print(const ck::half_t &p) {
printf("%.3f ", static_cast<type>(p));
}
template <>
struct PrintAsType<ck::half_t, void>
{
using type = float;
__host__ __device__ static void Print(const ck::half_t& p)
{
printf("%.3f ", static_cast<type>(p));
}
};
template <typename T>
struct PrintAsType<T,
typename std::enable_if<std::is_integral<T>::value>::type> {
using type = int;
__host__ __device__ static void Print(const T &p) {
printf("%d ", static_cast<type>(p));
}
struct PrintAsType<T, typename std::enable_if<std::is_integral<T>::value>::type>
{
using type = int;
__host__ __device__ static void Print(const T& p) { printf("%d ", static_cast<type>(p)); }
};
} // namespace detail
// Print at runtime the data in shared memory in 128 bytes per row format given
// shared mem pointer and the number of elements. Can optionally specify strides
// between elements and how many bytes' worth of data per row.
// Print at runtime the data in shared memory in 128 bytes per row format given shared mem pointer
// and the number of elements. Can optionally specify strides between elements and how many bytes'
// worth of data per row.
//
// Usage example:
//
// debug::print_shared(a_block_buf.p_data_,
// index_t(a_block_desc_k0_m_k1.GetElementSpaceSize()));
// debug::print_shared(a_block_buf.p_data_, index_t(a_block_desc_k0_m_k1.GetElementSpaceSize()));
//
template <typename T, index_t element_stride = 1, index_t row_bytes = 128>
__device__ void print_shared(T const *p_shared, index_t num_elements) {
constexpr index_t row_elements = row_bytes / sizeof(T);
static_assert((element_stride >= 1 && element_stride <= row_elements),
"element_stride should between [1, row_elements]");
__device__ void print_shared(T const* p_shared, index_t num_elements)
{
constexpr index_t row_elements = row_bytes / sizeof(T);
static_assert((element_stride >= 1 && element_stride <= row_elements),
"element_stride should between [1, row_elements]");
index_t wgid =
blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
index_t tid = (threadIdx.z * (blockDim.x * blockDim.y)) +
(threadIdx.y * blockDim.x) + threadIdx.x;
index_t wgid = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
index_t tid =
(threadIdx.z * (blockDim.x * blockDim.y)) + (threadIdx.y * blockDim.x) + threadIdx.x;
__syncthreads();
__syncthreads();
if (tid == 0) {
printf("\nWorkgroup id %d, bytes per row %d, element stride %d\n\n", wgid,
row_bytes, element_stride);
for (index_t i = 0; i < num_elements; i += row_elements) {
printf("elem %5d: ", i);
for (index_t j = 0; j < row_elements; j += element_stride) {
detail::PrintAsType<T>::Print(p_shared[i + j]);
}
if(tid == 0)
{
printf("\nWorkgroup id %d, bytes per row %d, element stride %d\n\n",
wgid,
row_bytes,
element_stride);
for(index_t i = 0; i < num_elements; i += row_elements)
{
printf("elem %5d: ", i);
for(index_t j = 0; j < row_elements; j += element_stride)
{
detail::PrintAsType<T>::Print(p_shared[i + j]);
}
printf("\n");
printf("\n");
}
printf("\n");
}
printf("\n");
}
__syncthreads();
__syncthreads();
}
} // namespace debug
} // namespace ck
#endif // UTILITY_DEBUG_HPP
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -408,5 +405,3 @@ make_dynamic_buffer(T* p, ElementSpaceSize element_space_size, X invalid_element
}
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -13,11 +10,10 @@ using enable_if_t = typename enable_if<B, T>::type;
#endif
namespace ck {
template <bool B, typename T = void> using enable_if = std::enable_if<B, T>;
template <bool B, typename T = void>
using enable_if = std::enable_if<B, T>;
template <bool B, typename T = void>
using enable_if_t = typename std::enable_if<B, T>::type;
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -132,5 +129,3 @@ constexpr auto conditional_expr(X&& x, Y&& y)
}
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -50,5 +47,3 @@ struct static_for
};
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -145,5 +142,3 @@ struct ford
};
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -66,5 +63,3 @@ __host__ __device__ constexpr auto unpack2(F&& f, X&& x, Y&& y)
} // namespace ck
#endif
#pragma clang diagnostic pop
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment