Commit 253f942b authored by Umang Yadav
Browse files

changes to make it compile

parent 8f9c0243
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -258,3 +261,5 @@ struct ThreadwiseTensorSliceTransfer_v6r2
};
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -308,3 +311,5 @@ struct ThreadwiseTensorSliceTransfer_v6r3
};
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -296,3 +299,5 @@ struct ThreadwiseTensorSliceTransfer_v7
};
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -135,3 +138,5 @@ struct ThreadwiseWelfordMerge
};
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -505,3 +508,5 @@ struct WmmaGemm
};
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -965,3 +968,5 @@ struct XdlopsGemm
};
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -286,3 +289,5 @@ struct TransformBatchedContractionContractionToBatchedGemmGemm
} // namespace tensor_operation
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -1132,3 +1135,5 @@ struct TransformConvBwdDataToGemm_v1
} // namespace tensor_operation
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -878,3 +881,5 @@ struct TransformConvFwdToGemm
} // namespace tensor_operation
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -43,3 +46,5 @@ __host__ __device__ T CK_CONSTANT_ADDRESS_SPACE* cast_pointer_to_constant_addres
}
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -1312,3 +1315,5 @@ amd_buffer_atomic_max(const typename vector_type_maker<T, N>::type::type src_thr
}
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -369,3 +372,5 @@ __device__ void amd_assembly_wmma_f32_16x16x16_f16_w32(half16_t a, half16_t b, f
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
......@@ -7,87 +10,75 @@
#include "ck/utility/functional2.hpp"
#include "ck/utility/math.hpp"
#ifndef __HIPCC_RTC__
#include <array>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#endif
namespace ck {
namespace detail {
template <unsigned SizeInBytes>
struct get_carrier;
template <unsigned SizeInBytes> struct get_carrier;
template <>
struct get_carrier<1>
{
using type = uint8_t;
template <> struct get_carrier<1> {
using type = uint8_t;
};
template <>
struct get_carrier<2>
{
using type = uint16_t;
template <> struct get_carrier<2> {
using type = uint16_t;
};
template <>
struct get_carrier<3>
{
using type = class carrier
{
using value_type = uint32_t;
std::array<std::byte, 3> bytes;
static_assert(sizeof(bytes) <= sizeof(value_type));
// replacement of host std::copy_n()
template <typename InputIterator, typename Size, typename OutputIterator>
__device__ static OutputIterator copy_n(InputIterator from, Size size, OutputIterator to)
{
if(0 < size)
{
*to = *from;
++to;
for(Size count = 1; count < size; ++count)
{
*to = *++from;
++to;
}
}
return to;
template <> struct get_carrier<3> {
using type = class carrier {
using value_type = uint32_t;
// std::array<std::byte, 3> bytes;
std::byte bytes[3];
static_assert(sizeof(bytes) <= sizeof(value_type));
// replacement of host std::copy_n()
template <typename InputIterator, typename Size, typename OutputIterator>
__device__ static OutputIterator copy_n(InputIterator from, Size size,
OutputIterator to) {
if (0 < size) {
*to = *from;
++to;
for (Size count = 1; count < size; ++count) {
*to = *++from;
++to;
}
}
// method to trigger template substitution failure
__device__ carrier(const carrier& other) noexcept
{
copy_n(other.bytes.begin(), bytes.size(), bytes.begin());
}
return to;
}
public:
__device__ carrier& operator=(value_type value) noexcept
{
copy_n(reinterpret_cast<const std::byte*>(&value), bytes.size(), bytes.begin());
// method to trigger template substitution failure
__device__ carrier(const carrier &other) noexcept {
copy_n(&other.bytes[0], 3, &bytes[0]);
}
return *this;
}
public:
__device__ carrier &operator=(value_type value) noexcept {
copy_n(reinterpret_cast<const std::byte *>(&value), 3, &bytes[0]);
__device__ operator value_type() const noexcept
{
std::byte result[sizeof(value_type)];
return *this;
}
copy_n(bytes.begin(), bytes.size(), result);
__device__ operator value_type() const noexcept {
std::byte result[sizeof(value_type)];
return *reinterpret_cast<const value_type*>(result);
}
};
copy_n(&bytes[0], 3, result);
return *reinterpret_cast<const value_type *>(result);
}
};
};
static_assert(sizeof(get_carrier<3>::type) == 3);
template <>
struct get_carrier<4>
{
using type = uint32_t;
template <> struct get_carrier<4> {
using type = uint32_t;
};
template <unsigned SizeInBytes>
......@@ -95,44 +86,43 @@ using get_carrier_t = typename get_carrier<SizeInBytes>::type;
} // namespace detail
__device__ inline int32_t amd_wave_read_first_lane(int32_t value)
{
return __builtin_amdgcn_readfirstlane(value);
__device__ inline int32_t amd_wave_read_first_lane(int32_t value) {
return __builtin_amdgcn_readfirstlane(value);
}
template <
typename Object,
typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>>
__device__ auto amd_wave_read_first_lane(const Object& obj)
{
using Size = unsigned;
constexpr Size SgprSize = 4;
constexpr Size ObjectSize = sizeof(Object);
auto* const from_obj = reinterpret_cast<const std::byte*>(&obj);
alignas(Object) std::byte to_obj[ObjectSize];
constexpr Size RemainedSize = ObjectSize % SgprSize;
constexpr Size CompleteSgprCopyBoundary = ObjectSize - RemainedSize;
for(Size offset = 0; offset < CompleteSgprCopyBoundary; offset += SgprSize)
{
using Sgpr = detail::get_carrier_t<SgprSize>;
*reinterpret_cast<Sgpr*>(to_obj + offset) =
amd_wave_read_first_lane(*reinterpret_cast<const Sgpr*>(from_obj + offset));
}
if constexpr(0 < RemainedSize)
{
using Carrier = detail::get_carrier_t<RemainedSize>;
*reinterpret_cast<Carrier*>(to_obj + CompleteSgprCopyBoundary) = amd_wave_read_first_lane(
*reinterpret_cast<const Carrier*>(from_obj + CompleteSgprCopyBoundary));
}
/// NOTE: Implicitly start object lifetime. It's better to use std::start_lifetime_at() in this
/// scenario
return *reinterpret_cast<Object*>(to_obj);
/// Object overload of amd_wave_read_first_lane().
///
/// Only participates in overload resolution when Object is a class type that
/// is trivially copyable (see the enable_if below). The object is processed
/// as raw bytes: every complete 4-byte chunk, and then any 1-3 byte tail, is
/// passed through amd_wave_read_first_lane() and written to a local byte
/// buffer, which is finally reinterpreted as an Object and returned by value.
///
/// \tparam Object class type to transfer; must be trivially copyable.
/// \param obj source object; it is only read, byte-wise.
/// \return a new Object assembled from the per-chunk results.
template <typename Object, typename = std::enable_if_t<
std::is_class<Object>::value &&
std::is_trivially_copyable<Object>::value>>
__device__ auto amd_wave_read_first_lane(const Object &obj) {
using Size = unsigned;
// Chunk width in bytes used for the main copy loop.
constexpr Size SgprSize = 4;
constexpr Size ObjectSize = sizeof(Object);
// Byte view of the source object.
auto *const from_obj = reinterpret_cast<const std::byte *>(&obj);
// Destination storage, aligned like Object so it can be reinterpreted below.
alignas(Object) std::byte to_obj[ObjectSize];
// Number of trailing bytes that do not fill a whole 4-byte chunk (0..3).
constexpr Size RemainedSize = ObjectSize % SgprSize;
// Offset at which the whole-chunk region ends and the tail (if any) begins.
constexpr Size CompleteSgprCopyBoundary = ObjectSize - RemainedSize;
// Whole chunks: each 4-byte piece is read through the 4-byte carrier type and
// forwarded to amd_wave_read_first_lane(); the carrier is not a class type,
// so this template is not re-selected for it.
// NOTE(review): the chunk accesses assume from_obj/to_obj are suitably
// aligned for the 4-byte carrier; Object's alignment is not checked here —
// confirm for objects with alignment smaller than 4.
for (Size offset = 0; offset < CompleteSgprCopyBoundary; offset += SgprSize) {
using Sgpr = detail::get_carrier_t<SgprSize>;
*reinterpret_cast<Sgpr *>(to_obj + offset) = amd_wave_read_first_lane(
*reinterpret_cast<const Sgpr *>(from_obj + offset));
}
// Tail: compiled only when the object size is not a multiple of 4. The
// carrier type is chosen by the tail size via detail::get_carrier_t.
if constexpr (0 < RemainedSize) {
using Carrier = detail::get_carrier_t<RemainedSize>;
*reinterpret_cast<Carrier *>(to_obj + CompleteSgprCopyBoundary) =
amd_wave_read_first_lane(*reinterpret_cast<const Carrier *>(
from_obj + CompleteSgprCopyBoundary));
}
/// NOTE: Implicitly start object lifetime. It's better to use
/// std::start_lifetime_at() in this scenario
/// (review: the C++23 facility is spelled std::start_lifetime_as()).
return *reinterpret_cast<Object *>(to_obj);
}
} // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -256,3 +259,5 @@ struct intrin_wmma_i32_16x16x16_iu8_w64<16, 16, neg_a, neg_b, clamp>
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -419,3 +422,5 @@ struct intrin_mfma_f32_16x16x32f8f8<16, 16>
};
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -64,3 +67,5 @@ __host__ __device__ constexpr auto make_array()
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -78,3 +81,5 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -23,3 +26,5 @@ __host__ __device__ PY c_style_pointer_cast(PX p_x)
} // namespace ck
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -51,3 +54,5 @@
#ifdef CK_USE_AMD_MFMA
#include "ck/utility/amd_xdlops.hpp"
#endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
......@@ -156,3 +159,5 @@ __host__ __device__ constexpr auto pick_container_element(const Arr& a, Picks)
} // namespace ck
#endif
#pragma clang diagnostic pop
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment