Commit 253f942b authored by Umang Yadav
Browse files

changes to make it compile

parent 8f9c0243
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -258,3 +261,5 @@ struct ThreadwiseTensorSliceTransfer_v6r2 ...@@ -258,3 +261,5 @@ struct ThreadwiseTensorSliceTransfer_v6r2
}; };
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -308,3 +311,5 @@ struct ThreadwiseTensorSliceTransfer_v6r3 ...@@ -308,3 +311,5 @@ struct ThreadwiseTensorSliceTransfer_v6r3
}; };
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -296,3 +299,5 @@ struct ThreadwiseTensorSliceTransfer_v7 ...@@ -296,3 +299,5 @@ struct ThreadwiseTensorSliceTransfer_v7
}; };
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -135,3 +138,5 @@ struct ThreadwiseWelfordMerge ...@@ -135,3 +138,5 @@ struct ThreadwiseWelfordMerge
}; };
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -505,3 +508,5 @@ struct WmmaGemm ...@@ -505,3 +508,5 @@ struct WmmaGemm
}; };
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -965,3 +968,5 @@ struct XdlopsGemm ...@@ -965,3 +968,5 @@ struct XdlopsGemm
}; };
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -286,3 +289,5 @@ struct TransformBatchedContractionContractionToBatchedGemmGemm ...@@ -286,3 +289,5 @@ struct TransformBatchedContractionContractionToBatchedGemmGemm
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -1132,3 +1135,5 @@ struct TransformConvBwdDataToGemm_v1 ...@@ -1132,3 +1135,5 @@ struct TransformConvBwdDataToGemm_v1
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -878,3 +881,5 @@ struct TransformConvFwdToGemm ...@@ -878,3 +881,5 @@ struct TransformConvFwdToGemm
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -43,3 +46,5 @@ __host__ __device__ T CK_CONSTANT_ADDRESS_SPACE* cast_pointer_to_constant_addres ...@@ -43,3 +46,5 @@ __host__ __device__ T CK_CONSTANT_ADDRESS_SPACE* cast_pointer_to_constant_addres
} }
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -1312,3 +1315,5 @@ amd_buffer_atomic_max(const typename vector_type_maker<T, N>::type::type src_thr ...@@ -1312,3 +1315,5 @@ amd_buffer_atomic_max(const typename vector_type_maker<T, N>::type::type src_thr
} }
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -369,3 +372,5 @@ __device__ void amd_assembly_wmma_f32_16x16x16_f16_w32(half16_t a, half16_t b, f ...@@ -369,3 +372,5 @@ __device__ void amd_assembly_wmma_f32_16x16x16_f16_w32(half16_t a, half16_t b, f
} // namespace ck } // namespace ck
#endif #endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
...@@ -7,87 +10,75 @@ ...@@ -7,87 +10,75 @@
#include "ck/utility/functional2.hpp" #include "ck/utility/functional2.hpp"
#include "ck/utility/math.hpp" #include "ck/utility/math.hpp"
#ifndef __HIPCC_RTC__
#include <array> #include <array>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <type_traits> #include <type_traits>
#endif
namespace ck { namespace ck {
namespace detail { namespace detail {
// Primary template: maps a size in bytes to a "carrier" type exposed as the
// nested alias `type`. It is only declared here; the explicit specializations
// for 1, 2, 3 and 4 bytes supply the definitions.
// NOTE(review): each line in this region appears to hold the pre-change and
// post-change text of a side-by-side diff, which is why tokens occur twice.
template <unsigned SizeInBytes> template <unsigned SizeInBytes> struct get_carrier;
struct get_carrier;
// Carrier for a 1-byte payload: `type` is uint8_t.
template <> template <> struct get_carrier<1> {
struct get_carrier<1> using type = uint8_t;
{
using type = uint8_t;
}; };
// Carrier for a 2-byte payload: `type` is uint16_t.
template <> template <> struct get_carrier<2> {
struct get_carrier<2> using type = uint16_t;
{
using type = uint16_t;
}; };
// Carrier for a 3-byte payload. No built-in integer is 3 bytes wide, so `type`
// is a small class (`carrier`) that stores exactly three std::byte and converts
// to and from uint32_t (`value_type`).
//
// Storage: std::array<std::byte, 3> in the older text, a plain
// `std::byte bytes[3]` in the newer text (the std::array line is commented out
// there, and the calls use &bytes[0] / the literal 3 instead of begin()/size()).
//
// Members, in declaration order:
//  - copy_n(from, size, to): element-by-element copy of `size` items; returns
//    the output iterator advanced past the last element written. The existing
//    comment describes it as a replacement for host std::copy_n().
//  - carrier(const carrier&): declared before `public:`, so it has the class
//    default (private) access. It is user-provided, which makes the class
//    non-trivially-copyable; presumably that is what keeps the class-type
//    overload of amd_wave_read_first_lane (which requires a trivially copyable
//    Object) from being chosen for this type -- TODO confirm.
//  - operator=(value_type): copies the first three bytes of `value`'s object
//    representation into `bytes`.
//  - operator value_type(): copies the three stored bytes into a local buffer
//    of sizeof(value_type) bytes and reads that buffer back as a value_type.
//
// The static_assert after the struct pins sizeof(get_carrier<3>::type) to 3.
template <> template <> struct get_carrier<3> {
struct get_carrier<3> using type = class carrier {
{ using value_type = uint32_t;
using type = class carrier
{ // std::array<std::byte, 3> bytes;
using value_type = uint32_t; std::byte bytes[3];
static_assert(sizeof(bytes) <= sizeof(value_type));
std::array<std::byte, 3> bytes;
static_assert(sizeof(bytes) <= sizeof(value_type)); // replacement of host std::copy_n()
template <typename InputIterator, typename Size, typename OutputIterator>
// replacement of host std::copy_n() __device__ static OutputIterator copy_n(InputIterator from, Size size,
template <typename InputIterator, typename Size, typename OutputIterator> OutputIterator to) {
__device__ static OutputIterator copy_n(InputIterator from, Size size, OutputIterator to) if (0 < size) {
{ *to = *from;
if(0 < size) ++to;
{ for (Size count = 1; count < size; ++count) {
*to = *from; *to = *++from;
++to; ++to;
for(Size count = 1; count < size; ++count)
{
*to = *++from;
++to;
}
}
return to;
} }
}
// method to trigger template substitution failure return to;
__device__ carrier(const carrier& other) noexcept }
{
copy_n(other.bytes.begin(), bytes.size(), bytes.begin());
}
public: // method to trigger template substitution failure
__device__ carrier& operator=(value_type value) noexcept __device__ carrier(const carrier &other) noexcept {
{ copy_n(&other.bytes[0], 3, &bytes[0]);
copy_n(reinterpret_cast<const std::byte*>(&value), bytes.size(), bytes.begin()); }
return *this; public:
} __device__ carrier &operator=(value_type value) noexcept {
copy_n(reinterpret_cast<const std::byte *>(&value), 3, &bytes[0]);
__device__ operator value_type() const noexcept return *this;
{ }
// NOTE(review): `result` is sizeof(value_type) bytes but only three of them
// are written before the whole buffer is read back as a value_type, so the
// remaining byte is never initialized. `result` also carries no alignment
// specifier for value_type. Confirm both are acceptable on the target.
std::byte result[sizeof(value_type)];
copy_n(bytes.begin(), bytes.size(), result); __device__ operator value_type() const noexcept {
std::byte result[sizeof(value_type)];
return *reinterpret_cast<const value_type*>(result); copy_n(&bytes[0], 3, result);
}
}; return *reinterpret_cast<const value_type *>(result);
}
};
}; };
static_assert(sizeof(get_carrier<3>::type) == 3); static_assert(sizeof(get_carrier<3>::type) == 3);
// Carrier for a 4-byte payload: `type` is uint32_t.
template <> template <> struct get_carrier<4> {
struct get_carrier<4> using type = uint32_t;
{
using type = uint32_t;
}; };
template <unsigned SizeInBytes> template <unsigned SizeInBytes>
...@@ -95,44 +86,43 @@ using get_carrier_t = typename get_carrier<SizeInBytes>::type; ...@@ -95,44 +86,43 @@ using get_carrier_t = typename get_carrier<SizeInBytes>::type;
} // namespace detail } // namespace detail
// Scalar overload: forwards `value` to __builtin_amdgcn_readfirstlane and
// returns the builtin's result unchanged.
// NOTE(review): presumably the builtin yields the value held by the first
// active lane of the wavefront -- confirm against the AMDGPU builtin docs.
__device__ inline int32_t amd_wave_read_first_lane(int32_t value) __device__ inline int32_t amd_wave_read_first_lane(int32_t value) {
{ return __builtin_amdgcn_readfirstlane(value);
return __builtin_amdgcn_readfirstlane(value);
} }
// Object overload: participates in overload resolution only when Object is a
// class type that is trivially copyable (the enable_if_t default argument).
// The older text spells the traits as std::is_class_v / is_trivially_copyable_v,
// the newer text as std::is_class<...>::value / is_trivially_copyable<...>::value.
//
// How it works:
//  1. View `obj` as raw bytes (`from_obj`) and prepare a byte buffer `to_obj`
//     of sizeof(Object) bytes, aligned as Object.
//  2. For every complete SgprSize (4-byte) chunk, read it as
//     detail::get_carrier_t<4>, pass it through amd_wave_read_first_lane, and
//     store the result at the same offset in `to_obj`.
//  3. If sizeof(Object) is not a multiple of 4, handle the trailing 1..3 bytes
//     the same way using detail::get_carrier_t<RemainedSize>.
//  4. Return the contents of `to_obj` reinterpreted as an Object (by value,
//     since the return type is deduced with plain `auto`).
template < template <typename Object, typename = std::enable_if_t<
typename Object, std::is_class<Object>::value &&
typename = std::enable_if_t<std::is_class_v<Object> && std::is_trivially_copyable_v<Object>>> std::is_trivially_copyable<Object>::value>>
__device__ auto amd_wave_read_first_lane(const Object& obj) __device__ auto amd_wave_read_first_lane(const Object &obj) {
{ using Size = unsigned;
using Size = unsigned; constexpr Size SgprSize = 4;
constexpr Size SgprSize = 4; constexpr Size ObjectSize = sizeof(Object);
constexpr Size ObjectSize = sizeof(Object);
auto *const from_obj = reinterpret_cast<const std::byte *>(&obj);
auto* const from_obj = reinterpret_cast<const std::byte*>(&obj); alignas(Object) std::byte to_obj[ObjectSize];
alignas(Object) std::byte to_obj[ObjectSize];
constexpr Size RemainedSize = ObjectSize % SgprSize;
constexpr Size RemainedSize = ObjectSize % SgprSize; constexpr Size CompleteSgprCopyBoundary = ObjectSize - RemainedSize;
constexpr Size CompleteSgprCopyBoundary = ObjectSize - RemainedSize; for (Size offset = 0; offset < CompleteSgprCopyBoundary; offset += SgprSize) {
for(Size offset = 0; offset < CompleteSgprCopyBoundary; offset += SgprSize) using Sgpr = detail::get_carrier_t<SgprSize>;
{
using Sgpr = detail::get_carrier_t<SgprSize>; *reinterpret_cast<Sgpr *>(to_obj + offset) = amd_wave_read_first_lane(
*reinterpret_cast<const Sgpr *>(from_obj + offset));
*reinterpret_cast<Sgpr*>(to_obj + offset) = }
amd_wave_read_first_lane(*reinterpret_cast<const Sgpr*>(from_obj + offset));
} if constexpr (0 < RemainedSize) {
using Carrier = detail::get_carrier_t<RemainedSize>;
if constexpr(0 < RemainedSize)
{ *reinterpret_cast<Carrier *>(to_obj + CompleteSgprCopyBoundary) =
using Carrier = detail::get_carrier_t<RemainedSize>; amd_wave_read_first_lane(*reinterpret_cast<const Carrier *>(
from_obj + CompleteSgprCopyBoundary));
*reinterpret_cast<Carrier*>(to_obj + CompleteSgprCopyBoundary) = amd_wave_read_first_lane( }
*reinterpret_cast<const Carrier*>(from_obj + CompleteSgprCopyBoundary));
// NOTE(review): the standard library has no std::start_lifetime_at; the C++23
// facility for this purpose is std::start_lifetime_as, which is presumably
// what the note below refers to.
} /// NOTE: Implicitly start object lifetime. It's better to use
/// std::start_lifetime_at() in this scenario
/// NOTE: Implicitly start object lifetime. It's better to use std::start_lifetime_at() in this return *reinterpret_cast<Object *>(to_obj);
/// scenario
return *reinterpret_cast<Object*>(to_obj);
} }
} // namespace ck } // namespace ck
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -256,3 +259,5 @@ struct intrin_wmma_i32_16x16x16_iu8_w64<16, 16, neg_a, neg_b, clamp> ...@@ -256,3 +259,5 @@ struct intrin_wmma_i32_16x16x16_iu8_w64<16, 16, neg_a, neg_b, clamp>
} // namespace ck } // namespace ck
#endif #endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -419,3 +422,5 @@ struct intrin_mfma_f32_16x16x32f8f8<16, 16> ...@@ -419,3 +422,5 @@ struct intrin_mfma_f32_16x16x32f8f8<16, 16>
}; };
} // namespace ck } // namespace ck
#endif #endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -64,3 +67,5 @@ __host__ __device__ constexpr auto make_array() ...@@ -64,3 +67,5 @@ __host__ __device__ constexpr auto make_array()
} // namespace ck } // namespace ck
#endif #endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -78,3 +81,5 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T ...@@ -78,3 +81,5 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
} // namespace ck } // namespace ck
#endif #endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -23,3 +26,5 @@ __host__ __device__ PY c_style_pointer_cast(PX p_x) ...@@ -23,3 +26,5 @@ __host__ __device__ PY c_style_pointer_cast(PX p_x)
} // namespace ck } // namespace ck
#endif #endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -51,3 +54,5 @@ ...@@ -51,3 +54,5 @@
#ifdef CK_USE_AMD_MFMA #ifdef CK_USE_AMD_MFMA
#include "ck/utility/amd_xdlops.hpp" #include "ck/utility/amd_xdlops.hpp"
#endif #endif
#pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...@@ -156,3 +159,5 @@ __host__ __device__ constexpr auto pick_container_element(const Arr& a, Picks) ...@@ -156,3 +159,5 @@ __host__ __device__ constexpr auto pick_container_element(const Arr& a, Picks)
} // namespace ck } // namespace ck
#endif #endif
#pragma clang diagnostic pop
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment