Commit cba8f7f2 authored by Anthony Chang's avatar Anthony Chang
Browse files

Merge remote-tracking branch 'upstream/develop' into gemm-layernorm-4

parents cc50b687 b653c5eb
#ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_V4R1_HPP // SPDX-License-Identifier: MIT
#define CK_THREADWISE_TENSOR_SLICE_TRANSFER_V4R1_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common_header.hpp" #pragma once
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
namespace ck { namespace ck {
// Assume: // Assume:
...@@ -171,4 +173,3 @@ struct ThreadwiseTensorSliceTransfer_v4r1 ...@@ -171,4 +173,3 @@ struct ThreadwiseTensorSliceTransfer_v4r1
}; };
} // namespace ck } // namespace ck
#endif
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include "common_header.hpp" #include "ck/utility/common_header.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_space_filling_curve.hpp"
namespace ck { namespace ck {
......
#ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_V6R1_HPP // SPDX-License-Identifier: MIT
#define CK_THREADWISE_TENSOR_SLICE_TRANSFER_V6R1_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common_header.hpp" #pragma once
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/utility/common_header.hpp"
#include "tensor_space_filling_curve.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_space_filling_curve.hpp"
namespace ck { namespace ck {
...@@ -206,7 +208,6 @@ struct ThreadwiseTensorSliceTransfer_v6r1 ...@@ -206,7 +208,6 @@ struct ThreadwiseTensorSliceTransfer_v6r1
SrcCoord src_coord_; SrcCoord src_coord_;
DstCoord dst_coord_; DstCoord dst_coord_;
const ElementwiseOperation element_op_; const ElementwiseOperation element_op_;
}; // namespace ck };
} // namespace ck } // namespace ck
#endif
#ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_V6R2_HPP // SPDX-License-Identifier: MIT
#define CK_THREADWISE_TENSOR_SLICE_TRANSFER_V6R2_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common_header.hpp" #pragma once
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/utility/common_header.hpp"
#include "tensor_space_filling_curve.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_space_filling_curve.hpp"
namespace ck { namespace ck {
...@@ -256,4 +258,3 @@ struct ThreadwiseTensorSliceTransfer_v6r2 ...@@ -256,4 +258,3 @@ struct ThreadwiseTensorSliceTransfer_v6r2
}; };
} // namespace ck } // namespace ck
#endif
#ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_V6R3_HPP // SPDX-License-Identifier: MIT
#define CK_THREADWISE_TENSOR_SLICE_TRANSFER_V6R3_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common_header.hpp" #pragma once
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/utility/common_header.hpp"
#include "tensor_space_filling_curve.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_space_filling_curve.hpp"
namespace ck { namespace ck {
...@@ -306,4 +308,3 @@ struct ThreadwiseTensorSliceTransfer_v6r3 ...@@ -306,4 +308,3 @@ struct ThreadwiseTensorSliceTransfer_v6r3
}; };
} // namespace ck } // namespace ck
#endif
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include "common_header.hpp" #include "ck/utility/common_header.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_space_filling_curve.hpp" #include "ck/tensor_description/tensor_space_filling_curve.hpp"
namespace ck { namespace ck {
......
#ifndef CK_XDLOPS_GEMM_HPP // SPDX-License-Identifier: MIT
#define CK_XDLOPS_GEMM_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common_header.hpp" #pragma once
#include "math.hpp"
#include "amd_xdlops.hpp" #include "ck/utility/common_header.hpp"
#include "ck/utility/math.hpp"
#include "ck/utility/amd_xdlops.hpp"
namespace ck { namespace ck {
...@@ -786,4 +788,3 @@ struct XdlopsGemm ...@@ -786,4 +788,3 @@ struct XdlopsGemm
}; };
} // namespace ck } // namespace ck
#endif
#ifndef CK_AMD_ADDRESS_SPACE_HPP // SPDX-License-Identifier: MIT
#define CK_AMD_ADDRESS_SPACE_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "config.hpp" #pragma once
#include "ck/ck.hpp"
#include "c_style_pointer_cast.hpp" #include "c_style_pointer_cast.hpp"
// Address Space for AMDGCN // Address Space for AMDGCN
...@@ -41,4 +43,3 @@ __host__ __device__ T CK_CONSTANT_ADDRESS_SPACE* cast_pointer_to_constant_addres ...@@ -41,4 +43,3 @@ __host__ __device__ T CK_CONSTANT_ADDRESS_SPACE* cast_pointer_to_constant_addres
} }
} // namespace ck } // namespace ck
#endif
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include "data_type.hpp" #include "data_type.hpp"
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_AMD_INLINE_ASM_HPP #ifndef CK_AMD_INLINE_ASM_HPP
#define CK_AMD_INLINE_ASM_HPP #define CK_AMD_INLINE_ASM_HPP
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_AMD_LLVM_INTRINSIC_HPP #ifndef CK_AMD_LLVM_INTRINSIC_HPP
#define CK_AMD_LLVM_INTRINSIC_HPP #define CK_AMD_LLVM_INTRINSIC_HPP
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_AMD_XDLOPS_HPP #ifndef CK_AMD_XDLOPS_HPP
#define CK_AMD_XDLOPS_HPP #define CK_AMD_XDLOPS_HPP
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_ARRAY_HPP #ifndef CK_ARRAY_HPP
#define CK_ARRAY_HPP #define CK_ARRAY_HPP
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_ARRAY_MULTI_INDEX_HPP #ifndef CK_ARRAY_MULTI_INDEX_HPP
#define CK_ARRAY_MULTI_INDEX_HPP #define CK_ARRAY_MULTI_INDEX_HPP
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_C_STYLE_POINTER_CAST_HPP #ifndef CK_C_STYLE_POINTER_CAST_HPP
#define CK_C_STYLE_POINTER_CAST_HPP #define CK_C_STYLE_POINTER_CAST_HPP
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include "config.hpp"
#include "array.hpp"
#include "container_helper.hpp"
#include "statically_indexed_array.hpp"
#include "container_element_picker.hpp"
#include "multi_index.hpp"
#include "data_type.hpp"
#include "data_type_enum.hpp"
#include "data_type_enum_helper.hpp"
#include "functional.hpp"
#include "functional2.hpp"
#include "functional3.hpp"
#include "functional4.hpp"
#include "enable_if.hpp"
#include "ignore.hpp"
#include "integral_constant.hpp"
#include "math.hpp"
#include "number.hpp"
#include "sequence.hpp"
#include "sequence_helper.hpp"
#include "tuple.hpp"
#include "tuple_helper.hpp"
#include "type.hpp"
#include "magic_division.hpp"
#include "c_style_pointer_cast.hpp"
#include "is_known_at_compile_time.hpp"
#include "transpose_vectors.hpp"
#include "inner_product.hpp"
#include "element_wise_operation.hpp"
#include "thread_group.hpp"
#include "debug.hpp"
#include "amd_buffer_addressing.hpp" #include "ck/ck.hpp"
#include "generic_memory_space_atomic.hpp" #include "ck/utility/array.hpp"
#include "get_id.hpp" #include "ck/utility/container_helper.hpp"
#include "synchronization.hpp" #include "ck/utility/statically_indexed_array.hpp"
#include "amd_address_space.hpp" #include "ck/utility/container_element_picker.hpp"
#include "static_buffer.hpp" #include "ck/utility/multi_index.hpp"
#include "dynamic_buffer.hpp" #include "ck/utility/data_type.hpp"
#include "ck/utility/functional.hpp"
#include "ck/utility/functional2.hpp"
#include "ck/utility/functional3.hpp"
#include "ck/utility/functional4.hpp"
#include "ck/utility/enable_if.hpp"
#include "ck/utility/ignore.hpp"
#include "ck/utility/integral_constant.hpp"
#include "ck/utility/math.hpp"
#include "ck/utility/number.hpp"
#include "ck/utility/sequence.hpp"
#include "ck/utility/sequence_helper.hpp"
#include "ck/utility/tuple.hpp"
#include "ck/utility/tuple_helper.hpp"
#include "ck/utility/type.hpp"
#include "ck/utility/magic_division.hpp"
#include "ck/utility/c_style_pointer_cast.hpp"
#include "ck/utility/is_known_at_compile_time.hpp"
#include "ck/utility/transpose_vectors.hpp"
#include "ck/utility/inner_product.hpp"
#include "ck/utility/thread_group.hpp"
#include "ck/utility/debug.hpp"
#include "ck/utility/amd_buffer_addressing.hpp"
#include "ck/utility/generic_memory_space_atomic.hpp"
#include "ck/utility/get_id.hpp"
#include "ck/utility/thread_group.hpp"
#include "ck/utility/synchronization.hpp"
#include "ck/utility/amd_address_space.hpp"
#include "ck/utility/static_buffer.hpp"
#include "ck/utility/dynamic_buffer.hpp"
// TODO: remove this // TODO: remove this
#if CK_USE_AMD_INLINE_ASM #if CK_USE_AMD_INLINE_ASM
#include "amd_inline_asm.hpp" #include "ck/utility/amd_inline_asm.hpp"
#endif #endif
#ifdef CK_USE_AMD_MFMA #ifdef CK_USE_AMD_MFMA
#include "amd_xdlops.hpp" #include "ck/utility/amd_xdlops.hpp"
#endif #endif
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_CONTAINER_ELEMENT_PICKER_HPP #ifndef CK_CONTAINER_ELEMENT_PICKER_HPP
#define CK_CONTAINER_ELEMENT_PICKER_HPP #define CK_CONTAINER_ELEMENT_PICKER_HPP
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_CONTAINER_HELPER_HPP #ifndef CK_CONTAINER_HELPER_HPP
#define CK_CONTAINER_HELPER_HPP #define CK_CONTAINER_HELPER_HPP
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include "statically_indexed_array.hpp" #include "ck/utility/statically_indexed_array.hpp"
namespace ck { namespace ck {
...@@ -1001,6 +1004,11 @@ struct NumericLimits ...@@ -1001,6 +1004,11 @@ struct NumericLimits
__host__ __device__ static constexpr T Max() { return std::numeric_limits<T>::max(); } __host__ __device__ static constexpr T Max() { return std::numeric_limits<T>::max(); }
__host__ __device__ static constexpr T Lowest() { return std::numeric_limits<T>::lowest(); } __host__ __device__ static constexpr T Lowest() { return std::numeric_limits<T>::lowest(); }
__host__ __device__ static constexpr T QuietNaN()
{
return std::numeric_limits<T>::quiet_NaN();
}
}; };
template <> template <>
...@@ -1009,12 +1017,15 @@ struct NumericLimits<half_t> ...@@ -1009,12 +1017,15 @@ struct NumericLimits<half_t>
static constexpr unsigned short binary_min = 0x0400; static constexpr unsigned short binary_min = 0x0400;
static constexpr unsigned short binary_max = 0x7BFF; static constexpr unsigned short binary_max = 0x7BFF;
static constexpr unsigned short binary_lowest = 0xFBFF; static constexpr unsigned short binary_lowest = 0xFBFF;
static constexpr unsigned short binary_qnan = 0x7FFF;
__host__ __device__ static constexpr half_t Min() { return bit_cast<half_t>(binary_min); } __host__ __device__ static constexpr half_t Min() { return bit_cast<half_t>(binary_min); }
__host__ __device__ static constexpr half_t Max() { return bit_cast<half_t>(binary_max); } __host__ __device__ static constexpr half_t Max() { return bit_cast<half_t>(binary_max); }
__host__ __device__ static constexpr half_t Lowest() { return bit_cast<half_t>(binary_lowest); } __host__ __device__ static constexpr half_t Lowest() { return bit_cast<half_t>(binary_lowest); }
__host__ __device__ static constexpr half_t QuietNaN() { return bit_cast<half_t>(binary_qnan); }
}; };
} // namespace ck } // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef UTILITY_DEBUG_HPP #ifndef UTILITY_DEBUG_HPP
#define UTILITY_DEBUG_HPP #define UTILITY_DEBUG_HPP
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment