Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0b11569f
"setup.cfg" did not exist on "fd158e88e82c3fa848017c62a7eccb49a5c64f78"
Commit
0b11569f
authored
Jul 01, 2022
by
Chao Liu
Browse files
Merge remote-tracking branch 'origin/develop' into batched_gemm_c_permute
parents
e8d3a0fb
fa9a0a5c
Changes
554
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
77 additions
and
2 deletions
+77
-2
include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp
...r_operation/gpu/thread/reduction_functions_threadwise.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp
...tensor_operation/gpu/thread/threadwise_contraction_dl.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp
.../tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp
...nsor_operation/gpu/thread/threadwise_tensor_slice_set.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
...operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
+20
-2
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r3.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v3r3.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp
+3
-0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp
...ration/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp
+3
-0
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
+3
-0
include/ck/utility/amd_address_space.hpp
include/ck/utility/amd_address_space.hpp
+3
-0
include/ck/utility/amd_buffer_addressing.hpp
include/ck/utility/amd_buffer_addressing.hpp
+3
-0
include/ck/utility/amd_inline_asm.hpp
include/ck/utility/amd_inline_asm.hpp
+3
-0
include/ck/utility/amd_llvm_intrinsic.hpp
include/ck/utility/amd_llvm_intrinsic.hpp
+3
-0
include/ck/utility/amd_xdlops.hpp
include/ck/utility/amd_xdlops.hpp
+3
-0
include/ck/utility/array.hpp
include/ck/utility/array.hpp
+3
-0
No files found.
include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/reduction_functions_accumulate.hpp"
...
...
include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_THREADWISE_GEMM_DLOPS_V3_HPP
#define CK_THREADWISE_GEMM_DLOPS_V3_HPP
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
@@ -233,9 +236,14 @@ template <typename SrcData,
index_t
SrcScalarPerVector
,
index_t
SrcScalarStrideInVector
,
bool
SrcResetCoordinateAfterRun
,
bool
InvalidElementAsNaN
=
false
,
typename
enable_if
<
DstDesc
::
IsKnownAtCompileTime
(),
bool
>
::
type
=
false
>
struct
ThreadwiseTensorSliceTransfer_v2
{
static_assert
((
InvalidElementAsNaN
&&
!
std
::
is_integral
<
DstData
>::
value
)
||
(
!
InvalidElementAsNaN
),
"Filling invalid element as NaN is only for floating point types"
);
static
constexpr
index_t
nDim
=
SliceLengths
::
Size
();
using
Index
=
MultiIndex
<
nDim
>
;
...
...
@@ -315,8 +323,18 @@ struct ThreadwiseTensorSliceTransfer_v2
dst_desc
.
CalculateOffset
(
to_multi_index
(
dst_slice_origin_idx
)
+
src_data_idx
+
i
*
src_scalar_step_in_vector
);
dst_buf
(
Number
<
dst_offset
>
{})
=
type_convert
<
DstData
>
(
src_vector
.
template
AsType
<
SrcData
>()[
i
]);
if
constexpr
(
InvalidElementAsNaN
)
{
dst_buf
(
Number
<
dst_offset
>
{})
=
is_src_valid
?
type_convert
<
DstData
>
(
src_vector
.
template
AsType
<
SrcData
>()[
i
])
:
NumericLimits
<
DstData
>::
QuietNaN
();
}
else
{
dst_buf
(
Number
<
dst_offset
>
{})
=
type_convert
<
DstData
>
(
src_vector
.
template
AsType
<
SrcData
>()[
i
]);
}
});
if
constexpr
(
idx_1d
.
value
!=
num_access
-
1
)
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r3.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_V3R3_HPP
#define CK_THREADWISE_TENSOR_SLICE_TRANSFER_V3R3_HPP
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/utility/common_header.hpp"
...
...
include/ck/utility/amd_address_space.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/ck.hpp"
...
...
include/ck/utility/amd_buffer_addressing.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "data_type.hpp"
...
...
include/ck/utility/amd_inline_asm.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_AMD_INLINE_ASM_HPP
#define CK_AMD_INLINE_ASM_HPP
...
...
include/ck/utility/amd_llvm_intrinsic.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_AMD_LLVM_INTRINSIC_HPP
#define CK_AMD_LLVM_INTRINSIC_HPP
...
...
include/ck/utility/amd_xdlops.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_AMD_XDLOPS_HPP
#define CK_AMD_XDLOPS_HPP
...
...
include/ck/utility/array.hpp
View file @
0b11569f
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_ARRAY_HPP
#define CK_ARRAY_HPP
...
...
Prev
1
…
6
7
8
9
10
11
12
13
14
…
28
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment