Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
795bea35
Commit
795bea35
authored
Sep 26, 2023
by
Umang Yadav
Browse files
remove unnecessary changes
parent
8216854a
Changes
214
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
20 additions
and
120 deletions
+20
-120
include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp
...ion/gpu/device/impl/device_multiple_reduce_threadwise.hpp
+0
-5
include/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp
...r_operation/gpu/device/impl/device_normalization_impl.hpp
+0
-5
include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp
...tion/gpu/device/impl/device_normalization_splitk_impl.hpp
+0
-5
include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp
.../tensor_operation/gpu/device/impl/device_permute_impl.hpp
+0
-5
include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp
...operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp
+0
-5
include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp
...eration/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp
+0
-5
include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp
...tensor_operation/gpu/device/impl/device_reduce_common.hpp
+0
-5
include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp
...or_operation/gpu/device/impl/device_reduce_multiblock.hpp
+0
-5
include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp
...or_operation/gpu/device/impl/device_reduce_threadwise.hpp
+0
-5
include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp
.../tensor_operation/gpu/device/impl/device_softmax_impl.hpp
+1
-6
include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp
...evice/impl/device_sparse_embeddings_forward_layernorm.hpp
+0
-5
include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp
...mpl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp
+19
-24
include/ck/tensor_operation/gpu/device/masking_specialization.hpp
...ck/tensor_operation/gpu/device/masking_specialization.hpp
+0
-5
include/ck/tensor_operation/gpu/device/matrix_padder.hpp
include/ck/tensor_operation/gpu/device/matrix_padder.hpp
+0
-5
include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp
...ensor_operation/gpu/device/reduction_operator_mapping.hpp
+0
-5
include/ck/tensor_operation/gpu/device/tensor_layout.hpp
include/ck/tensor_operation/gpu/device/tensor_layout.hpp
+0
-5
include/ck/tensor_operation/gpu/device/tensor_specialization.hpp
.../ck/tensor_operation/gpu/device/tensor_specialization.hpp
+0
-5
include/ck/tensor_operation/gpu/device/welford_helper.hpp
include/ck/tensor_operation/gpu/device/welford_helper.hpp
+0
-5
include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp
...r_operation/gpu/element/binary_element_wise_operation.hpp
+0
-5
include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
...k/tensor_operation/gpu/element/element_wise_operation.hpp
+0
-5
No files found.
include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -423,5 +420,3 @@ struct DeviceMultipleReduceThreadWise : public DeviceMultipleReduce<Rank,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -403,5 +400,3 @@ struct DeviceNormalizationImpl : public DeviceNormalization<XDataType,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -659,5 +656,3 @@ struct DeviceNormalizationSplitKImpl : public DeviceNormalization<XDataType,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -284,5 +281,3 @@ struct DevicePermuteImpl : DevicePermute<NumDim, InDataType, OutDataType, Elemen
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -114,5 +111,3 @@ struct DevicePool2dFwd_NHWC_NHWC : public DevicePool3dFwd_NDHWC_NDHWC<InDataType
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -412,5 +409,3 @@ struct DevicePool3dFwd_NDHWC_NDHWC : public DevicePoolFwd<5,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -143,5 +140,3 @@ shuffle_tensor_dimensions(const std::array<index_t, Rank>& origLengthsStrides,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -552,5 +549,3 @@ struct DeviceReduceMultiBlock : public DeviceReduce<InDataType,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -394,5 +391,3 @@ struct DeviceReduceThreadWise : public DeviceReduce<InDataType,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -390,7 +387,7 @@ struct DeviceSoftmaxImpl : public DeviceSoftmax<InDataType,
acc_elementwise_op
);
};
#ifndef __HIPCC_RTC__
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
#endif
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
override
...
...
@@ -419,5 +416,3 @@ struct DeviceSoftmaxImpl : public DeviceSoftmax<InDataType,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -194,5 +191,3 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -39,25 +36,25 @@ template <typename GridwiseGemm,
bool
HasMainKBlockLoop
>
__global__
void
#if CK_USE_LAUNCH_BOUNDS
__launch_bounds__
(
CK_MAX_THREAD_PER_BLOCK
,
CK_MIN_BLOCK_PER_CU
)
__launch_bounds__
(
CK_MAX_THREAD_PER_BLOCK
,
CK_MIN_BLOCK_PER_CU
)
#endif
kernel_contraction_multiple_d_xdl_cshuffle
(
const
FloatAB
*
__restrict__
p_a_grid
,
const
FloatAB
*
__restrict__
p_b_grid
,
FloatDsPointer
p_ds_grid
,
FloatE
*
__restrict__
p_e_grid
,
const
index_t
batch_count
,
const
AElementwiseOperation
a_element_op
,
const
BElementwiseOperation
b_element_op
,
const
CDEElementwiseOperation
cde_element_op
,
const
AGridDesc_AKB_AK0_M_AK1
a_grid_desc_akb_ak0_m_ak1
,
const
BGridDesc_BKB_BK0_N_BK1
b_grid_desc_bkb_bk0_n_bk1
,
const
DsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
ds_grid_desc_mblock_mperblock_nblock_nperblock
,
const
EGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
e_grid_desc_mblock_mperblock_nblock_nperblock
,
const
ComputePtrOffsetOfBatch
compute_ptr_offset_of_batch
,
const
Block2ETileMap
block_2_etile_map
)
kernel_contraction_multiple_d_xdl_cshuffle
(
const
FloatAB
*
__restrict__
p_a_grid
,
const
FloatAB
*
__restrict__
p_b_grid
,
FloatDsPointer
p_ds_grid
,
FloatE
*
__restrict__
p_e_grid
,
const
index_t
batch_count
,
const
AElementwiseOperation
a_element_op
,
const
BElementwiseOperation
b_element_op
,
const
CDEElementwiseOperation
cde_element_op
,
const
AGridDesc_AKB_AK0_M_AK1
a_grid_desc_akb_ak0_m_ak1
,
const
BGridDesc_BKB_BK0_N_BK1
b_grid_desc_bkb_bk0_n_bk1
,
const
DsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
ds_grid_desc_mblock_mperblock_nblock_nperblock
,
const
EGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
e_grid_desc_mblock_mperblock_nblock_nperblock
,
const
ComputePtrOffsetOfBatch
compute_ptr_offset_of_batch
,
const
Block2ETileMap
block_2_etile_map
)
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
...
...
@@ -1078,7 +1075,7 @@ struct DeviceSplitKContractionMultipleD_Xdl_CShuffle
}
#ifndef __HIPCC_RTC__
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
#endif
// polymorphic
std
::
unique_ptr
<
BaseArgument
>
...
...
@@ -1153,5 +1150,3 @@ struct DeviceSplitKContractionMultipleD_Xdl_CShuffle
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/masking_specialization.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -83,5 +80,3 @@ struct C0MatrixMask_impl
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/matrix_padder.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -383,5 +380,3 @@ struct MatrixPadder_v2
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -187,5 +184,3 @@ struct reduce_unary_operator<ReduceTensorOp::NORM2, false, true>
};
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/tensor_layout.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -418,5 +415,3 @@ std::ostream& operator<<(std::ostream& os, const Layout&)
}
// namespace tensor_layout
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/tensor_specialization.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -29,5 +26,3 @@ inline std::string getTensorSpecializationString(const TensorSpecialization& s)
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/welford_helper.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -90,5 +87,3 @@ struct GetReduceCountPerThreadForMultiblockWelford
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -326,5 +323,3 @@ struct AddFastGelu
}
// namespace element_wise
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
View file @
795bea35
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -395,5 +392,3 @@ struct UnaryTypeConvert<ck::bhalf_t, float>
}
// namespace element_wise
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
Prev
1
…
3
4
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment