Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
253f942b
Commit
253f942b
authored
Sep 22, 2023
by
Umang Yadav
Browse files
changes to make it compile
parent
8f9c0243
Changes
275
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
144 additions
and
18 deletions
+144
-18
include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp
...ude/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp
...tion/gpu/device/device_grouped_contraction_multiple_d.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp
...on/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp
...r_operation/gpu/device/device_grouped_conv_bwd_weight.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp
...k/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp
...eration/gpu/device/device_grouped_conv_fwd_multiple_d.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp
...de/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp
...n/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp
...ensor_operation/gpu/device/device_grouped_gemm_splitk.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp
...ck/tensor_operation/gpu/device/device_multiple_reduce.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_normalization.hpp
...e/ck/tensor_operation/gpu/device/device_normalization.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_permute.hpp
include/ck/tensor_operation/gpu/device/device_permute.hpp
+7
-0
include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp
include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_reduce.hpp
include/ck/tensor_operation/gpu/device/device_reduce.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_softmax.hpp
include/ck/tensor_operation/gpu/device/device_softmax.hpp
+5
-0
include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp
...ation/gpu/device/device_splitk_contraction_multiple_d.hpp
+5
-0
include/ck/tensor_operation/gpu/device/gemm_specialization.hpp
...de/ck/tensor_operation/gpu/device/gemm_specialization.hpp
+7
-1
include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp
...l/device_batched_contraction_multiple_d_wmma_cshuffle.hpp
+17
-6
include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp
...pl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp
+18
-6
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp
...ion/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp
+20
-5
No files found.
include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -62,3 +65,5 @@ using DeviceGemmSplitKPtr = std::unique_ptr<DeviceGemmSplitK<ALayout,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -70,3 +73,5 @@ struct DeviceGroupedContractionMultipleD : public BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -65,3 +68,5 @@ struct DeviceGroupedConvBwdDataMultipleD : public BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -48,3 +51,5 @@ struct DeviceGroupedConvBwdWeight : public BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -53,3 +56,5 @@ struct DeviceGroupedConvFwd : public BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -63,3 +66,5 @@ struct DeviceGroupedConvFwdMultipleD : public BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -53,3 +56,5 @@ struct DeviceGroupedGemm : public BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -73,3 +76,5 @@ struct DeviceGroupedGemmSoftmaxGemmPermute : public BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
#pragma once
#include <iostream>
#include <vector>
...
...
@@ -37,3 +40,5 @@ struct DeviceGroupedGemmSplitK : public DeviceGroupedGemm<ALayout,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -56,3 +59,5 @@ using DeviceMultipleReducePtr = std::unique_ptr<DeviceMultipleReduce<Rank,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_normalization.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -60,3 +63,5 @@ using DeviceNormalizationPtr = std::unique_ptr<DeviceNormalization<XDataType,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_permute.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#ifndef __HIPCC_RTC__
#include <array>
#include <memory>
#include <type_traits>
#endif
#include "ck/tensor_operation/gpu/device/device_base.hpp"
...
...
@@ -34,3 +39,5 @@ struct DevicePermute : BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -45,3 +48,5 @@ struct DevicePoolFwd : public BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_reduce.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -69,3 +72,5 @@ using DeviceReducePtr = std::unique_ptr<DeviceReduce<InDataType,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_softmax.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -70,3 +73,5 @@ using DeviceSoftmaxPtr = std::unique_ptr<DeviceSoftmax<InDataType,
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -63,3 +66,5 @@ struct DeviceSplitKContractionMultipleD : public BaseOperator
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/gemm_specialization.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -28,7 +31,7 @@ enum struct GemmSpecialization
NKOPadding
,
MNKOPadding
,
};
#ifndef __HIPCC_RTC__
inline
std
::
string
getGemmSpecializationString
(
const
GemmSpecialization
&
s
)
{
switch
(
s
)
...
...
@@ -52,7 +55,10 @@ inline std::string getGemmSpecializationString(const GemmSpecialization& s)
default:
return
"Unrecognized specialization!"
;
}
}
#endif
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#ifndef __HIPCC_RTC__
#include <iostream>
#include <sstream>
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#endif
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
...
...
@@ -15,8 +23,6 @@
#include "ck/tensor_operation/gpu/device/tensor_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -687,7 +693,7 @@ struct DeviceBatchedContractionMultipleD_Wmma_CShuffle
// Batch Offset
ComputePtrOffsetOfStridedBatch
compute_ptr_offset_of_batch_
;
};
#ifndef __HIPCC_RTC__
// Invoker
struct
Invoker
:
public
BaseInvoker
{
...
...
@@ -761,13 +767,14 @@ struct DeviceBatchedContractionMultipleD_Wmma_CShuffle
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
stream_config
);
}
};
#endif
static
constexpr
bool
IsValidCompilationParameter
()
{
// TODO: properly implement this check
return
true
;
}
#ifndef __HIPCC_RTC__
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
{
if
(
ck
::
get_device_name
()
==
"gfx1100"
||
ck
::
get_device_name
()
==
"gfx1101"
||
...
...
@@ -869,7 +876,7 @@ struct DeviceBatchedContractionMultipleD_Wmma_CShuffle
{
return
IsSupportedArgument
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
#endif
static
auto
MakeArgument
(
const
void
*
p_a
,
const
void
*
p_b
,
...
...
@@ -943,7 +950,8 @@ struct DeviceBatchedContractionMultipleD_Wmma_CShuffle
cde_element_op
);
}
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
#ifndef __HIPCC_RTC__
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
// polymorphic
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
override
...
...
@@ -985,8 +993,11 @@ struct DeviceBatchedContractionMultipleD_Wmma_CShuffle
return
str
.
str
();
}
#endif
};
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#ifndef __HIPCC_RTC__
#include <iostream>
#include <sstream>
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#endif
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
...
...
@@ -15,8 +23,6 @@
#include "ck/tensor_operation/gpu/device/tensor_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
namespace
ck
{
...
...
@@ -759,6 +765,7 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle
ComputePtrOffsetOfStridedBatch
compute_ptr_offset_of_batch_
;
};
#ifndef __HIPCC_RTC__
// Invoker
struct
Invoker
:
public
BaseInvoker
{
...
...
@@ -841,7 +848,6 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
stream_config
);
}
};
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
{
if
(
!
ck
::
is_xdl_supported
())
...
...
@@ -935,7 +941,7 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle
{
return
IsSupportedArgument
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
#endif
static
auto
MakeArgument
(
const
void
*
p_a
,
const
void
*
p_b
,
...
...
@@ -970,7 +976,10 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle
cde_element_op
};
}
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
#ifndef __HIPCC_RTC__
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
#endif
// polymorphic
std
::
unique_ptr
<
BaseArgument
>
...
...
@@ -1006,7 +1015,7 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle
b_element_op
,
cde_element_op
);
}
#ifndef __HIPCC_RTC__
// polymorphic
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
override
{
...
...
@@ -1038,8 +1047,11 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle
return
str
.
str
();
}
#endif
};
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
#pragma once
#ifndef __HIPCC_RTC__
#include <iostream>
#include <sstream>
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#endif
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
...
...
@@ -11,8 +19,6 @@
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -490,6 +496,7 @@ struct DeviceBatchedGemmEPermuteXdl : public DeviceBatchedGemmEPermute<ALayout,
CDEElementwiseOperation
cde_element_op_
;
};
#ifndef __HIPCC_RTC__
// Invoker
struct
Invoker
:
public
BaseInvoker
{
...
...
@@ -565,13 +572,14 @@ struct DeviceBatchedGemmEPermuteXdl : public DeviceBatchedGemmEPermute<ALayout,
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
stream_config
);
}
};
#endif
static
constexpr
bool
IsValidCompilationParameter
()
{
// TODO: properly implement this check
return
true
;
}
#ifndef __HIPCC_RTC__
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
{
if
(
!
ck
::
is_xdl_supported
())
...
...
@@ -591,7 +599,7 @@ struct DeviceBatchedGemmEPermuteXdl : public DeviceBatchedGemmEPermute<ALayout,
{
return
IsSupportedArgument
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
#endif
static
auto
MakeArgument
(
const
ADataType
*
p_a
,
const
BDataType
*
p_b
,
EDataType
*
p_e
,
...
...
@@ -625,7 +633,10 @@ struct DeviceBatchedGemmEPermuteXdl : public DeviceBatchedGemmEPermute<ALayout,
cde_element_op
};
}
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
#ifndef __HIPCC_RTC__
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
#endif
// polymorphic
std
::
unique_ptr
<
BaseArgument
>
...
...
@@ -662,6 +673,7 @@ struct DeviceBatchedGemmEPermuteXdl : public DeviceBatchedGemmEPermute<ALayout,
cde_element_op
);
}
#ifndef __HIPCC_RTC__
// polymorphic
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
override
{
...
...
@@ -685,8 +697,11 @@ struct DeviceBatchedGemmEPermuteXdl : public DeviceBatchedGemmEPermute<ALayout,
return
str
.
str
();
}
#endif
};
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
#pragma clang diagnostic pop
Prev
1
2
3
4
5
6
7
8
…
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment