Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
860433ea
Commit
860433ea
authored
Dec 16, 2024
by
root
Browse files
Remove unused mgroup
parent
ebb5522c
Changes
3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
85 additions
and
149 deletions
+85
-149
example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp
..._weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp
+0
-1
include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp
...vice/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp
+4
-67
library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp
...wd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp
+81
-81
No files found.
example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp
View file @
860433ea
...
@@ -69,7 +69,6 @@ using DeviceConvBwdWeightInstance =
...
@@ -69,7 +69,6 @@ using DeviceConvBwdWeightInstance =
2
,
// CBlockTransferScalarPerVector_NWaveNPerXdl
2
,
// CBlockTransferScalarPerVector_NWaveNPerXdl
ck
::
BlockGemmPipelineScheduler
::
Intrawave
,
// BlkGemmPipeSched
ck
::
BlockGemmPipelineScheduler
::
Intrawave
,
// BlkGemmPipeSched
ck
::
BlockGemmPipelineVersion
::
v1
,
// BlkGemmPipelineVer
ck
::
BlockGemmPipelineVersion
::
v1
,
// BlkGemmPipelineVer
1
,
// NumGroupsToMerge
ComputeTypeA
,
// ComputeTypeA
ComputeTypeA
,
// ComputeTypeA
ComputeTypeB
>
;
// ComputeTypeB
ComputeTypeB
>
;
// ComputeTypeB
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp
View file @
860433ea
...
@@ -37,7 +37,6 @@ template <typename GridwiseGemm,
...
@@ -37,7 +37,6 @@ template <typename GridwiseGemm,
typename
BGridDesc_BK0_N_K1
,
typename
BGridDesc_BK0_N_K1
,
typename
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
,
typename
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
,
typename
ComputePtrOffsetOfBatch
,
typename
ComputePtrOffsetOfBatch
,
index_t
NumGroupsToMerge
,
bool
HasMainKBlockLoop
,
bool
HasMainKBlockLoop
,
InMemoryDataOperationEnum
CGlobalMemoryDataOperation
,
InMemoryDataOperationEnum
CGlobalMemoryDataOperation
,
index_t
MinimumOccupancy
=
1
,
index_t
MinimumOccupancy
=
1
,
...
@@ -57,7 +56,7 @@ __global__ void
...
@@ -57,7 +56,7 @@ __global__ void
{
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
defined(__gfx94__))
defined(__gfx94__))
const
index_t
g_idx
=
__builtin_amdgcn_readfirstlane
(
blockIdx
.
z
*
NumGroupsToMerge
);
const
index_t
g_idx
=
__builtin_amdgcn_readfirstlane
(
blockIdx
.
z
);
const
index_t
k_idx
=
__builtin_amdgcn_readfirstlane
(
blockIdx
.
y
*
num_k_per_block
);
const
index_t
k_idx
=
__builtin_amdgcn_readfirstlane
(
blockIdx
.
y
*
num_k_per_block
);
const
long_index_t
a_batch_offset
=
const
long_index_t
a_batch_offset
=
...
@@ -92,7 +91,6 @@ template <typename GridwiseGemm,
...
@@ -92,7 +91,6 @@ template <typename GridwiseGemm,
typename
BGridDesc_BK0_N_K1
,
typename
BGridDesc_BK0_N_K1
,
typename
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
,
typename
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
,
typename
ComputePtrOffsetOfBatch
,
typename
ComputePtrOffsetOfBatch
,
index_t
NumGroupsToMerge
,
bool
HasMainKBlockLoop
,
bool
HasMainKBlockLoop
,
InMemoryDataOperationEnum
CGlobalMemoryDataOperation
,
InMemoryDataOperationEnum
CGlobalMemoryDataOperation
,
index_t
MinimumOccupancy
=
1
,
index_t
MinimumOccupancy
=
1
,
...
@@ -113,7 +111,7 @@ __global__ void
...
@@ -113,7 +111,7 @@ __global__ void
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
// offset base pointer for each work-group
// offset base pointer for each work-group
const
index_t
g_idx
=
__builtin_amdgcn_readfirstlane
(
blockIdx
.
z
*
NumGroupsToMerge
);
const
index_t
g_idx
=
__builtin_amdgcn_readfirstlane
(
blockIdx
.
z
);
const
index_t
k_idx
=
__builtin_amdgcn_readfirstlane
(
blockIdx
.
y
*
num_k_per_block
);
const
index_t
k_idx
=
__builtin_amdgcn_readfirstlane
(
blockIdx
.
y
*
num_k_per_block
);
const
long_index_t
a_batch_offset
=
const
long_index_t
a_batch_offset
=
...
@@ -190,7 +188,6 @@ template <ck::index_t NDimSpatial,
...
@@ -190,7 +188,6 @@ template <ck::index_t NDimSpatial,
index_t
CBlockTransferScalarPerVector_NWaveNPerXdl
,
index_t
CBlockTransferScalarPerVector_NWaveNPerXdl
,
BlockGemmPipelineScheduler
BlkGemmPipeSched
=
BlockGemmPipelineScheduler
::
Intrawave
,
BlockGemmPipelineScheduler
BlkGemmPipeSched
=
BlockGemmPipelineScheduler
::
Intrawave
,
BlockGemmPipelineVersion
BlkGemmPipelineVer
=
BlockGemmPipelineVersion
::
v1
,
BlockGemmPipelineVersion
BlkGemmPipelineVer
=
BlockGemmPipelineVersion
::
v1
,
index_t
NumGroupsToMerge
=
1
,
typename
ComputeTypeA
=
InDataType
,
typename
ComputeTypeA
=
InDataType
,
typename
ComputeTypeB
=
ComputeTypeA
>
typename
ComputeTypeB
=
ComputeTypeA
>
struct
DeviceGroupedConvBwdWeight_Xdl_CShuffle
struct
DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -240,7 +237,7 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -240,7 +237,7 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
NPerBlock
,
NPerBlock
,
K1Number
,
K1Number
,
K0PerBlock
/
K1Number
,
K0PerBlock
/
K1Number
,
NumGroupsToMerge
,
1
/*
NumGroupsToMerge
*/
,
ConvBackwardWeightSpecialization
>
{};
ConvBackwardWeightSpecialization
>
{};
template
<
ck
::
index_t
NDim
,
typename
ck
::
enable_if
<
NDim
==
1
,
bool
>
::
type
=
false
>
template
<
ck
::
index_t
NDim
,
typename
ck
::
enable_if
<
NDim
==
1
,
bool
>
::
type
=
false
>
...
@@ -544,7 +541,7 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -544,7 +541,7 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
index_t
gdx
,
gdy
,
gdz
;
index_t
gdx
,
gdy
,
gdz
;
std
::
tie
(
gdx
,
gdy
,
gdz
)
=
GridwiseGemm
::
CalculateGridSize
(
std
::
tie
(
gdx
,
gdy
,
gdz
)
=
GridwiseGemm
::
CalculateGridSize
(
gemm_arg
.
M
,
gemm_arg
.
N
,
gemm_arg
.
KBatch
,
arg
.
Conv_G_
/
NumGroupsToMerge
);
gemm_arg
.
M
,
gemm_arg
.
N
,
gemm_arg
.
KBatch
,
arg
.
Conv_G_
);
float
ave_time
=
0
;
float
ave_time
=
0
;
...
@@ -621,7 +618,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -621,7 +618,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
>
;
minimum_occupancy
>
;
...
@@ -636,7 +632,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -636,7 +632,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
>
;
minimum_occupancy
>
;
...
@@ -657,7 +652,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -657,7 +652,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -674,7 +668,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -674,7 +668,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -693,7 +686,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -693,7 +686,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -714,7 +706,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -714,7 +706,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -735,7 +726,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -735,7 +726,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -756,7 +746,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -756,7 +746,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -776,7 +765,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -776,7 +765,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -797,7 +785,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -797,7 +785,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -817,7 +804,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -817,7 +804,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -834,7 +820,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -834,7 +820,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -853,7 +838,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -853,7 +838,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -874,7 +858,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -874,7 +858,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -895,7 +878,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -895,7 +878,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -916,7 +898,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -916,7 +898,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -936,7 +917,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -936,7 +917,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -957,7 +937,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -957,7 +937,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -982,7 +961,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -982,7 +961,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -998,7 +976,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -998,7 +976,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -1017,7 +994,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -1017,7 +994,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -1033,7 +1009,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -1033,7 +1009,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -1055,7 +1030,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -1055,7 +1030,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -1071,7 +1045,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -1071,7 +1045,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -1090,7 +1063,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -1090,7 +1063,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -1106,7 +1078,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -1106,7 +1078,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
true
,
true
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
,
minimum_occupancy
,
...
@@ -1130,7 +1101,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -1130,7 +1101,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
false
,
false
,
InMemoryDataOperationEnum
::
AtomicAdd
,
InMemoryDataOperationEnum
::
AtomicAdd
,
minimum_occupancy
>
;
minimum_occupancy
>
;
...
@@ -1145,7 +1115,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -1145,7 +1115,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
remove_reference_t
<
remove_reference_t
<
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
DeviceOp
::
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
ComputePtrOffsetOfStridedBatch
<
I1
,
I1
,
I0
>
,
NumGroupsToMerge
,
false
,
false
,
InMemoryDataOperationEnum
::
Set
,
InMemoryDataOperationEnum
::
Set
,
minimum_occupancy
>
;
minimum_occupancy
>
;
...
@@ -1235,23 +1204,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -1235,23 +1204,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
}
}
}
}
if
constexpr
(
NumGroupsToMerge
>
1
)
{
// support only if whole M and N can be proccessed on one block
if
(
!
(
GemmM
<=
MPerBlock
&&
GemmN
<=
NPerBlock
))
{
return
false
;
}
if
(
!
(
arg
.
Conv_C_
==
1
&&
arg
.
Conv_K_
==
1
))
{
return
false
;
}
if
(
arg
.
Conv_G_
%
NumGroupsToMerge
!=
0
)
{
return
false
;
}
}
if
(
!
(
arg
.
Conv_C_
%
BBlockTransferSrcScalarPerVector
==
0
&&
if
(
!
(
arg
.
Conv_C_
%
BBlockTransferSrcScalarPerVector
==
0
&&
arg
.
Conv_K_
%
ABlockTransferSrcScalarPerVector
==
0
))
arg
.
Conv_K_
%
ABlockTransferSrcScalarPerVector
==
0
))
{
{
...
@@ -1400,21 +1352,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
...
@@ -1400,21 +1352,6 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
return
str
.
str
();
return
str
.
str
();
}
}
void
SetWorkSpacePointer
(
BaseArgument
*
p_arg
,
void
*
p_workspace
,
const
StreamConfig
&
=
StreamConfig
{})
const
override
{
auto
p_arg_
=
dynamic_cast
<
Argument
*>
(
p_arg
);
if
(
p_arg_
)
{
p_arg_
->
p_workspace_
=
p_workspace
;
}
else
throw
std
::
runtime_error
(
"The argument pointer is not an object of "
"DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle::Argument structure!"
);
}
};
};
}
// namespace device
}
// namespace device
...
...
library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp
View file @
860433ea
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment