Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
253f942b
Commit
253f942b
authored
Sep 22, 2023
by
Umang Yadav
Browse files
changes to make it compile
parent
8f9c0243
Changes
275
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
129 additions
and
28 deletions
+129
-28
include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp
...lock_welford_second_half_multiblock_reduce_first_half.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
+34
-28
include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp
...dwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp
...mm_layernorm/gridwise_welford_second_half_layernorm2d.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp
...on/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp
...on/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp
...r_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp
...r_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp
...n/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp
...tched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp
..._batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp
...id/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp
...pu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp
...gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp
.../ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp
.../ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp
.../grid/gridwise_elementwise_layernorm_welford_variance.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp
...pu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp
...tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp
...de/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp
+5
-0
No files found.
include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -554,3 +557,5 @@ struct GridwiseWelfordSecondHalfReduceFirstHalf
...
@@ -554,3 +557,5 @@ struct GridwiseWelfordSecondHalfReduceFirstHalf
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -5,6 +8,7 @@
...
@@ -5,6 +8,7 @@
#include "ck/utility/math.hpp"
#include "ck/utility/math.hpp"
#include "ck/utility/number.hpp"
#include "ck/utility/number.hpp"
#include "ck/utility/tuple.hpp"
#include "ck/tensor_description/tensor_adaptor.hpp"
#include "ck/tensor_description/tensor_adaptor.hpp"
#include "ck/tensor_description/multi_index_transform_helper.hpp"
#include "ck/tensor_description/multi_index_transform_helper.hpp"
#include <limits>
#include <limits>
...
@@ -86,16 +90,16 @@ struct BlockToCTileMap_M00_N0_M01
...
@@ -86,16 +90,16 @@ struct BlockToCTileMap_M00_N0_M01
const
auto
M00
=
math
::
integer_divide_ceil
(
M0
,
M01
);
const
auto
M00
=
math
::
integer_divide_ceil
(
M0
,
M01
);
const
auto
m00_n0_m01_to_m0_n0_block_cluster_adaptor
=
make_single_stage_tensor_adaptor
(
const
auto
m00_n0_m01_to_m0_n0_block_cluster_adaptor
=
make_single_stage_tensor_adaptor
(
make_tuple
(
make_insert_transform
(
1
),
ck
::
make_tuple
(
make_insert_transform
(
1
),
make_unmerge_transform
(
make_tuple
(
M00
,
M01
)),
make_unmerge_transform
(
ck
::
make_tuple
(
M00
,
M01
)),
make_pass_through_transform
(
make_tuple
(
N0
))),
make_pass_through_transform
(
ck
::
make_tuple
(
N0
))),
make_tuple
(
Sequence
<>
{},
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
ck
::
make_tuple
(
Sequence
<>
{},
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
,
3
>
{},
Sequence
<
2
>
{}));
ck
::
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
,
3
>
{},
Sequence
<
2
>
{}));
const
auto
cblockid_to_m00_n0_m01_block_cluster_adaptor
=
make_single_stage_tensor_adaptor
(
const
auto
cblockid_to_m00_n0_m01_block_cluster_adaptor
=
make_single_stage_tensor_adaptor
(
make_tuple
(
make_merge_transform
(
make_tuple
(
1
,
M00
,
N0
,
M01
))),
ck
::
make_tuple
(
make_merge_transform
(
ck
::
make_tuple
(
1
,
M00
,
N0
,
M01
))),
make_tuple
(
Sequence
<
0
,
1
,
2
,
3
>
{}),
ck
::
make_tuple
(
Sequence
<
0
,
1
,
2
,
3
>
{}),
make_tuple
(
Sequence
<
0
>
{}));
ck
::
make_tuple
(
Sequence
<
0
>
{}));
const
auto
cblockid_to_m0_n0_block_cluster_adaptor
=
const
auto
cblockid_to_m0_n0_block_cluster_adaptor
=
chain_tensor_adaptors
(
m00_n0_m01_to_m0_n0_block_cluster_adaptor
,
chain_tensor_adaptors
(
m00_n0_m01_to_m0_n0_block_cluster_adaptor
,
...
@@ -231,7 +235,7 @@ struct BlockToCTileMap_M00_N0_M01Adapt<MPerBlock, NPerBlock, void>
...
@@ -231,7 +235,7 @@ struct BlockToCTileMap_M00_N0_M01Adapt<MPerBlock, NPerBlock, void>
* output {1, 2}
* output {1, 2}
*/
*/
return
make_tuple
(
idx_N0_M01_local
%
M01_adapt
+
idx_M00
*
M01_
,
return
ck
::
make_tuple
(
idx_N0_M01_local
%
M01_adapt
+
idx_M00
*
M01_
,
idx_N0_M01_local
/
M01_adapt
);
idx_N0_M01_local
/
M01_adapt
);
}
}
...
@@ -307,7 +311,7 @@ struct BlockToCTileMap_KSplit_M00_N0_M01Adapt
...
@@ -307,7 +311,7 @@ struct BlockToCTileMap_KSplit_M00_N0_M01Adapt
index_t
idx_M01
=
idx_M0
%
M01_
;
index_t
idx_M01
=
idx_M0
%
M01_
;
index_t
idx_N0_M01_local
=
idx_N0
+
idx_M01
*
N0
;
index_t
idx_N0_M01_local
=
idx_N0
+
idx_M01
*
N0
;
return
make_tuple
(
idx_ksplit
,
return
ck
::
make_tuple
(
idx_ksplit
,
idx_N0_M01_local
%
M01_adapt
+
idx_M00
*
M01_
,
idx_N0_M01_local
%
M01_adapt
+
idx_M00
*
M01_
,
idx_N0_M01_local
/
M01_adapt
);
idx_N0_M01_local
/
M01_adapt
);
}
}
...
@@ -406,17 +410,17 @@ struct BlockToCTileMap_M00_N00_M01_N01
...
@@ -406,17 +410,17 @@ struct BlockToCTileMap_M00_N00_M01_N01
const
auto
m00_m01_n00_n01_to_m0_n0_block_cluster_adaptor
=
const
auto
m00_m01_n00_n01_to_m0_n0_block_cluster_adaptor
=
make_single_stage_tensor_adaptor
(
make_single_stage_tensor_adaptor
(
make_tuple
(
make_insert_transform
(
1
),
// swallow the carry from lower dimensions
ck
::
make_tuple
(
make_insert_transform
(
1
),
// swallow the carry from lower dimensions
make_unmerge_transform
(
make_tuple
(
M00
,
M01
)),
make_unmerge_transform
(
ck
::
make_tuple
(
M00
,
M01
)),
make_unmerge_transform
(
make_tuple
(
N00
,
N01
))),
make_unmerge_transform
(
ck
::
make_tuple
(
N00
,
N01
))),
make_tuple
(
Sequence
<>
{},
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
ck
::
make_tuple
(
Sequence
<>
{},
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
,
3
>
{},
Sequence
<
2
,
4
>
{}));
ck
::
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
,
3
>
{},
Sequence
<
2
,
4
>
{}));
const
auto
cblockid_to_m00_m01_n00_n01_block_cluster_adaptor
=
const
auto
cblockid_to_m00_m01_n00_n01_block_cluster_adaptor
=
make_single_stage_tensor_adaptor
(
make_single_stage_tensor_adaptor
(
make_tuple
(
make_merge_transform
(
make_tuple
(
1
,
M00
,
N00
,
M01
,
N01
))),
ck
::
make_tuple
(
make_merge_transform
(
ck
::
make_tuple
(
1
,
M00
,
N00
,
M01
,
N01
))),
make_tuple
(
Sequence
<
0
,
1
,
2
,
3
,
4
>
{}),
ck
::
make_tuple
(
Sequence
<
0
,
1
,
2
,
3
,
4
>
{}),
make_tuple
(
Sequence
<
0
>
{}));
ck
::
make_tuple
(
Sequence
<
0
>
{}));
const
auto
cblockid_to_m0_n0_block_cluster_adaptor
=
const
auto
cblockid_to_m0_n0_block_cluster_adaptor
=
chain_tensor_adaptors
(
m00_m01_n00_n01_to_m0_n0_block_cluster_adaptor
,
chain_tensor_adaptors
(
m00_m01_n00_n01_to_m0_n0_block_cluster_adaptor
,
...
@@ -525,17 +529,17 @@ struct BlockToCTileMap_KSplit_M00_N00_M01_N01
...
@@ -525,17 +529,17 @@ struct BlockToCTileMap_KSplit_M00_N00_M01_N01
const
auto
ksplit_m00_m01_n00_n01_to_m0_n0_block_cluster_adaptor
=
const
auto
ksplit_m00_m01_n00_n01_to_m0_n0_block_cluster_adaptor
=
make_single_stage_tensor_adaptor
(
make_single_stage_tensor_adaptor
(
make_tuple
(
make_pass_through_transform
(
KSplit
),
ck
::
make_tuple
(
make_pass_through_transform
(
KSplit
),
make_unmerge_transform
(
make_tuple
(
M00
,
M01
)),
make_unmerge_transform
(
ck
::
make_tuple
(
M00
,
M01
)),
make_unmerge_transform
(
make_tuple
(
N00
,
N01
))),
make_unmerge_transform
(
ck
::
make_tuple
(
N00
,
N01
))),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{}),
ck
::
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
,
3
>
{},
Sequence
<
2
,
4
>
{}));
ck
::
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
,
3
>
{},
Sequence
<
2
,
4
>
{}));
const
auto
c_blockid_to_ksplit_m00_m01_n00_n01_block_cluster_adaptor
=
const
auto
c_blockid_to_ksplit_m00_m01_n00_n01_block_cluster_adaptor
=
make_single_stage_tensor_adaptor
(
make_single_stage_tensor_adaptor
(
make_tuple
(
make_merge_transform
(
make_tuple
(
KSplit
,
M00
,
N00
,
M01
,
N01
))),
ck
::
make_tuple
(
make_merge_transform
(
ck
::
make_tuple
(
KSplit
,
M00
,
N00
,
M01
,
N01
))),
make_tuple
(
Sequence
<
0
,
1
,
2
,
3
,
4
>
{}),
ck
::
make_tuple
(
Sequence
<
0
,
1
,
2
,
3
,
4
>
{}),
make_tuple
(
Sequence
<
0
>
{}));
ck
::
make_tuple
(
Sequence
<
0
>
{}));
const
auto
c_blockid_to_ksplit_m0_n0_block_cluster_adaptor
=
const
auto
c_blockid_to_ksplit_m0_n0_block_cluster_adaptor
=
chain_tensor_adaptors
(
ksplit_m00_m01_n00_n01_to_m0_n0_block_cluster_adaptor
,
chain_tensor_adaptors
(
ksplit_m00_m01_n00_n01_to_m0_n0_block_cluster_adaptor
,
...
@@ -652,13 +656,13 @@ struct BlockToCTileMap_3DGrid_KSplit
...
@@ -652,13 +656,13 @@ struct BlockToCTileMap_3DGrid_KSplit
const
auto
M0
=
math
::
integer_divide_ceil
(
M
,
MPerBlock
);
const
auto
M0
=
math
::
integer_divide_ceil
(
M
,
MPerBlock
);
const
auto
N0
=
math
::
integer_divide_ceil
(
N
,
NPerBlock
);
const
auto
N0
=
math
::
integer_divide_ceil
(
N
,
NPerBlock
);
return
std
::
make_tuple
(
N0
,
M0
,
k_split
);
return
ck
::
make_tuple
(
N0
,
M0
,
k_split
);
}
}
template
<
typename
TopIdx
>
template
<
typename
TopIdx
>
__device__
constexpr
auto
CalculateBottomIndex
(
const
TopIdx
&
)
const
__device__
constexpr
auto
CalculateBottomIndex
(
const
TopIdx
&
)
const
{
{
return
make_tuple
(
blockIdx
.
z
,
blockIdx
.
y
,
blockIdx
.
x
);
return
ck
::
make_tuple
(
blockIdx
.
z
,
blockIdx
.
y
,
blockIdx
.
x
);
}
}
template
<
typename
CTileIdx
,
typename
CTileDim
>
template
<
typename
CTileIdx
,
typename
CTileDim
>
...
@@ -1078,3 +1082,5 @@ struct BlockToCTileMap_GemmStreamK
...
@@ -1078,3 +1082,5 @@ struct BlockToCTileMap_GemmStreamK
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -1113,3 +1116,5 @@ struct GridwiseGemmMultipleDWelfordFirstHalf_xdl_cshuffle
...
@@ -1113,3 +1116,5 @@ struct GridwiseGemmMultipleDWelfordFirstHalf_xdl_cshuffle
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -392,3 +395,5 @@ struct GridwiseWelfordSecondHalfLayernorm2d
...
@@ -392,3 +395,5 @@ struct GridwiseWelfordSecondHalfLayernorm2d
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -319,3 +322,5 @@ struct GridwiseMultipleReduction_mk_to_m_multiblock
...
@@ -319,3 +322,5 @@ struct GridwiseMultipleReduction_mk_to_m_multiblock
};
// namespace ck
};
// namespace ck
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -262,3 +265,5 @@ struct GridwiseMultipleReduction_mk_to_m_threadwise
...
@@ -262,3 +265,5 @@ struct GridwiseMultipleReduction_mk_to_m_threadwise
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -611,3 +614,5 @@ struct GridwiseReduction_mk_to_m_multiblock
...
@@ -611,3 +614,5 @@ struct GridwiseReduction_mk_to_m_multiblock
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -486,3 +489,5 @@ struct GridwiseReduction_mk_to_m_threadwise
...
@@ -486,3 +489,5 @@ struct GridwiseReduction_mk_to_m_threadwise
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -930,3 +933,5 @@ struct GridwiseBatchedGemmGemm_Xdl_CShuffle
...
@@ -930,3 +933,5 @@ struct GridwiseBatchedGemmGemm_Xdl_CShuffle
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -1257,3 +1260,5 @@ struct GridwiseBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle
...
@@ -1257,3 +1260,5 @@ struct GridwiseBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -1301,3 +1304,5 @@ struct GridwiseBatchedGemmMultipleDSoftmaxGemm_Xdl_CShuffle
...
@@ -1301,3 +1304,5 @@ struct GridwiseBatchedGemmMultipleDSoftmaxGemm_Xdl_CShuffle
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -1126,3 +1129,5 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle
...
@@ -1126,3 +1129,5 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -552,3 +555,5 @@ struct GridwiseBatchNormBackwardWithBlockwiseWelford
...
@@ -552,3 +555,5 @@ struct GridwiseBatchNormBackwardWithBlockwiseWelford
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -481,3 +484,5 @@ struct GridwiseBatchNormForwardWithBlockwiseWelford
...
@@ -481,3 +484,5 @@ struct GridwiseBatchNormForwardWithBlockwiseWelford
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -193,3 +196,5 @@ struct GridwiseElementwise_1D
...
@@ -193,3 +196,5 @@ struct GridwiseElementwise_1D
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
//
//
...
@@ -228,3 +231,5 @@ struct GridwiseElementwise_2D
...
@@ -228,3 +231,5 @@ struct GridwiseElementwise_2D
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -498,3 +501,5 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk
...
@@ -498,3 +501,5 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -999,3 +1002,5 @@ struct GridwiseGemmBiasAddReduce_k0mk1_k0nk1_mn_xdl_cshuffle_v1
...
@@ -999,3 +1002,5 @@ struct GridwiseGemmBiasAddReduce_k0mk1_k0nk1_mn_xdl_cshuffle_v1
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -679,3 +682,5 @@ struct GridwiseGemmDlMultipleD_km_kn_mn
...
@@ -679,3 +682,5 @@ struct GridwiseGemmDlMultipleD_km_kn_mn
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp
View file @
253f942b
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -1156,3 +1159,5 @@ struct GridwiseGemmDl_bkm_bkn_mn_v1r3
...
@@ -1156,3 +1159,5 @@ struct GridwiseGemmDl_bkm_bkn_mn_v1r3
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
Prev
1
…
5
6
7
8
9
10
11
12
13
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment