Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
13587ab3
Unverified
Commit
13587ab3
authored
Oct 23, 2022
by
arai713
Committed by
GitHub
Oct 23, 2022
Browse files
Merge branch 'develop' into gridwise_2d
parents
7e44fd84
685860c2
Changes
338
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
21 additions
and
21 deletions
+21
-21
example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp
example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp
+1
-1
example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp
example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp
+1
-1
example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp
...mm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp
+1
-1
example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp
...gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp
+1
-1
example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp
...softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp
+1
-1
example/33_multiple_reduce/dual_reduce_multiblock.cpp
example/33_multiple_reduce/dual_reduce_multiblock.cpp
+1
-1
example/33_multiple_reduce/dual_reduce_threadwise.cpp
example/33_multiple_reduce/dual_reduce_threadwise.cpp
+1
-1
example/34_batchnorm/batchnorm_forward_impl.hpp
example/34_batchnorm/batchnorm_forward_impl.hpp
+2
-2
example/34_batchnorm/batchnorm_infer_impl.hpp
example/34_batchnorm/batchnorm_infer_impl.hpp
+1
-1
example/35_splitK_gemm/splitK_gemm_xdl_bfp16.cpp
example/35_splitK_gemm/splitK_gemm_xdl_bfp16.cpp
+1
-1
example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp
example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp
+1
-1
example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp
example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp
+1
-1
example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp
example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp
+1
-1
example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp
example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp
+1
-1
example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp
..._sparse_embedding/sparse_embedding3_forward_layernorm.cpp
+1
-1
example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp
..._gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp
+1
-1
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp
..._grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp
+1
-1
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp
..._grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp
+1
-1
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp
..._grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp
+1
-1
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp
..._grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp
+1
-1
No files found.
example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp
View file @
13587ab3
...
...
@@ -20,7 +20,7 @@ Gemm + Gemm fused operation. Computes C_m_o = A_m_k * B0_k_n * B1_n_o
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp
View file @
13587ab3
...
...
@@ -16,7 +16,7 @@ Gemm + Gemm fused operation. Computes C_m_o = A_m_k * B0_k_n * B1_n_o
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp
View file @
13587ab3
...
...
@@ -17,7 +17,7 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp
View file @
13587ab3
...
...
@@ -17,7 +17,7 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp
View file @
13587ab3
...
...
@@ -16,7 +16,7 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/33_multiple_reduce/dual_reduce_multiblock.cpp
View file @
13587ab3
...
...
@@ -13,7 +13,7 @@
#include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/tensor_operation/gpu/device/device_multiple_reduce_multiblock.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_multiple_reduce_multiblock.hpp"
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "dual_reduce_common.hpp"
...
...
example/33_multiple_reduce/dual_reduce_threadwise.cpp
View file @
13587ab3
...
...
@@ -13,7 +13,7 @@
#include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/tensor_operation/gpu/device/device_multiple_reduce_threadwise.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_multiple_reduce_threadwise.hpp"
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "dual_reduce_common.hpp"
...
...
example/34_batchnorm/batchnorm_forward_impl.hpp
View file @
13587ab3
...
...
@@ -9,8 +9,8 @@
#include "ck/ck.hpp"
#include "ck/utility/reduction_operator.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/device_multiple_reduce_multiblock.hpp"
#include "ck/tensor_operation/gpu/device/device_elementwise.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_multiple_reduce_multiblock.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_elementwise.hpp"
#include "batchnorm_common.hpp"
...
...
example/34_batchnorm/batchnorm_infer_impl.hpp
View file @
13587ab3
...
...
@@ -10,7 +10,7 @@
#include "ck/utility/sequence.hpp"
#include "ck/utility/tuple.hpp"
#include "ck/utility/reduction_operator.hpp"
#include "ck/tensor_operation/gpu/device/device_elementwise.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_elementwise.hpp"
#include "batchnorm_common.hpp"
...
...
example/35_splitK_gemm/splitK_gemm_xdl_bfp16.cpp
View file @
13587ab3
...
...
@@ -8,7 +8,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_splitk_c_shuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_gemm_xdl_splitk_c_shuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp
View file @
13587ab3
...
...
@@ -8,7 +8,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_splitk_c_shuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_gemm_xdl_splitk_c_shuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp
View file @
13587ab3
...
...
@@ -8,7 +8,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_splitk_c_shuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_gemm_xdl_splitk_c_shuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp
View file @
13587ab3
...
...
@@ -8,7 +8,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_splitk_c_shuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_gemm_xdl_splitk_c_shuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp
View file @
13587ab3
...
...
@@ -8,7 +8,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_splitk_c_shuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_gemm_xdl_splitk_c_shuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp
View file @
13587ab3
...
...
@@ -9,7 +9,7 @@
#include <ctime>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_sparse_embedding3_forward_layernorm.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_sparse_embedding3_forward_layernorm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
...
...
example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp
View file @
13587ab3
...
...
@@ -12,7 +12,7 @@ Computes C_m_o = Relu(A0[m, k] * B0[n, k] + D00[m, n] + D01[mn]) * B1[n, o] + D1
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp
View file @
13587ab3
...
...
@@ -8,7 +8,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp
View file @
13587ab3
...
...
@@ -8,7 +8,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp
View file @
13587ab3
...
...
@@ -8,7 +8,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp
View file @
13587ab3
...
...
@@ -12,7 +12,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
Prev
1
2
3
4
5
6
7
8
9
…
17
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment