Absolute include path (#281)

* add gelu and fast_gelu * added GeLU and fast GeLU * clean up * add gemm+fastgelu example * add gemm+gelu instances * update profiler * clean up * clean up * adding gemm+bias+activation * clean * adding bias * clean * adding gemm multiple d * debugging * add gemm bias add fastgelu * rename, clean * refactoring; add readme * refactor * refactor * refactor * refactor * refactor * refactor * fix * fix * update example * update example * rename * update example * add ckProfiler * clean * clean * clean * clean * add client app example * update readme * delete obsolete files * remove old client app * delete old file * cleaning * clean * remove half * fix header path * fix header path * fix header path * fix header path * fix header path * fix header path for all examples * fix header path * fix header path * fix header path * fix header path * fix header path * fix header path * fix header path * fix header path * fix header path * revert client app example * clean build * fix build * temporarily disable client test on Jenkins * clean * clean * clean

Absolute include path (#281)
* add gelu and fast_gelu * added GeLU and fast GeLU * clean up * add gemm+fastgelu example * add gemm+gelu instances * update profiler * clean up * clean up * adding gemm+bias+activation * clean * adding bias * clean * adding gemm multiple d * debugging * add gemm bias add fastgelu * rename, clean * refactoring; add readme * refactor * refactor * refactor * refactor * refactor * refactor * fix * fix * update example * update example * rename * update example * add ckProfiler * clean * clean * clean * clean * add client app example * update readme * delete obsolete files * remove old client app * delete old file * cleaning * clean * remove half * fix header path * fix header path * fix header path * fix header path * fix header path * fix header path for all examples * fix header path * fix header path * fix header path * fix header path * fix header path * fix header path * fix header path * fix header path * fix header path * revert client app example * clean build * fix build * temporarily disable client test on Jenkins * clean * clean * clean
d1db6a0c · Chao Liu · GitHub · a49115b9 · d1db6a0c · d1db6a0c
Unverified Commit d1db6a0c authored Jun 24, 2022 by Chao Liu Committed by GitHub Jun 24, 2022
20 changed files
--- a/example/12_reduce/reduce_blockwise.cpp
+++ b/example/12_reduce/reduce_blockwise.cpp
@@ -4,20 +4,17 @@
 #include <cstdlib>
 #include <getopt.h>
-#include "check_err.hpp"
+#include "ck/ck.hpp"
-#include "config.hpp"
+#include "ck/utility/reduction_enums.hpp"
-#include "print.hpp"
+#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/device/device_reduce_multiblock.hpp"
-#include "host_tensor.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "device_base.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "device_reduce_multiblock.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "host_common_util.hpp"
+#include "ck/library/host_tensor/host_common_util.hpp"
-#include "host_reduction.hpp"
+#include "ck/library/host_tensor/host_reduction.hpp"
-#include "reduction_enums.hpp"
-#include "reduction_operator_mapping.hpp"
 using namespace ck;
 using namespace ck::tensor_operation::device;

--- a/example/12_reduce/reduce_blockwise_two_call.cpp
+++ b/example/12_reduce/reduce_blockwise_two_call.cpp
@@ -5,20 +5,17 @@
 #include <cstdlib>
 #include <getopt.h>
-#include "check_err.hpp"
+#include "ck/ck.hpp"
-#include "config.hpp"
+#include "ck/utility/reduction_enums.hpp"
-#include "print.hpp"
+#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/device/device_reduce_multiblock.hpp"
-#include "host_tensor.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "device_base.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "device_reduce_multiblock.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "host_common_util.hpp"
+#include "ck/library/host_tensor/host_common_util.hpp"
-#include "host_reduction.hpp"
+#include "ck/library/host_tensor/host_reduction.hpp"
-#include "reduction_enums.hpp"
-#include "reduction_operator_mapping.hpp"
 using namespace ck;
 using namespace ck::tensor_operation::device;

--- a/example/13_pool2d_fwd/pool2d_fwd_common.hpp
+++ b/example/13_pool2d_fwd/pool2d_fwd_common.hpp
@@ -2,19 +2,17 @@
 #include <iostream>
-#include "check_err.hpp"
+#include "ck/ck.hpp"
-#include "config.hpp"
+#include "ck/utility/reduction_enums.hpp"
-#include "print.hpp"
+#include "ck/utility/reduction_functions_accumulate.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
-#include "host_tensor.hpp"
+#include "ck/tensor_operation/gpu/device/device_pool2d_fwd_nhwc_nhwc.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "device_tensor.hpp"
-#include "tensor_layout.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "reduction_enums.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "reduction_operator_mapping.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "reduction_functions_accumulate.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "device_pool2d_fwd_nhwc_nhwc.hpp"
 template <typename InDataType,
          typename OutDataType,

--- a/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp
+++ b/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp
 #include <iostream>
 #include <cstdlib>
-#include "config.hpp"
+#include "ck/ck.hpp"
-#include "tensor_layout.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
-#include "reduction_enums.hpp"
+#include "ck/utility/reduction_enums.hpp"
 #include "pool2d_fwd_common.hpp"

--- a/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp
+++ b/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp
 #include <iostream>
 #include <cstdlib>
-#include "config.hpp"
+#include "ck/ck.hpp"
-#include "tensor_layout.hpp"
+#include "ck/utility/reduction_enums.hpp"
-#include "reduction_enums.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
 #include "pool2d_fwd_common.hpp"

--- a/example/14_gemm_xdl_requant_relu_requant/gemm_xdl_requant_relu_requant_int8.cpp
+++ b/example/14_gemm_xdl_requant_relu_requant/gemm_xdl_requant_relu_requant_int8.cpp
@@ -2,21 +2,18 @@
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
-#include "check_err.hpp"
+#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
-#include "config.hpp"
+#include "ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp"
-#include "print.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "device.hpp"
-#include "host_tensor.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "host_gemm.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
-#include "device_gemm_xdl_cshuffle.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "element_wise_operation.hpp"
-#include "reference_gemm.hpp"
-#include "gemm_specialization.hpp"
 struct RequantReluRequant
 {

--- a/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp
+++ b/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp
@@ -2,21 +2,18 @@
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
-#include "check_err.hpp"
+#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
-#include "config.hpp"
+#include "ck/tensor_operation/gpu/device/device_grouped_gemm_xdl.hpp"
-#include "print.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "device.hpp"
-#include "host_tensor.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "host_gemm.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "device_grouped_gemm_xdl.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
-#include "element_wise_operation.hpp"
-#include "reference_gemm.hpp"
-#include "gemm_specialization.hpp"
 template <ck::index_t... Is>
 using S = ck::Sequence<Is...>;

--- a/example/16_gemm_reduce/gemm_reduce_xdl_max_fp16.cpp
+++ b/example/16_gemm_reduce/gemm_reduce_xdl_max_fp16.cpp
@@ -2,18 +2,18 @@
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
+#include "ck/ck.hpp"
-#include "check_err.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
-#include "config.hpp"
+#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/device/device_gemm_reduce_xdl_cshuffle.hpp"
-#include "host_tensor.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "host_tensor_generator.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "device_gemm_reduce_xdl_cshuffle.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "element_wise_operation.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "reference_gemm.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
-#include "gemm_specialization.hpp"
+#include "ck/library/utility/check_err.hpp"
 template <ck::index_t... Is>
 using S = ck::Sequence<Is...>;

--- a/example/16_gemm_reduce/gemm_reduce_xdl_mean_squaremean_fp16.cpp
+++ b/example/16_gemm_reduce/gemm_reduce_xdl_mean_squaremean_fp16.cpp
@@ -2,20 +2,19 @@
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
+#include "ck/ck.hpp"
-#include "check_err.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
-#include "config.hpp"
+#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/device/device_gemm_reduce_xdl_cshuffle.hpp"
-#include "host_tensor.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/utility/reduction_operator.hpp"
-#include "device_tensor.hpp"
-#include "device_gemm_reduce_xdl_cshuffle.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "element_wise_operation.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "reduction_operator.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "reference_gemm.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "gemm_specialization.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
-#include "reduction_operator.hpp"
 template <ck::index_t... Is>
 using S = ck::Sequence<Is...>;

--- a/example/17_convnd_bwd_data_xdl/convnd_bwd_data_xdl.cpp
+++ b/example/17_convnd_bwd_data_xdl/convnd_bwd_data_xdl.cpp
@@ -2,20 +2,18 @@
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
-#include "config.hpp"
+#include "ck/tensor_operation/gpu/device/device_convnd_bwd_data_xdl_ndhwc_kzyxc_ndhwk.hpp"
-#include "conv_util.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "print.hpp"
-#include "device.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "host_tensor.hpp"
+#include "ck/library/utility/conv_util.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "tensor_layout.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "element_wise_operation.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp"
-#include "device_convnd_bwd_data_xdl_ndhwc_kzyxc_ndhwk.hpp"
-#include "reference_conv_bwd_data.hpp"
 using InDataType  = ck::half_t;
 using WeiDataType = ck::half_t;

--- a/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp
+++ b/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp
@@ -2,19 +2,18 @@
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>
+#include "ck/ck.hpp"
-#include "check_err.hpp"
+#include "ck/utility/reduction_operator.hpp"
-#include "config.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/device/device_batched_gemm_reduce_xdl_cshuffle.hpp"
-#include "host_tensor.hpp"
+#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
-#include "host_tensor_generator.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "device_batched_gemm_reduce_xdl_cshuffle.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "element_wise_operation.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "reduction_operator.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "reference_batched_gemm.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp"
-#include "gemm_specialization.hpp"
 template <ck::index_t... Is>
 using S = ck::Sequence<Is...>;

--- a/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp
+++ b/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp
-/*******************************************************************************
- *
- * MIT License
- *
- * Copyright (c) 2022 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- *******************************************************************************/
 #include <iostream>
 #include <cstdlib>
-#include "check_err.hpp"
-#include "config.hpp"
+#include "ck/ck.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
-#include "host_tensor.hpp"
+#include "ck/tensor_operation/gpu/device/device_binary_elementwise.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "binary_element_wise_operation.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "device_binary_elementwise.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
 using F16 = ck::half_t;
 using F32 = float;

--- a/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp
+++ b/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp
 #include <iostream>
 #include <cstdlib>
-#include "check_err.hpp"
-#include "config.hpp"
+#include "ck/ck.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
-#include "host_tensor.hpp"
+#include "ck/tensor_operation/gpu/device/device_binary_elementwise.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "binary_element_wise_operation.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "device_binary_elementwise.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
 using F16 = ck::half_t;
 using F32 = float;

--- a/example/19_binary_elementwise/elementwise_add_1d.cpp
+++ b/example/19_binary_elementwise/elementwise_add_1d.cpp
-/*******************************************************************************
- *
- * MIT License
- *
- * Copyright (c) 2022 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- *******************************************************************************/
 #include <iostream>
 #include <cstdlib>
-#include "check_err.hpp"
-#include "config.hpp"
+#include "ck/ck.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/device/device_binary_elementwise.hpp"
-#include "host_tensor.hpp"
+#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/utility/check_err.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "binary_element_wise_operation.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "device_binary_elementwise.hpp"
 using F16 = ck::half_t;
 using F32 = float;

--- a/example/19_binary_elementwise/elementwise_add_4d.cpp
+++ b/example/19_binary_elementwise/elementwise_add_4d.cpp
-/*******************************************************************************
- *
- * MIT License
- *
- * Copyright (c) 2020 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- *******************************************************************************/
 #include <iostream>
 #include <cstdlib>
-#include "check_err.hpp"
-#include "config.hpp"
+#include "ck/ck.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
-#include "host_tensor.hpp"
+#include "ck/tensor_operation/gpu/device/device_binary_elementwise.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "binary_element_wise_operation.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "device_binary_elementwise.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
 using F16 = ck::half_t;
 using F32 = float;

--- a/example/20_convnd_bwd_weight_xdl/convnd_bwd_weight_xdl.cpp
+++ b/example/20_convnd_bwd_weight_xdl/convnd_bwd_weight_xdl.cpp
@@ -2,21 +2,18 @@
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
-#include "check_err.hpp"
+#include "ck/tensor_operation/gpu/device/device_convnd_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp"
-#include "conv_util.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "config.hpp"
-#include "print.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "device.hpp"
+#include "ck/library/utility/conv_util.hpp"
-#include "host_tensor.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "tensor_layout.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_conv_backward_weight.hpp"
-#include "element_wise_operation.hpp"
-#include "device_convnd_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp"
-#include "reference_conv_backward_weight.hpp"
 using InDataType  = ck::half_t;
 using WeiDataType = ck::half_t;

--- a/example/20_convnd_bwd_weight_xdl/convnd_bwd_weight_xdl_bf16_splitk.cpp
+++ b/example/20_convnd_bwd_weight_xdl/convnd_bwd_weight_xdl_bf16_splitk.cpp
@@ -2,22 +2,19 @@
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
-#include "check_err.hpp"
+#include "ck/tensor_operation/gpu/device/device_convnd_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp"
-#include "conv_util.hpp"
+#include "ck/tensor_operation/gpu/device/device_unary_elementwise.hpp"
-#include "config.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "print.hpp"
-#include "device.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "host_tensor.hpp"
+#include "ck/library/utility/conv_util.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "tensor_layout.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "element_wise_operation.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_conv_backward_weight.hpp"
-#include "device_unary_elementwise.hpp"
-#include "device_convnd_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp"
-#include "reference_conv_backward_weight.hpp"
 using InDataType  = ck::bhalf_t;
 using WeiDataType = ck::bhalf_t;

--- a/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_fp16.cpp
+++ b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_fp16.cpp
@@ -3,17 +3,18 @@
 #include <initializer_list>
 #include <cstdlib>
-#include "check_err.hpp"
+#include "ck/ck.hpp"
-#include "config.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
-#include "host_tensor.hpp"
+#include "ck/tensor_operation/gpu/device/device_gemm_bias_add_reduce_xdl_cshuffle.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/tensor_operation/gpu/device/device_5ary_elementwise.hpp"
-#include "device_tensor.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "device_5ary_elementwise.hpp"
-#include "device_gemm_bias_add_reduce_xdl_cshuffle.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "element_wise_operation.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "reference_gemm.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "gemm_specialization.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
+#include "ck/library/utility/check_err.hpp"
 template <ck::index_t... Is>
 using S = ck::Sequence<Is...>;

--- a/example/21_gemm_layernorm/gemm_layernorm_xdl_fp16.cpp
+++ b/example/21_gemm_layernorm/gemm_layernorm_xdl_fp16.cpp
@@ -3,17 +3,18 @@
 #include <initializer_list>
 #include <cstdlib>
-#include "check_err.hpp"
+#include "ck/ck.hpp"
-#include "config.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
-#include "device.hpp"
+#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
-#include "host_tensor.hpp"
+#include "ck/tensor_operation/gpu/device/device_gemm_reduce_xdl_cshuffle.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/tensor_operation/gpu/device/device_5ary_elementwise.hpp"
-#include "device_tensor.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "device_5ary_elementwise.hpp"
-#include "device_gemm_reduce_xdl_cshuffle.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "element_wise_operation.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "reference_gemm.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "gemm_specialization.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
+#include "ck/library/utility/check_err.hpp"
 template <ck::index_t... Is>
 using S = ck::Sequence<Is...>;

--- a/example/22_cgemm/cgemm_xdl_fp16.cpp
+++ b/example/22_cgemm/cgemm_xdl_fp16.cpp
-/*******************************************************************************
- *
- * MIT License
- *
- * Copyright (c) 2022 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- *******************************************************************************/
 #include <iostream>
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
-#include "check_err.hpp"
+#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
-#include "config.hpp"
+#include "ck/tensor_operation/gpu/device/device_cgemm_4gemm_xdl_cshuffle.hpp"
-#include "device.hpp"
-#include "host_tensor.hpp"
+#include "ck/library/utility/check_err.hpp"
-#include "host_tensor_generator.hpp"
+#include "ck/library/host_tensor/device_memory.hpp"
-#include "device_tensor.hpp"
+#include "ck/library/host_tensor/host_tensor.hpp"
-#include "device_cgemm_4gemm_xdl_cshuffle.hpp"
+#include "ck/library/host_tensor/host_tensor_generator.hpp"
-#include "element_wise_operation.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp"
-#include "reference_cgemm.hpp"
-#include "gemm_specialization.hpp"
 template <ck::index_t... Is>
 using S = ck::Sequence<Is...>;